airbyte-cdk 0.0.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/__init__.py +358 -0
- airbyte_cdk/cli/__init__.py +1 -0
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
- airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
- airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
- airbyte_cdk/config_observation.py +104 -0
- airbyte_cdk/connector.py +123 -0
- airbyte_cdk/connector_builder/README.md +53 -0
- airbyte_cdk/connector_builder/__init__.py +3 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
- airbyte_cdk/connector_builder/main.py +107 -0
- airbyte_cdk/connector_builder/models.py +73 -0
- airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
- airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
- airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
- airbyte_cdk/connector_builder/test_reader/types.py +83 -0
- airbyte_cdk/destinations/__init__.py +8 -0
- airbyte_cdk/destinations/destination.py +154 -0
- airbyte_cdk/destinations/vector_db_based/README.md +37 -0
- airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
- airbyte_cdk/destinations/vector_db_based/config.py +298 -0
- airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
- airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
- airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
- airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
- airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
- airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
- airbyte_cdk/entrypoint.py +414 -0
- airbyte_cdk/exception_handler.py +56 -0
- airbyte_cdk/logger.py +109 -0
- airbyte_cdk/models/__init__.py +72 -0
- airbyte_cdk/models/airbyte_protocol.py +88 -0
- airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
- airbyte_cdk/models/well_known_types.py +5 -0
- airbyte_cdk/py.typed +0 -0
- airbyte_cdk/sources/__init__.py +26 -0
- airbyte_cdk/sources/abstract_source.py +326 -0
- airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
- airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
- airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
- airbyte_cdk/sources/config.py +27 -0
- airbyte_cdk/sources/connector_state_manager.py +161 -0
- airbyte_cdk/sources/declarative/__init__.py +3 -0
- airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
- airbyte_cdk/sources/declarative/async_job/job.py +52 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
- airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
- airbyte_cdk/sources/declarative/async_job/status.py +24 -0
- airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
- airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
- airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
- airbyte_cdk/sources/declarative/auth/token.py +267 -0
- airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
- airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
- airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
- airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
- airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
- airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
- airbyte_cdk/sources/declarative/declarative_source.py +36 -0
- airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
- airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
- airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
- airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
- airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
- airbyte_cdk/sources/declarative/exceptions.py +9 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
- airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
- airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
- airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
- airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
- airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
- airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
- airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
- airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
- airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
- airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
- airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
- airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
- airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
- airbyte_cdk/sources/declarative/models/__init__.py +2 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
- airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
- airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
- airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
- airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
- airbyte_cdk/sources/declarative/requesters/README.md +56 -0
- airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
- airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
- airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
- airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
- airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
- airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
- airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
- airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
- airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
- airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
- airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
- airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
- airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
- airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
- airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
- airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
- airbyte_cdk/sources/declarative/spec/spec.py +48 -0
- airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
- airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
- airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
- airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
- airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
- airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
- airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
- airbyte_cdk/sources/declarative/types.py +25 -0
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
- airbyte_cdk/sources/file_based/README.md +152 -0
- airbyte_cdk/sources/file_based/__init__.py +24 -0
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
- airbyte_cdk/sources/file_based/config/__init__.py +0 -0
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
- airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
- airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
- airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
- airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
- airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
- airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
- airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
- airbyte_cdk/sources/file_based/exceptions.py +159 -0
- airbyte_cdk/sources/file_based/file_based_source.py +466 -0
- airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
- airbyte_cdk/sources/file_based/file_record_data.py +22 -0
- airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
- airbyte_cdk/sources/file_based/remote_file.py +18 -0
- airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
- airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
- airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
- airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
- airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
- airbyte_cdk/sources/file_based/types.py +10 -0
- airbyte_cdk/sources/http_config.py +10 -0
- airbyte_cdk/sources/http_logger.py +55 -0
- airbyte_cdk/sources/message/__init__.py +19 -0
- airbyte_cdk/sources/message/repository.py +137 -0
- airbyte_cdk/sources/source.py +95 -0
- airbyte_cdk/sources/specs/transfer_modes.py +26 -0
- airbyte_cdk/sources/streams/__init__.py +8 -0
- airbyte_cdk/sources/streams/availability_strategy.py +84 -0
- airbyte_cdk/sources/streams/call_rate.py +704 -0
- airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
- airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
- airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
- airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
- airbyte_cdk/sources/streams/concurrent/README.md +7 -0
- airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
- airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
- airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
- airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
- airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
- airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
- airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
- airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
- airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
- airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
- airbyte_cdk/sources/streams/core.py +703 -0
- airbyte_cdk/sources/streams/http/__init__.py +10 -0
- airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
- airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
- airbyte_cdk/sources/streams/http/exceptions.py +61 -0
- airbyte_cdk/sources/streams/http/http.py +673 -0
- airbyte_cdk/sources/streams/http/http_client.py +531 -0
- airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
- airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
- airbyte_cdk/sources/streams/utils/__init__.py +3 -0
- airbyte_cdk/sources/types.py +169 -0
- airbyte_cdk/sources/utils/__init__.py +7 -0
- airbyte_cdk/sources/utils/casing.py +12 -0
- airbyte_cdk/sources/utils/files_directory.py +15 -0
- airbyte_cdk/sources/utils/record_helper.py +53 -0
- airbyte_cdk/sources/utils/schema_helpers.py +230 -0
- airbyte_cdk/sources/utils/slice_logger.py +57 -0
- airbyte_cdk/sources/utils/transform.py +277 -0
- airbyte_cdk/sources/utils/types.py +7 -0
- airbyte_cdk/sql/__init__.py +0 -0
- airbyte_cdk/sql/_util/__init__.py +0 -0
- airbyte_cdk/sql/_util/hashing.py +34 -0
- airbyte_cdk/sql/_util/name_normalizers.py +92 -0
- airbyte_cdk/sql/constants.py +32 -0
- airbyte_cdk/sql/exceptions.py +235 -0
- airbyte_cdk/sql/secrets.py +123 -0
- airbyte_cdk/sql/shared/__init__.py +15 -0
- airbyte_cdk/sql/shared/catalog_providers.py +145 -0
- airbyte_cdk/sql/shared/sql_processor.py +786 -0
- airbyte_cdk/sql/types.py +160 -0
- airbyte_cdk/test/__init__.py +7 -0
- airbyte_cdk/test/catalog_builder.py +81 -0
- airbyte_cdk/test/entrypoint_wrapper.py +250 -0
- airbyte_cdk/test/mock_http/__init__.py +6 -0
- airbyte_cdk/test/mock_http/matcher.py +41 -0
- airbyte_cdk/test/mock_http/mocker.py +185 -0
- airbyte_cdk/test/mock_http/request.py +103 -0
- airbyte_cdk/test/mock_http/response.py +28 -0
- airbyte_cdk/test/mock_http/response_builder.py +237 -0
- airbyte_cdk/test/state_builder.py +33 -0
- airbyte_cdk/test/utils/__init__.py +1 -0
- airbyte_cdk/test/utils/data.py +24 -0
- airbyte_cdk/test/utils/http_mocking.py +16 -0
- airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
- airbyte_cdk/test/utils/reading.py +26 -0
- airbyte_cdk/utils/__init__.py +10 -0
- airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
- airbyte_cdk/utils/analytics_message.py +25 -0
- airbyte_cdk/utils/constants.py +5 -0
- airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
- airbyte_cdk/utils/datetime_helpers.py +499 -0
- airbyte_cdk/utils/event_timing.py +85 -0
- airbyte_cdk/utils/is_cloud_environment.py +18 -0
- airbyte_cdk/utils/mapping_helpers.py +162 -0
- airbyte_cdk/utils/message_utils.py +26 -0
- airbyte_cdk/utils/oneof_option_config.py +33 -0
- airbyte_cdk/utils/print_buffer.py +75 -0
- airbyte_cdk/utils/schema_inferrer.py +270 -0
- airbyte_cdk/utils/slice_hasher.py +37 -0
- airbyte_cdk/utils/spec_schema_transformations.py +26 -0
- airbyte_cdk/utils/stream_status_utils.py +43 -0
- airbyte_cdk/utils/traced_exception.py +145 -0
- airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
- airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
- airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
- airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
- airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
- airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,3407 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import datetime
|
|
8
|
+
import importlib
|
|
9
|
+
import inspect
|
|
10
|
+
import re
|
|
11
|
+
from functools import partial
|
|
12
|
+
from typing import (
|
|
13
|
+
Any,
|
|
14
|
+
Callable,
|
|
15
|
+
Dict,
|
|
16
|
+
List,
|
|
17
|
+
Mapping,
|
|
18
|
+
MutableMapping,
|
|
19
|
+
Optional,
|
|
20
|
+
Type,
|
|
21
|
+
Union,
|
|
22
|
+
get_args,
|
|
23
|
+
get_origin,
|
|
24
|
+
get_type_hints,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
from isodate import parse_duration
|
|
28
|
+
from pydantic.v1 import BaseModel
|
|
29
|
+
|
|
30
|
+
from airbyte_cdk.models import FailureType, Level
|
|
31
|
+
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
|
32
|
+
from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator
|
|
33
|
+
from airbyte_cdk.sources.declarative.async_job.job_tracker import JobTracker
|
|
34
|
+
from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository
|
|
35
|
+
from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus
|
|
36
|
+
from airbyte_cdk.sources.declarative.auth import DeclarativeOauth2Authenticator, JwtAuthenticator
|
|
37
|
+
from airbyte_cdk.sources.declarative.auth.declarative_authenticator import (
|
|
38
|
+
DeclarativeAuthenticator,
|
|
39
|
+
NoAuth,
|
|
40
|
+
)
|
|
41
|
+
from airbyte_cdk.sources.declarative.auth.jwt import JwtAlgorithm
|
|
42
|
+
from airbyte_cdk.sources.declarative.auth.oauth import (
|
|
43
|
+
DeclarativeSingleUseRefreshTokenOauth2Authenticator,
|
|
44
|
+
)
|
|
45
|
+
from airbyte_cdk.sources.declarative.auth.selective_authenticator import SelectiveAuthenticator
|
|
46
|
+
from airbyte_cdk.sources.declarative.auth.token import (
|
|
47
|
+
ApiKeyAuthenticator,
|
|
48
|
+
BasicHttpAuthenticator,
|
|
49
|
+
BearerAuthenticator,
|
|
50
|
+
LegacySessionTokenAuthenticator,
|
|
51
|
+
)
|
|
52
|
+
from airbyte_cdk.sources.declarative.auth.token_provider import (
|
|
53
|
+
InterpolatedStringTokenProvider,
|
|
54
|
+
SessionTokenProvider,
|
|
55
|
+
TokenProvider,
|
|
56
|
+
)
|
|
57
|
+
from airbyte_cdk.sources.declarative.checks import CheckDynamicStream, CheckStream
|
|
58
|
+
from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
|
|
59
|
+
from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
|
|
60
|
+
from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
|
|
61
|
+
from airbyte_cdk.sources.declarative.decoders import (
|
|
62
|
+
Decoder,
|
|
63
|
+
IterableDecoder,
|
|
64
|
+
JsonDecoder,
|
|
65
|
+
PaginationDecoderDecorator,
|
|
66
|
+
XmlDecoder,
|
|
67
|
+
ZipfileDecoder,
|
|
68
|
+
)
|
|
69
|
+
from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
|
|
70
|
+
CompositeRawDecoder,
|
|
71
|
+
CsvParser,
|
|
72
|
+
GzipParser,
|
|
73
|
+
JsonLineParser,
|
|
74
|
+
JsonParser,
|
|
75
|
+
Parser,
|
|
76
|
+
)
|
|
77
|
+
from airbyte_cdk.sources.declarative.extractors import (
|
|
78
|
+
DpathExtractor,
|
|
79
|
+
RecordFilter,
|
|
80
|
+
RecordSelector,
|
|
81
|
+
ResponseToFileExtractor,
|
|
82
|
+
)
|
|
83
|
+
from airbyte_cdk.sources.declarative.extractors.record_filter import (
|
|
84
|
+
ClientSideIncrementalRecordFilterDecorator,
|
|
85
|
+
)
|
|
86
|
+
from airbyte_cdk.sources.declarative.incremental import (
|
|
87
|
+
ChildPartitionResumableFullRefreshCursor,
|
|
88
|
+
ConcurrentCursorFactory,
|
|
89
|
+
ConcurrentPerPartitionCursor,
|
|
90
|
+
CursorFactory,
|
|
91
|
+
DatetimeBasedCursor,
|
|
92
|
+
DeclarativeCursor,
|
|
93
|
+
GlobalSubstreamCursor,
|
|
94
|
+
PerPartitionCursor,
|
|
95
|
+
PerPartitionWithGlobalCursor,
|
|
96
|
+
ResumableFullRefreshCursor,
|
|
97
|
+
)
|
|
98
|
+
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
|
99
|
+
from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping
|
|
100
|
+
from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_migration import (
|
|
101
|
+
LegacyToPerPartitionStateMigration,
|
|
102
|
+
)
|
|
103
|
+
from airbyte_cdk.sources.declarative.models import (
|
|
104
|
+
CustomStateMigration,
|
|
105
|
+
)
|
|
106
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
107
|
+
AddedFieldDefinition as AddedFieldDefinitionModel,
|
|
108
|
+
)
|
|
109
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
110
|
+
AddFields as AddFieldsModel,
|
|
111
|
+
)
|
|
112
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
113
|
+
ApiKeyAuthenticator as ApiKeyAuthenticatorModel,
|
|
114
|
+
)
|
|
115
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
116
|
+
AsyncJobStatusMap as AsyncJobStatusMapModel,
|
|
117
|
+
)
|
|
118
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
119
|
+
AsyncRetriever as AsyncRetrieverModel,
|
|
120
|
+
)
|
|
121
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
122
|
+
BasicHttpAuthenticator as BasicHttpAuthenticatorModel,
|
|
123
|
+
)
|
|
124
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
125
|
+
BearerAuthenticator as BearerAuthenticatorModel,
|
|
126
|
+
)
|
|
127
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
128
|
+
CheckDynamicStream as CheckDynamicStreamModel,
|
|
129
|
+
)
|
|
130
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
131
|
+
CheckStream as CheckStreamModel,
|
|
132
|
+
)
|
|
133
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
134
|
+
ComplexFieldType as ComplexFieldTypeModel,
|
|
135
|
+
)
|
|
136
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
137
|
+
ComponentMappingDefinition as ComponentMappingDefinitionModel,
|
|
138
|
+
)
|
|
139
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
140
|
+
CompositeErrorHandler as CompositeErrorHandlerModel,
|
|
141
|
+
)
|
|
142
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
143
|
+
ConcurrencyLevel as ConcurrencyLevelModel,
|
|
144
|
+
)
|
|
145
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
146
|
+
ConfigComponentsResolver as ConfigComponentsResolverModel,
|
|
147
|
+
)
|
|
148
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
149
|
+
ConstantBackoffStrategy as ConstantBackoffStrategyModel,
|
|
150
|
+
)
|
|
151
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
152
|
+
CsvDecoder as CsvDecoderModel,
|
|
153
|
+
)
|
|
154
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
155
|
+
CursorPagination as CursorPaginationModel,
|
|
156
|
+
)
|
|
157
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
158
|
+
CustomAuthenticator as CustomAuthenticatorModel,
|
|
159
|
+
)
|
|
160
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
161
|
+
CustomBackoffStrategy as CustomBackoffStrategyModel,
|
|
162
|
+
)
|
|
163
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
164
|
+
CustomDecoder as CustomDecoderModel,
|
|
165
|
+
)
|
|
166
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
167
|
+
CustomErrorHandler as CustomErrorHandlerModel,
|
|
168
|
+
)
|
|
169
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
170
|
+
CustomIncrementalSync as CustomIncrementalSyncModel,
|
|
171
|
+
)
|
|
172
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
173
|
+
CustomPaginationStrategy as CustomPaginationStrategyModel,
|
|
174
|
+
)
|
|
175
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
176
|
+
CustomPartitionRouter as CustomPartitionRouterModel,
|
|
177
|
+
)
|
|
178
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
179
|
+
CustomRecordExtractor as CustomRecordExtractorModel,
|
|
180
|
+
)
|
|
181
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
182
|
+
CustomRecordFilter as CustomRecordFilterModel,
|
|
183
|
+
)
|
|
184
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
185
|
+
CustomRequester as CustomRequesterModel,
|
|
186
|
+
)
|
|
187
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
188
|
+
CustomRetriever as CustomRetrieverModel,
|
|
189
|
+
)
|
|
190
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
191
|
+
CustomSchemaLoader as CustomSchemaLoader,
|
|
192
|
+
)
|
|
193
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
194
|
+
CustomSchemaNormalization as CustomSchemaNormalizationModel,
|
|
195
|
+
)
|
|
196
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
197
|
+
CustomTransformation as CustomTransformationModel,
|
|
198
|
+
)
|
|
199
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
200
|
+
DatetimeBasedCursor as DatetimeBasedCursorModel,
|
|
201
|
+
)
|
|
202
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
203
|
+
DeclarativeStream as DeclarativeStreamModel,
|
|
204
|
+
)
|
|
205
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
206
|
+
DefaultErrorHandler as DefaultErrorHandlerModel,
|
|
207
|
+
)
|
|
208
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
209
|
+
DefaultPaginator as DefaultPaginatorModel,
|
|
210
|
+
)
|
|
211
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
212
|
+
DpathExtractor as DpathExtractorModel,
|
|
213
|
+
)
|
|
214
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
215
|
+
DpathFlattenFields as DpathFlattenFieldsModel,
|
|
216
|
+
)
|
|
217
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
218
|
+
DynamicSchemaLoader as DynamicSchemaLoaderModel,
|
|
219
|
+
)
|
|
220
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
221
|
+
ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
|
|
222
|
+
)
|
|
223
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
224
|
+
FileUploader as FileUploaderModel,
|
|
225
|
+
)
|
|
226
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
227
|
+
FixedWindowCallRatePolicy as FixedWindowCallRatePolicyModel,
|
|
228
|
+
)
|
|
229
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
230
|
+
FlattenFields as FlattenFieldsModel,
|
|
231
|
+
)
|
|
232
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
233
|
+
GzipDecoder as GzipDecoderModel,
|
|
234
|
+
)
|
|
235
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
236
|
+
HTTPAPIBudget as HTTPAPIBudgetModel,
|
|
237
|
+
)
|
|
238
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
239
|
+
HttpComponentsResolver as HttpComponentsResolverModel,
|
|
240
|
+
)
|
|
241
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
242
|
+
HttpRequester as HttpRequesterModel,
|
|
243
|
+
)
|
|
244
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
245
|
+
HttpRequestRegexMatcher as HttpRequestRegexMatcherModel,
|
|
246
|
+
)
|
|
247
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
248
|
+
HttpResponseFilter as HttpResponseFilterModel,
|
|
249
|
+
)
|
|
250
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
251
|
+
IncrementingCountCursor as IncrementingCountCursorModel,
|
|
252
|
+
)
|
|
253
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
254
|
+
InlineSchemaLoader as InlineSchemaLoaderModel,
|
|
255
|
+
)
|
|
256
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
257
|
+
IterableDecoder as IterableDecoderModel,
|
|
258
|
+
)
|
|
259
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
260
|
+
JsonDecoder as JsonDecoderModel,
|
|
261
|
+
)
|
|
262
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
263
|
+
JsonFileSchemaLoader as JsonFileSchemaLoaderModel,
|
|
264
|
+
)
|
|
265
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
266
|
+
JsonlDecoder as JsonlDecoderModel,
|
|
267
|
+
)
|
|
268
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
269
|
+
JwtAuthenticator as JwtAuthenticatorModel,
|
|
270
|
+
)
|
|
271
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
272
|
+
JwtHeaders as JwtHeadersModel,
|
|
273
|
+
)
|
|
274
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
275
|
+
JwtPayload as JwtPayloadModel,
|
|
276
|
+
)
|
|
277
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
278
|
+
KeysReplace as KeysReplaceModel,
|
|
279
|
+
)
|
|
280
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
281
|
+
KeysToLower as KeysToLowerModel,
|
|
282
|
+
)
|
|
283
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
284
|
+
KeysToSnakeCase as KeysToSnakeCaseModel,
|
|
285
|
+
)
|
|
286
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
287
|
+
LegacySessionTokenAuthenticator as LegacySessionTokenAuthenticatorModel,
|
|
288
|
+
)
|
|
289
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
290
|
+
LegacyToPerPartitionStateMigration as LegacyToPerPartitionStateMigrationModel,
|
|
291
|
+
)
|
|
292
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
293
|
+
ListPartitionRouter as ListPartitionRouterModel,
|
|
294
|
+
)
|
|
295
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
296
|
+
MinMaxDatetime as MinMaxDatetimeModel,
|
|
297
|
+
)
|
|
298
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
299
|
+
MovingWindowCallRatePolicy as MovingWindowCallRatePolicyModel,
|
|
300
|
+
)
|
|
301
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
302
|
+
NoAuth as NoAuthModel,
|
|
303
|
+
)
|
|
304
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
305
|
+
NoPagination as NoPaginationModel,
|
|
306
|
+
)
|
|
307
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
308
|
+
OAuthAuthenticator as OAuthAuthenticatorModel,
|
|
309
|
+
)
|
|
310
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
311
|
+
OffsetIncrement as OffsetIncrementModel,
|
|
312
|
+
)
|
|
313
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
314
|
+
PageIncrement as PageIncrementModel,
|
|
315
|
+
)
|
|
316
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
317
|
+
ParentStreamConfig as ParentStreamConfigModel,
|
|
318
|
+
)
|
|
319
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
320
|
+
Rate as RateModel,
|
|
321
|
+
)
|
|
322
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
323
|
+
RecordFilter as RecordFilterModel,
|
|
324
|
+
)
|
|
325
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
326
|
+
RecordSelector as RecordSelectorModel,
|
|
327
|
+
)
|
|
328
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
329
|
+
RemoveFields as RemoveFieldsModel,
|
|
330
|
+
)
|
|
331
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
332
|
+
RequestOption as RequestOptionModel,
|
|
333
|
+
)
|
|
334
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
335
|
+
RequestPath as RequestPathModel,
|
|
336
|
+
)
|
|
337
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
338
|
+
ResponseToFileExtractor as ResponseToFileExtractorModel,
|
|
339
|
+
)
|
|
340
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
341
|
+
SchemaNormalization as SchemaNormalizationModel,
|
|
342
|
+
)
|
|
343
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
344
|
+
SchemaTypeIdentifier as SchemaTypeIdentifierModel,
|
|
345
|
+
)
|
|
346
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
347
|
+
SelectiveAuthenticator as SelectiveAuthenticatorModel,
|
|
348
|
+
)
|
|
349
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
350
|
+
SessionTokenAuthenticator as SessionTokenAuthenticatorModel,
|
|
351
|
+
)
|
|
352
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
353
|
+
SimpleRetriever as SimpleRetrieverModel,
|
|
354
|
+
)
|
|
355
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel
|
|
356
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
357
|
+
StateDelegatingStream as StateDelegatingStreamModel,
|
|
358
|
+
)
|
|
359
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
360
|
+
StreamConfig as StreamConfigModel,
|
|
361
|
+
)
|
|
362
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
363
|
+
SubstreamPartitionRouter as SubstreamPartitionRouterModel,
|
|
364
|
+
)
|
|
365
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
366
|
+
TypesMap as TypesMapModel,
|
|
367
|
+
)
|
|
368
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
369
|
+
UnlimitedCallRatePolicy as UnlimitedCallRatePolicyModel,
|
|
370
|
+
)
|
|
371
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType
|
|
372
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
373
|
+
WaitTimeFromHeader as WaitTimeFromHeaderModel,
|
|
374
|
+
)
|
|
375
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
376
|
+
WaitUntilTimeFromHeader as WaitUntilTimeFromHeaderModel,
|
|
377
|
+
)
|
|
378
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
379
|
+
XmlDecoder as XmlDecoderModel,
|
|
380
|
+
)
|
|
381
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
|
382
|
+
ZipfileDecoder as ZipfileDecoderModel,
|
|
383
|
+
)
|
|
384
|
+
from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
|
|
385
|
+
COMPONENTS_MODULE_NAME,
|
|
386
|
+
SDM_COMPONENTS_MODULE_NAME,
|
|
387
|
+
)
|
|
388
|
+
from airbyte_cdk.sources.declarative.partition_routers import (
|
|
389
|
+
CartesianProductStreamSlicer,
|
|
390
|
+
ListPartitionRouter,
|
|
391
|
+
PartitionRouter,
|
|
392
|
+
SinglePartitionRouter,
|
|
393
|
+
SubstreamPartitionRouter,
|
|
394
|
+
)
|
|
395
|
+
from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
|
|
396
|
+
AsyncJobPartitionRouter,
|
|
397
|
+
)
|
|
398
|
+
from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
|
|
399
|
+
ParentStreamConfig,
|
|
400
|
+
)
|
|
401
|
+
from airbyte_cdk.sources.declarative.requesters import HttpRequester, RequestOption
|
|
402
|
+
from airbyte_cdk.sources.declarative.requesters.error_handlers import (
|
|
403
|
+
CompositeErrorHandler,
|
|
404
|
+
DefaultErrorHandler,
|
|
405
|
+
HttpResponseFilter,
|
|
406
|
+
)
|
|
407
|
+
from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies import (
|
|
408
|
+
ConstantBackoffStrategy,
|
|
409
|
+
ExponentialBackoffStrategy,
|
|
410
|
+
WaitTimeFromHeaderBackoffStrategy,
|
|
411
|
+
WaitUntilTimeFromHeaderBackoffStrategy,
|
|
412
|
+
)
|
|
413
|
+
from airbyte_cdk.sources.declarative.requesters.http_job_repository import AsyncHttpJobRepository
|
|
414
|
+
from airbyte_cdk.sources.declarative.requesters.paginators import (
|
|
415
|
+
DefaultPaginator,
|
|
416
|
+
NoPagination,
|
|
417
|
+
PaginatorTestReadDecorator,
|
|
418
|
+
)
|
|
419
|
+
from airbyte_cdk.sources.declarative.requesters.paginators.strategies import (
|
|
420
|
+
CursorPaginationStrategy,
|
|
421
|
+
CursorStopCondition,
|
|
422
|
+
OffsetIncrement,
|
|
423
|
+
PageIncrement,
|
|
424
|
+
StopConditionPaginationStrategyDecorator,
|
|
425
|
+
)
|
|
426
|
+
from airbyte_cdk.sources.declarative.requesters.request_option import RequestOptionType
|
|
427
|
+
from airbyte_cdk.sources.declarative.requesters.request_options import (
|
|
428
|
+
DatetimeBasedRequestOptionsProvider,
|
|
429
|
+
DefaultRequestOptionsProvider,
|
|
430
|
+
InterpolatedRequestOptionsProvider,
|
|
431
|
+
RequestOptionsProvider,
|
|
432
|
+
)
|
|
433
|
+
from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
|
|
434
|
+
from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod
|
|
435
|
+
from airbyte_cdk.sources.declarative.resolvers import (
|
|
436
|
+
ComponentMappingDefinition,
|
|
437
|
+
ConfigComponentsResolver,
|
|
438
|
+
HttpComponentsResolver,
|
|
439
|
+
StreamConfig,
|
|
440
|
+
)
|
|
441
|
+
from airbyte_cdk.sources.declarative.retrievers import (
|
|
442
|
+
AsyncRetriever,
|
|
443
|
+
LazySimpleRetriever,
|
|
444
|
+
SimpleRetriever,
|
|
445
|
+
SimpleRetrieverTestReadDecorator,
|
|
446
|
+
)
|
|
447
|
+
from airbyte_cdk.sources.declarative.retrievers.file_uploader import FileUploader
|
|
448
|
+
from airbyte_cdk.sources.declarative.schema import (
|
|
449
|
+
ComplexFieldType,
|
|
450
|
+
DefaultSchemaLoader,
|
|
451
|
+
DynamicSchemaLoader,
|
|
452
|
+
InlineSchemaLoader,
|
|
453
|
+
JsonFileSchemaLoader,
|
|
454
|
+
SchemaTypeIdentifier,
|
|
455
|
+
TypesMap,
|
|
456
|
+
)
|
|
457
|
+
from airbyte_cdk.sources.declarative.spec import Spec
|
|
458
|
+
from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer
|
|
459
|
+
from airbyte_cdk.sources.declarative.transformations import (
|
|
460
|
+
AddFields,
|
|
461
|
+
RecordTransformation,
|
|
462
|
+
RemoveFields,
|
|
463
|
+
)
|
|
464
|
+
from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition
|
|
465
|
+
from airbyte_cdk.sources.declarative.transformations.dpath_flatten_fields import (
|
|
466
|
+
DpathFlattenFields,
|
|
467
|
+
)
|
|
468
|
+
from airbyte_cdk.sources.declarative.transformations.flatten_fields import (
|
|
469
|
+
FlattenFields,
|
|
470
|
+
)
|
|
471
|
+
from airbyte_cdk.sources.declarative.transformations.keys_replace_transformation import (
|
|
472
|
+
KeysReplaceTransformation,
|
|
473
|
+
)
|
|
474
|
+
from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import (
|
|
475
|
+
KeysToLowerTransformation,
|
|
476
|
+
)
|
|
477
|
+
from airbyte_cdk.sources.declarative.transformations.keys_to_snake_transformation import (
|
|
478
|
+
KeysToSnakeCaseTransformation,
|
|
479
|
+
)
|
|
480
|
+
from airbyte_cdk.sources.message import (
|
|
481
|
+
InMemoryMessageRepository,
|
|
482
|
+
LogAppenderMessageRepositoryDecorator,
|
|
483
|
+
MessageRepository,
|
|
484
|
+
NoopMessageRepository,
|
|
485
|
+
)
|
|
486
|
+
from airbyte_cdk.sources.streams.call_rate import (
|
|
487
|
+
APIBudget,
|
|
488
|
+
FixedWindowCallRatePolicy,
|
|
489
|
+
HttpAPIBudget,
|
|
490
|
+
HttpRequestRegexMatcher,
|
|
491
|
+
MovingWindowCallRatePolicy,
|
|
492
|
+
Rate,
|
|
493
|
+
UnlimitedCallRatePolicy,
|
|
494
|
+
)
|
|
495
|
+
from airbyte_cdk.sources.streams.concurrent.clamping import (
|
|
496
|
+
ClampingEndProvider,
|
|
497
|
+
ClampingStrategy,
|
|
498
|
+
DayClampingStrategy,
|
|
499
|
+
MonthClampingStrategy,
|
|
500
|
+
NoClamping,
|
|
501
|
+
WeekClampingStrategy,
|
|
502
|
+
Weekday,
|
|
503
|
+
)
|
|
504
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
|
|
505
|
+
from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
|
|
506
|
+
CustomFormatConcurrentStreamStateConverter,
|
|
507
|
+
DateTimeStreamStateConverter,
|
|
508
|
+
)
|
|
509
|
+
from airbyte_cdk.sources.streams.concurrent.state_converters.incrementing_count_stream_state_converter import (
|
|
510
|
+
IncrementingCountStreamStateConverter,
|
|
511
|
+
)
|
|
512
|
+
from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction
|
|
513
|
+
from airbyte_cdk.sources.types import Config
|
|
514
|
+
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
|
|
515
|
+
|
|
516
|
+
# Raw mapping form of a manifest component, as handed to create_component before it is parsed into a Pydantic model.
ComponentDefinition = Mapping[str, Any]
|
|
517
|
+
|
|
518
|
+
# Lookup table from the manifest-level SchemaNormalization option to the
# TransformConfig flag that corresponds to it.
SCHEMA_TRANSFORMER_TYPE_MAPPING = {
    SchemaNormalizationModel.None_: TransformConfig.NoTransform,
    SchemaNormalizationModel.Default: TransformConfig.DefaultSchemaNormalization,
}
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
class ModelToComponentFactory:
|
|
525
|
+
# Datetime format string "%s"; presumably the seconds-since-epoch directive, going by the constant's name - TODO confirm against its users.
EPOCH_DATETIME_FORMAT = "%s"
|
|
526
|
+
|
|
527
|
+
def __init__(
    self,
    limit_pages_fetched_per_slice: Optional[int] = None,
    limit_slices_fetched: Optional[int] = None,
    emit_connector_builder_messages: bool = False,
    disable_retries: bool = False,
    disable_cache: bool = False,
    disable_resumable_full_refresh: bool = False,
    message_repository: Optional[MessageRepository] = None,
    connector_state_manager: Optional[ConnectorStateManager] = None,
    max_concurrent_async_job_count: Optional[int] = None,
):
    """
    Set up the factory: build the model-to-constructor registry and store the options
    that the individual ``create_*`` methods consult.

    :param limit_pages_fetched_per_slice: Stored as-is on the factory
    :param limit_slices_fetched: Stored as-is on the factory
    :param emit_connector_builder_messages: Stored as-is; also used to pick the log level of the
        default message repository
    :param disable_retries: Stored as-is on the factory
    :param disable_cache: Stored as-is on the factory
    :param disable_resumable_full_refresh: Stored as-is on the factory
    :param message_repository: Repository to use; when falsy an InMemoryMessageRepository is created
    :param connector_state_manager: State manager to use; when falsy a new ConnectorStateManager is created
    :param max_concurrent_async_job_count: Value handed to the JobTracker; falsy values (None, 0) become 1
    """
    # The registry is built before anything else is assigned.
    self._init_mappings()

    # Plain option flags, kept verbatim.
    self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice
    self._limit_slices_fetched = limit_slices_fetched
    self._emit_connector_builder_messages = emit_connector_builder_messages
    self._disable_retries = disable_retries
    self._disable_cache = disable_cache
    self._disable_resumable_full_refresh = disable_resumable_full_refresh

    # Collaborators: fall back to freshly built defaults when none were supplied.
    if not message_repository:
        message_repository = InMemoryMessageRepository(
            self._evaluate_log_level(emit_connector_builder_messages)
        )
    self._message_repository = message_repository

    if not connector_state_manager:
        connector_state_manager = ConnectorStateManager()
    self._connector_state_manager = connector_state_manager

    # No API budget is known at construction time.
    self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None
    self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1)
|
|
552
|
+
|
|
553
|
+
def _init_mappings(self) -> None:
    """
    Populate the two lookup tables used to turn parsed manifest models into components:

    * ``PYDANTIC_MODEL_TO_CONSTRUCTOR`` maps each supported Pydantic model class to the bound
      factory method that builds its runtime component.
    * ``TYPE_NAME_TO_MODEL`` maps each of those model classes' ``__name__`` back to the class.
    """
    constructors = {
        AddedFieldDefinitionModel: self.create_added_field_definition,
        AddFieldsModel: self.create_add_fields,
        ApiKeyAuthenticatorModel: self.create_api_key_authenticator,
        BasicHttpAuthenticatorModel: self.create_basic_http_authenticator,
        BearerAuthenticatorModel: self.create_bearer_authenticator,
        CheckStreamModel: self.create_check_stream,
        CheckDynamicStreamModel: self.create_check_dynamic_stream,
        CompositeErrorHandlerModel: self.create_composite_error_handler,
        ConcurrencyLevelModel: self.create_concurrency_level,
        ConstantBackoffStrategyModel: self.create_constant_backoff_strategy,
        CsvDecoderModel: self.create_csv_decoder,
        CursorPaginationModel: self.create_cursor_pagination,
        # Every Custom* model is routed through the same generic constructor.
        CustomAuthenticatorModel: self.create_custom_component,
        CustomBackoffStrategyModel: self.create_custom_component,
        CustomDecoderModel: self.create_custom_component,
        CustomErrorHandlerModel: self.create_custom_component,
        CustomIncrementalSyncModel: self.create_custom_component,
        CustomRecordExtractorModel: self.create_custom_component,
        CustomRecordFilterModel: self.create_custom_component,
        CustomRequesterModel: self.create_custom_component,
        CustomRetrieverModel: self.create_custom_component,
        CustomSchemaLoader: self.create_custom_component,
        CustomSchemaNormalizationModel: self.create_custom_component,
        CustomStateMigration: self.create_custom_component,
        CustomPaginationStrategyModel: self.create_custom_component,
        CustomPartitionRouterModel: self.create_custom_component,
        CustomTransformationModel: self.create_custom_component,
        DatetimeBasedCursorModel: self.create_datetime_based_cursor,
        DeclarativeStreamModel: self.create_declarative_stream,
        DefaultErrorHandlerModel: self.create_default_error_handler,
        DefaultPaginatorModel: self.create_default_paginator,
        DpathExtractorModel: self.create_dpath_extractor,
        ResponseToFileExtractorModel: self.create_response_to_file_extractor,
        ExponentialBackoffStrategyModel: self.create_exponential_backoff_strategy,
        SessionTokenAuthenticatorModel: self.create_session_token_authenticator,
        HttpRequesterModel: self.create_http_requester,
        HttpResponseFilterModel: self.create_http_response_filter,
        InlineSchemaLoaderModel: self.create_inline_schema_loader,
        JsonDecoderModel: self.create_json_decoder,
        JsonlDecoderModel: self.create_jsonl_decoder,
        GzipDecoderModel: self.create_gzip_decoder,
        KeysToLowerModel: self.create_keys_to_lower_transformation,
        KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
        KeysReplaceModel: self.create_keys_replace_transformation,
        FlattenFieldsModel: self.create_flatten_fields,
        DpathFlattenFieldsModel: self.create_dpath_flatten_fields,
        IterableDecoderModel: self.create_iterable_decoder,
        IncrementingCountCursorModel: self.create_incrementing_count_cursor,
        XmlDecoderModel: self.create_xml_decoder,
        JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
        DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
        SchemaTypeIdentifierModel: self.create_schema_type_identifier,
        TypesMapModel: self.create_types_map,
        ComplexFieldTypeModel: self.create_complex_field_type,
        JwtAuthenticatorModel: self.create_jwt_authenticator,
        LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
        ListPartitionRouterModel: self.create_list_partition_router,
        MinMaxDatetimeModel: self.create_min_max_datetime,
        NoAuthModel: self.create_no_auth,
        NoPaginationModel: self.create_no_pagination,
        OAuthAuthenticatorModel: self.create_oauth_authenticator,
        OffsetIncrementModel: self.create_offset_increment,
        PageIncrementModel: self.create_page_increment,
        ParentStreamConfigModel: self.create_parent_stream_config,
        RecordFilterModel: self.create_record_filter,
        RecordSelectorModel: self.create_record_selector,
        RemoveFieldsModel: self.create_remove_fields,
        RequestPathModel: self.create_request_path,
        RequestOptionModel: self.create_request_option,
        LegacySessionTokenAuthenticatorModel: self.create_legacy_session_token_authenticator,
        SelectiveAuthenticatorModel: self.create_selective_authenticator,
        SimpleRetrieverModel: self.create_simple_retriever,
        StateDelegatingStreamModel: self.create_state_delegating_stream,
        SpecModel: self.create_spec,
        SubstreamPartitionRouterModel: self.create_substream_partition_router,
        WaitTimeFromHeaderModel: self.create_wait_time_from_header,
        WaitUntilTimeFromHeaderModel: self.create_wait_until_time_from_header,
        AsyncRetrieverModel: self.create_async_retriever,
        HttpComponentsResolverModel: self.create_http_components_resolver,
        ConfigComponentsResolverModel: self.create_config_components_resolver,
        StreamConfigModel: self.create_stream_config,
        ComponentMappingDefinitionModel: self.create_components_mapping_definition,
        ZipfileDecoderModel: self.create_zipfile_decoder,
        HTTPAPIBudgetModel: self.create_http_api_budget,
        FileUploaderModel: self.create_file_uploader,
        FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy,
        MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy,
        UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
        RateModel: self.create_rate,
        HttpRequestRegexMatcherModel: self.create_http_request_matcher,
    }
    self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = constructors

    # Needed for the case where we need to perform a second parse on the fields of a custom component
    self.TYPE_NAME_TO_MODEL = {model_cls.__name__: model_cls for model_cls in constructors}
|
|
649
|
+
|
|
650
|
+
def create_component(
    self,
    model_type: Type[BaseModel],
    component_definition: ComponentDefinition,
    config: Config,
    **kwargs: Any,
) -> Any:
    """
    Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and
    subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then
    creating declarative components from that model.

    :param model_type: The type of declarative component that is being initialized
    :param component_definition: The mapping that represents a declarative component
    :param config: The connector config that is provided by the customer
    :param kwargs: Extra keyword arguments forwarded unchanged to the component constructor
    :return: The declarative component to be used at runtime
    :raises ValueError: If the definition's "type" does not equal the model class name, or if parsing
        yields an object that is not an instance of ``model_type``
    """
    expected_type = model_type.__name__

    # Read the declared type once and reuse it for both the check and the error message.
    component_type = component_definition.get("type")
    if component_type != expected_type:
        raise ValueError(
            f"Expected manifest component of type {expected_type}, but received {component_type} instead"
        )

    declarative_component_model = model_type.parse_obj(component_definition)

    # Guard against parse_obj handing back something other than the requested model.
    if not isinstance(declarative_component_model, model_type):
        raise ValueError(
            f"Expected {expected_type} component, but received {declarative_component_model.__class__.__name__}"
        )

    return self._create_component_from_model(
        model=declarative_component_model, config=config, **kwargs
    )
|
|
684
|
+
|
|
685
|
+
def _create_component_from_model(self, model: BaseModel, config: Config, **kwargs: Any) -> Any:
|
|
686
|
+
if model.__class__ not in self.PYDANTIC_MODEL_TO_CONSTRUCTOR:
|
|
687
|
+
raise ValueError(
|
|
688
|
+
f"{model.__class__} with attributes {model} is not a valid component type"
|
|
689
|
+
)
|
|
690
|
+
component_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(model.__class__)
|
|
691
|
+
if not component_constructor:
|
|
692
|
+
raise ValueError(f"Could not find constructor for {model.__class__}")
|
|
693
|
+
return component_constructor(model=model, config=config, **kwargs)
|
|
694
|
+
|
|
695
|
+
@staticmethod
def create_added_field_definition(
    model: AddedFieldDefinitionModel, config: Config, **kwargs: Any
) -> AddedFieldDefinition:
    """
    Build an AddedFieldDefinition from its model.

    The model's value is wrapped via ``InterpolatedString.create`` and its declared value type
    is translated to a Python type. ``config`` and ``kwargs`` are accepted but not read.
    """
    to_python_type = ModelToComponentFactory._json_schema_type_name_to_type
    interpolated_value = InterpolatedString.create(model.value, parameters=model.parameters or {})
    definition_kwargs = {
        "path": model.path,
        "value": interpolated_value,
        "value_type": to_python_type(model.value_type),
        "parameters": model.parameters or {},
    }
    return AddedFieldDefinition(**definition_kwargs)
|
|
708
|
+
|
|
709
|
+
def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields:
    """
    Build an AddFields transformation, creating one component per entry of ``model.fields``.

    A missing condition becomes the empty string and missing parameters become an empty dict.
    """
    added_field_definitions = []
    for field_model in model.fields:
        field_component = self._create_component_from_model(
            model=field_model,
            value_type=ModelToComponentFactory._json_schema_type_name_to_type(
                field_model.value_type
            ),
            config=config,
        )
        added_field_definitions.append(field_component)

    return AddFields(
        fields=added_field_definitions,
        condition=model.condition or "",
        parameters=model.parameters or {},
    )
|
|
725
|
+
|
|
726
|
+
def create_keys_to_lower_transformation(
    self, model: KeysToLowerModel, config: Config, **kwargs: Any
) -> KeysToLowerTransformation:
    """Return a new KeysToLowerTransformation; the model, config and kwargs are not consulted."""
    transformation = KeysToLowerTransformation()
    return transformation
|
|
730
|
+
|
|
731
|
+
def create_keys_to_snake_transformation(
    self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any
) -> KeysToSnakeCaseTransformation:
    """Return a new KeysToSnakeCaseTransformation; the model, config and kwargs are not consulted."""
    transformation = KeysToSnakeCaseTransformation()
    return transformation
|
|
735
|
+
|
|
736
|
+
def create_keys_replace_transformation(
    self, model: KeysReplaceModel, config: Config, **kwargs: Any
) -> KeysReplaceTransformation:
    """
    Build a KeysReplaceTransformation from the model's ``old`` and ``new`` values.

    Missing parameters are replaced by an empty dict. ``config`` and ``kwargs`` are not read.
    """
    transformation_kwargs = {
        "old": model.old,
        "new": model.new,
        "parameters": model.parameters or {},
    }
    return KeysReplaceTransformation(**transformation_kwargs)
|
|
742
|
+
|
|
743
|
+
def create_flatten_fields(
    self, model: FlattenFieldsModel, config: Config, **kwargs: Any
) -> FlattenFields:
    """Build a ``FlattenFields`` transformation.

    ``flatten_lists`` defaults to ``True`` when the model leaves it unset (``None``).
    """
    flatten_lists = model.flatten_lists
    if flatten_lists is None:
        flatten_lists = True
    return FlattenFields(flatten_lists=flatten_lists)
|
|
749
|
+
|
|
750
|
+
def create_dpath_flatten_fields(
    self, model: DpathFlattenFieldsModel, config: Config, **kwargs: Any
) -> DpathFlattenFields:
    """Build a ``DpathFlattenFields`` transformation from its manifest model.

    ``delete_origin_value`` and ``replace_record`` both fall back to ``False``
    when the model leaves them unset (``None``).

    :param model: The parsed DpathFlattenFields model
    :param config: The connector config, passed through to the component
    :return: The configured transformation
    """
    # Copy the path into a fresh list so the component does not share the model's sequence.
    model_field_path: List[Union[InterpolatedString, str]] = list(model.field_path)

    delete_origin_value = model.delete_origin_value
    if delete_origin_value is None:
        delete_origin_value = False

    replace_record = model.replace_record
    if replace_record is None:
        replace_record = False

    return DpathFlattenFields(
        config=config,
        field_path=model_field_path,
        delete_origin_value=delete_origin_value,
        replace_record=replace_record,
        parameters=model.parameters or {},
    )
|
|
763
|
+
|
|
764
|
+
@staticmethod
def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]:
    """Translate a ``ValueType`` member into the matching Python type.

    :param value_type: The declared value type, or a falsy value when none was declared
    :return: ``str``, ``float``, ``int`` or ``bool``; ``None`` when ``value_type`` is falsy
    :raises KeyError: if ``value_type`` is truthy but not one of the four mapped members
    """
    if value_type:
        python_types = {
            ValueType.string: str,
            ValueType.number: float,
            ValueType.integer: int,
            ValueType.boolean: bool,
        }
        return python_types[value_type]
    return None
|
|
775
|
+
|
|
776
|
+
def create_api_key_authenticator(
    self,
    model: ApiKeyAuthenticatorModel,
    config: Config,
    token_provider: Optional[TokenProvider] = None,
    **kwargs: Any,
) -> ApiKeyAuthenticator:
    """Build an ``ApiKeyAuthenticator`` from its manifest model.

    Exactly one of ``model.inject_into`` and the deprecated ``model.header`` must be set.
    When a ``token_provider`` is supplied it is used as-is and ``model.api_token``
    must be the empty string; otherwise the token is interpolated from ``model.api_token``.

    :param model: The parsed ApiKeyAuthenticator model
    :param config: The connector config
    :param token_provider: Optional externally built provider of the token
    :return: The configured authenticator
    :raises ValueError: if neither or both of ``inject_into``/``header`` are set, or if
        ``token_provider`` is given together with a non-empty ``api_token``
    """
    has_inject_into = model.inject_into is not None
    has_header = model.header is not None

    if not (has_inject_into or has_header):
        raise ValueError(
            "Expected either inject_into or header to be set for ApiKeyAuthenticator"
        )
    if has_inject_into and has_header:
        raise ValueError(
            "inject_into and header cannot be set both for ApiKeyAuthenticator - remove the deprecated header option"
        )
    if token_provider is not None and model.api_token != "":
        raise ValueError(
            "If token_provider is set, api_token is ignored and has to be set to empty string."
        )

    if model.inject_into:
        request_option = self._create_component_from_model(
            model.inject_into, config, parameters=model.parameters or {}
        )
    else:
        # Deprecated path: the bare header name is injected as a request header.
        request_option = RequestOption(
            inject_into=RequestOptionType.header,
            field_name=model.header or "",
            parameters=model.parameters or {},
        )

    if token_provider is not None:
        effective_token_provider = token_provider
    else:
        effective_token_provider = InterpolatedStringTokenProvider(
            api_token=model.api_token or "",
            config=config,
            parameters=model.parameters or {},
        )

    return ApiKeyAuthenticator(
        token_provider=effective_token_provider,
        request_option=request_option,
        config=config,
        parameters=model.parameters or {},
    )
|
|
824
|
+
|
|
825
|
+
def create_legacy_to_per_partition_state_migration(
    self,
    model: LegacyToPerPartitionStateMigrationModel,
    config: Mapping[str, Any],
    declarative_stream: DeclarativeStreamModel,
) -> LegacyToPerPartitionStateMigration:
    """Build a ``LegacyToPerPartitionStateMigration`` for a declarative stream.

    The stream must use a SimpleRetriever whose partition router is a substream
    (or custom) router exposing ``parent_stream_configs``, and it must define an
    ``incremental_sync`` component.

    :param model: The parsed migration model (not read here)
    :param config: The connector config, passed through to the migration
    :param declarative_stream: The stream model the migration is attached to
    :return: The configured state migration
    :raises ValueError: if any of the structural requirements above is not met
    """
    retriever = declarative_stream.retriever
    if not isinstance(retriever, SimpleRetrieverModel):
        raise ValueError(
            f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever. Got {type(retriever)}"
        )

    partition_router = retriever.partition_router
    if not isinstance(
        partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel)
    ):
        raise ValueError(
            f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}"
        )
    if not hasattr(partition_router, "parent_stream_configs"):
        raise ValueError(
            "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration."
        )

    # hasattr() succeeds whenever the attribute exists, even if its value is None, so it
    # cannot detect a stream without incremental sync. Check the value itself so the
    # intended error is raised instead of passing None on as the migration's cursor.
    incremental_sync = getattr(declarative_stream, "incremental_sync", None)
    if incremental_sync is None:
        raise ValueError(
            "LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration."
        )

    return LegacyToPerPartitionStateMigration(
        partition_router,  # type: ignore # was already checked above
        incremental_sync,  # type: ignore # was already checked. Migration can be applied only to incremental streams.
        config,
        declarative_stream.parameters,  # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any]
    )
|
|
859
|
+
|
|
860
|
+
def create_session_token_authenticator(
    self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any
) -> Union[ApiKeyAuthenticator, BearerAuthenticator]:
    """Build the authenticator that injects a session token obtained from a login request.

    A ``SessionTokenProvider`` is created around the login requester, then wrapped in
    either a bearer authenticator (``request_authentication.type == "Bearer"``) or an
    API-key authenticator that injects the token where ``request_authentication.inject_into`` says.

    :param model: The parsed SessionTokenAuthenticator model
    :param config: The connector config
    :param name: Name used as the prefix of the login requester's name
    :return: A BearerAuthenticator or an ApiKeyAuthenticator backed by the session token provider
    """
    if model.decoder:
        decoder = self._create_component_from_model(model=model.decoder, config=config)
    else:
        decoder = JsonDecoder(parameters={})

    login_requester = self._create_component_from_model(
        model=model.login_requester,
        config=config,
        name=f"{name}_login_requester",
        decoder=decoder,
    )

    expiration_duration = None
    if model.expiration_duration:
        expiration_duration = parse_duration(model.expiration_duration)

    token_provider = SessionTokenProvider(
        login_requester=login_requester,
        session_token_path=model.session_token_path,
        expiration_duration=expiration_duration,
        parameters=model.parameters or {},
        message_repository=self._message_repository,
        decoder=decoder,
    )

    if model.request_authentication.type == "Bearer":
        bearer_model = BearerAuthenticatorModel(type="BearerAuthenticator", api_token="")  # type: ignore # $parameters has a default value
        return ModelToComponentFactory.create_bearer_authenticator(
            bearer_model,
            config,
            token_provider=token_provider,
        )

    api_key_model = ApiKeyAuthenticatorModel(
        type="ApiKeyAuthenticator",
        api_token="",
        inject_into=model.request_authentication.inject_into,
    )  # type: ignore # $parameters and headers default to None
    return self.create_api_key_authenticator(
        api_key_model,
        config=config,
        token_provider=token_provider,
    )
|
|
900
|
+
|
|
901
|
+
@staticmethod
def create_basic_http_authenticator(
    model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any
) -> BasicHttpAuthenticator:
    """Build a ``BasicHttpAuthenticator`` from its manifest model.

    A missing (falsy) password is replaced by the empty string.

    :param model: The parsed BasicHttpAuthenticator model
    :param config: The connector config
    :return: The configured authenticator
    """
    password = model.password or ""
    authenticator = BasicHttpAuthenticator(
        password=password,
        username=model.username,
        config=config,
        parameters=model.parameters or {},
    )
    return authenticator
|
|
911
|
+
|
|
912
|
+
@staticmethod
def create_bearer_authenticator(
    model: BearerAuthenticatorModel,
    config: Config,
    token_provider: Optional[TokenProvider] = None,
    **kwargs: Any,
) -> BearerAuthenticator:
    """Build a ``BearerAuthenticator`` from its manifest model.

    When ``token_provider`` is supplied it is used directly and ``model.api_token``
    must be the empty string; otherwise the token is interpolated from ``model.api_token``.

    :param model: The parsed BearerAuthenticator model
    :param config: The connector config
    :param token_provider: Optional externally built provider of the token
    :return: The configured authenticator
    :raises ValueError: if ``token_provider`` is given together with a non-empty ``api_token``
    """
    if token_provider is not None and model.api_token != "":
        raise ValueError(
            "If token_provider is set, api_token is ignored and has to be set to empty string."
        )

    if token_provider is not None:
        effective_token_provider = token_provider
    else:
        effective_token_provider = InterpolatedStringTokenProvider(
            api_token=model.api_token or "",
            config=config,
            parameters=model.parameters or {},
        )

    return BearerAuthenticator(
        token_provider=effective_token_provider,
        config=config,
        parameters=model.parameters or {},
    )
|
|
936
|
+
|
|
937
|
+
@staticmethod
def create_check_stream(model: CheckStreamModel, config: Config, **kwargs: Any) -> CheckStream:
    """Build a ``CheckStream`` from the model's stream names, with empty parameters."""
    checker = CheckStream(stream_names=model.stream_names, parameters={})
    return checker
|
|
940
|
+
|
|
941
|
+
@staticmethod
def create_check_dynamic_stream(
    model: CheckDynamicStreamModel, config: Config, **kwargs: Any
) -> CheckDynamicStream:
    """Build a ``CheckDynamicStream`` from its manifest model, with empty parameters.

    :param model: The parsed CheckDynamicStream model; ``use_check_availability`` must not be None
    :param config: The connector config (not read here)
    :return: The configured checker
    """
    # Narrows Optional[bool] to bool for the type checker.
    assert model.use_check_availability is not None  # for mypy

    return CheckDynamicStream(
        stream_count=model.stream_count,
        use_check_availability=model.use_check_availability,
        parameters={},
    )
|
|
954
|
+
|
|
955
|
+
def create_composite_error_handler(
    self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
) -> CompositeErrorHandler:
    """Build a ``CompositeErrorHandler`` whose children are created from ``model.error_handlers``.

    :param model: The parsed CompositeErrorHandler model
    :param config: The connector config, passed to every child handler
    :return: The composite handler, holding the children in manifest order
    """
    handlers = []
    for handler_model in model.error_handlers:
        handlers.append(self._create_component_from_model(model=handler_model, config=config))

    return CompositeErrorHandler(
        error_handlers=handlers,
        parameters=model.parameters or {},
    )
|
|
965
|
+
|
|
966
|
+
@staticmethod
def create_concurrency_level(
    model: ConcurrencyLevelModel, config: Config, **kwargs: Any
) -> ConcurrencyLevel:
    """Build a ``ConcurrencyLevel`` from the model's default and max concurrency.

    :param model: The parsed ConcurrencyLevel model
    :param config: The connector config, passed through to the component
    :return: The configured component, with empty parameters
    """
    level = ConcurrencyLevel(
        default_concurrency=model.default_concurrency,
        max_concurrency=model.max_concurrency,
        config=config,
        parameters={},
    )
    return level
|
|
976
|
+
|
|
977
|
+
@staticmethod
|
|
978
|
+
def apply_stream_state_migrations(
|
|
979
|
+
stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any]
|
|
980
|
+
) -> MutableMapping[str, Any]:
|
|
981
|
+
if stream_state_migrations:
|
|
982
|
+
for state_migration in stream_state_migrations:
|
|
983
|
+
if state_migration.should_migrate(stream_state):
|
|
984
|
+
# The state variable is expected to be mutable but the migrate method returns an immutable mapping.
|
|
985
|
+
stream_state = dict(state_migration.migrate(stream_state))
|
|
986
|
+
return stream_state
|
|
987
|
+
|
|
988
|
+
def create_concurrent_cursor_from_datetime_based_cursor(
    self,
    model_type: Type[BaseModel],
    component_definition: ComponentDefinition,
    stream_name: str,
    stream_namespace: Optional[str],
    config: Config,
    message_repository: Optional[MessageRepository] = None,
    runtime_lookback_window: Optional[datetime.timedelta] = None,
    stream_state_migrations: Optional[List[Any]] = None,
    **kwargs: Any,
) -> ConcurrentCursor:
    """Build a ``ConcurrentCursor`` from a DatetimeBasedCursor manifest definition.

    The definition is parsed with ``model_type``, its interpolated fields are evaluated
    against ``config``, and the resulting start/end bounds, step, lookback window and
    clamping strategy are handed to the cursor.

    :param model_type: Pydantic model class the definition must declare as its ``type``
    :param component_definition: Raw manifest mapping of the cursor
    :param stream_name: Name of the stream the cursor belongs to
    :param stream_namespace: Namespace of the stream, if any
    :param config: The connector config
    :param message_repository: Repository for the cursor; the factory's own is used when falsy
    :param runtime_lookback_window: When set, subtracted from the state's cursor value
    :param stream_state_migrations: Migrations applied to the state before use
    :param kwargs: May carry ``stream_state`` to override the state manager lookup
    :return: The configured concurrent cursor
    :raises ValueError: on a type mismatch, when only one of step/cursor_granularity is
        defined, or when the clamping configuration is invalid
    """
    # Per-partition incremental streams create child cursors dynamically and pass their
    # current state via the stream_state keyword argument. Incremental syncs without parent
    # streams read the state from the connector_state_manager set up with the factory.
    if "stream_state" in kwargs:
        stream_state = kwargs["stream_state"]
    else:
        stream_state = self._connector_state_manager.get_stream_state(
            stream_name, stream_namespace
        )
    stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)

    component_type = component_definition.get("type")
    if component_type != model_type.__name__:
        raise ValueError(
            f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
        )

    cursor_model = model_type.parse_obj(component_definition)
    if not isinstance(cursor_model, DatetimeBasedCursorModel):
        raise ValueError(
            f"Expected {model_type.__name__} component, but received {cursor_model.__class__.__name__}"
        )

    cursor_field_template = InterpolatedString.create(
        cursor_model.cursor_field,
        parameters=cursor_model.parameters or {},
    )
    cursor_field = CursorField(cursor_field_template.eval(config=config))

    start_field_template = InterpolatedString.create(
        cursor_model.partition_field_start or "start_time",
        parameters=cursor_model.parameters or {},
    )
    end_field_template = InterpolatedString.create(
        cursor_model.partition_field_end or "end_time",
        parameters=cursor_model.parameters or {},
    )
    slice_boundary_fields = (
        start_field_template.eval(config=config),
        end_field_template.eval(config=config),
    )

    datetime_format = cursor_model.datetime_format

    cursor_granularity = None
    if cursor_model.cursor_granularity:
        cursor_granularity = parse_duration(cursor_model.cursor_granularity)

    lookback_window = None
    if cursor_model.lookback_window:
        lookback_template = InterpolatedString.create(
            cursor_model.lookback_window,
            parameters=cursor_model.parameters or {},
        )
        if lookback_template:
            evaluated_lookback_window = lookback_template.eval(config=config)
            if evaluated_lookback_window:
                lookback_window = parse_duration(evaluated_lookback_window)

    connector_state_converter: DateTimeStreamStateConverter = (
        CustomFormatConcurrentStreamStateConverter(
            datetime_format=datetime_format,
            input_datetime_formats=cursor_model.cursor_datetime_formats,
            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
            cursor_granularity=cursor_granularity,
        )
    )

    # Rewind the stored cursor value by the runtime lookback window. This keeps state
    # handling correct in case of failed partitions. Note the state mapping is updated in place.
    state_cursor_value = stream_state.get(cursor_field.cursor_field_key)
    if runtime_lookback_window and state_cursor_value:
        rewound_timestamp = (
            connector_state_converter.parse_timestamp(state_cursor_value)
            - runtime_lookback_window
        )
        stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format(
            rewound_timestamp
        )

    start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
    start_source = cursor_model.start_datetime
    if isinstance(start_source, MinMaxDatetimeModel):
        start_date_runtime_value = self.create_min_max_datetime(
            model=start_source, config=config
        )
    else:
        start_date_runtime_value = start_source

    end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]]
    end_source = cursor_model.end_datetime
    if isinstance(end_source, MinMaxDatetimeModel):
        end_date_runtime_value = self.create_min_max_datetime(model=end_source, config=config)
    else:
        end_date_runtime_value = end_source

    interpolated_start_date = MinMaxDatetime.create(
        interpolated_string_or_min_max_datetime=start_date_runtime_value,
        parameters=cursor_model.parameters,
    )
    if end_date_runtime_value:
        interpolated_end_date = MinMaxDatetime.create(
            end_date_runtime_value, cursor_model.parameters
        )
    else:
        interpolated_end_date = None

    # Start/end datetimes without their own format inherit the cursor's datetime format.
    if not interpolated_start_date.datetime_format:
        interpolated_start_date.datetime_format = datetime_format
    if interpolated_end_date and not interpolated_end_date.datetime_format:
        interpolated_end_date.datetime_format = datetime_format

    start_date = interpolated_start_date.get_datetime(config=config)
    if interpolated_end_date:
        end_date_provider = partial(interpolated_end_date.get_datetime, config)
    else:
        end_date_provider = connector_state_converter.get_end_provider()

    # step and cursor_granularity must be defined together or not at all.
    has_step = bool(cursor_model.step)
    has_granularity = bool(cursor_model.cursor_granularity)
    if has_step != has_granularity:
        raise ValueError(
            f"If step is defined, cursor_granularity should be as well and vice-versa. "
            f"Right now, step is `{cursor_model.step}` and cursor_granularity is `{cursor_model.cursor_granularity}`"
        )

    # Without a step, a single slice spans from the start date to the present moment.
    step_length = datetime.timedelta.max
    if cursor_model.step:
        step_template = InterpolatedString.create(
            cursor_model.step,
            parameters=cursor_model.parameters or {},
        )
        if step_template:
            evaluated_step = step_template.eval(config)
            if evaluated_step:
                step_length = parse_duration(evaluated_step)

    clamping_strategy: ClampingStrategy = NoClamping()
    if cursor_model.clamping:
        # Interpolating inside the model factory (rather than at runtime) is undesirable,
        # but it beats leaking the low-code interpolation concept into the ConcurrentCursor
        # runtime object, which should stay agnostic of being low-code.
        target = InterpolatedString(
            string=cursor_model.clamping.target,
            parameters=cursor_model.parameters or {},
        )
        evaluated_target = target.eval(config=config)

        if evaluated_target == "DAY":
            clamping_strategy = DayClampingStrategy()
            floor_strategy = DayClampingStrategy(is_ceiling=False)
            default_granularity = datetime.timedelta(seconds=1)
        elif evaluated_target == "WEEK":
            target_details = cursor_model.clamping.target_details
            if not target_details or "weekday" not in target_details:
                raise ValueError(
                    "Given WEEK clamping, weekday needs to be provided as target_details"
                )
            weekday = self._assemble_weekday(cursor_model.clamping.target_details["weekday"])
            clamping_strategy = WeekClampingStrategy(weekday)
            floor_strategy = WeekClampingStrategy(weekday, is_ceiling=False)
            default_granularity = datetime.timedelta(days=1)
        elif evaluated_target == "MONTH":
            clamping_strategy = MonthClampingStrategy()
            floor_strategy = MonthClampingStrategy(is_ceiling=False)
            default_granularity = datetime.timedelta(days=1)
        else:
            raise ValueError(
                f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH"
            )

        # The end provider is clamped with the non-ceiling variant of the same strategy.
        end_date_provider = ClampingEndProvider(
            floor_strategy,
            end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
            granularity=cursor_granularity or default_granularity,
        )

    return ConcurrentCursor(
        stream_name=stream_name,
        stream_namespace=stream_namespace,
        stream_state=stream_state,
        message_repository=message_repository or self._message_repository,
        connector_state_manager=self._connector_state_manager,
        connector_state_converter=connector_state_converter,
        cursor_field=cursor_field,
        slice_boundary_fields=slice_boundary_fields,
        start=start_date,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
        end_provider=end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
        lookback_window=lookback_window,
        slice_range=step_length,
        cursor_granularity=cursor_granularity,
        clamping_strategy=clamping_strategy,
    )
|
|
1214
|
+
|
|
1215
|
+
def create_concurrent_cursor_from_incrementing_count_cursor(
    self,
    model_type: Type[BaseModel],
    component_definition: ComponentDefinition,
    stream_name: str,
    stream_namespace: Optional[str],
    config: Config,
    message_repository: Optional[MessageRepository] = None,
    **kwargs: Any,
) -> ConcurrentCursor:
    """Build a ``ConcurrentCursor`` from an IncrementingCountCursor manifest definition.

    :param model_type: Pydantic model class the definition must declare as its ``type``
    :param component_definition: Raw manifest mapping of the cursor
    :param stream_name: Name of the stream the cursor belongs to
    :param stream_namespace: Namespace of the stream, if any
    :param config: The connector config
    :param message_repository: Repository for the cursor; the factory's own is used when falsy
    :param kwargs: May carry ``stream_state`` to override the state manager lookup
    :return: The configured concurrent cursor, starting at the evaluated ``start_value``
        (0 when the model defines none)
    :raises ValueError: on a type mismatch, or when the evaluated ``start_value`` is not
        convertible to an integer
    """
    # Per-partition incremental streams can dynamically create child cursors which will pass their current
    # state via the stream_state keyword argument. Incremental syncs without parent streams use the
    # incoming state and connector_state_manager that is initialized when the component factory is created
    stream_state = (
        self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
        if "stream_state" not in kwargs
        else kwargs["stream_state"]
    )

    component_type = component_definition.get("type")
    if component_type != model_type.__name__:
        raise ValueError(
            f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
        )

    incrementing_count_cursor_model = model_type.parse_obj(component_definition)

    if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel):
        raise ValueError(
            f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}"
        )

    # The start value may be a plain number or an interpolated string. It has to be
    # evaluated against the config here: handing the unevaluated InterpolatedString to the
    # cursor would make `start` a template object instead of a count.
    start_value = incrementing_count_cursor_model.start_value
    if start_value:
        interpolated_start_value = InterpolatedString.create(
            str(start_value),
            parameters=incrementing_count_cursor_model.parameters or {},
        )
        evaluated_start_value = int(interpolated_start_value.eval(config=config))
    else:
        evaluated_start_value = 0

    interpolated_cursor_field = InterpolatedString.create(
        incrementing_count_cursor_model.cursor_field,
        parameters=incrementing_count_cursor_model.parameters or {},
    )
    cursor_field = CursorField(interpolated_cursor_field.eval(config=config))

    connector_state_converter = IncrementingCountStreamStateConverter(
        is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
    )

    return ConcurrentCursor(
        stream_name=stream_name,
        stream_namespace=stream_namespace,
        stream_state=stream_state,
        message_repository=message_repository or self._message_repository,
        connector_state_manager=self._connector_state_manager,
        connector_state_converter=connector_state_converter,
        cursor_field=cursor_field,
        slice_boundary_fields=None,
        start=evaluated_start_value,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
        end_provider=connector_state_converter.get_end_provider(),  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
    )
|
|
1278
|
+
|
|
1279
|
+
def _assemble_weekday(self, weekday: str) -> Weekday:
    """Convert an upper-case weekday name into the matching ``Weekday`` member.

    :param weekday: One of ``"MONDAY"`` through ``"SUNDAY"`` (exact, case-sensitive match)
    :return: The corresponding ``Weekday`` member
    :raises ValueError: if the name is not one of the seven weekday names
    """
    weekdays_by_name = {
        "MONDAY": Weekday.MONDAY,
        "TUESDAY": Weekday.TUESDAY,
        "WEDNESDAY": Weekday.WEDNESDAY,
        "THURSDAY": Weekday.THURSDAY,
        "FRIDAY": Weekday.FRIDAY,
        "SATURDAY": Weekday.SATURDAY,
        "SUNDAY": Weekday.SUNDAY,
    }
    if weekday not in weekdays_by_name:
        raise ValueError(f"Unknown weekday {weekday}")
    return weekdays_by_name[weekday]
|
|
1297
|
+
|
|
1298
|
+
def create_concurrent_cursor_from_perpartition_cursor(
    self,
    state_manager: ConnectorStateManager,
    model_type: Type[BaseModel],
    component_definition: ComponentDefinition,
    stream_name: str,
    stream_namespace: Optional[str],
    config: Config,
    stream_state: MutableMapping[str, Any],
    partition_router: PartitionRouter,
    stream_state_migrations: Optional[List[Any]] = None,
    **kwargs: Any,
) -> ConcurrentPerPartitionCursor:
    """Build a ``ConcurrentPerPartitionCursor`` from a DatetimeBasedCursor definition.

    The per-partition cursor receives a factory that creates one datetime-based
    ``ConcurrentCursor`` per partition; those child cursors are given a
    ``NoopMessageRepository``.

    :param state_manager: State manager handed to the per-partition cursor
    :param model_type: Pydantic model class the definition must declare as its ``type``
    :param component_definition: Raw manifest mapping of the cursor
    :param stream_name: Name of the stream the cursor belongs to
    :param stream_namespace: Namespace of the stream, if any
    :param config: The connector config
    :param stream_state: Incoming stream state (migrated before use)
    :param partition_router: Router producing the partitions
    :param stream_state_migrations: Migrations applied to the state and forwarded to child cursors
    :return: The configured per-partition cursor
    :raises ValueError: if the definition's type does not match ``model_type``
    """
    component_type = component_definition.get("type")
    if component_type != model_type.__name__:
        raise ValueError(
            f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
        )

    cursor_model = model_type.parse_obj(component_definition)
    if not isinstance(cursor_model, DatetimeBasedCursorModel):
        raise ValueError(
            f"Expected {model_type.__name__} component, but received {cursor_model.__class__.__name__}"
        )

    cursor_field_template = InterpolatedString.create(
        cursor_model.cursor_field,
        parameters=cursor_model.parameters or {},
    )
    cursor_field = CursorField(cursor_field_template.eval(config=config))

    datetime_format = cursor_model.datetime_format

    cursor_granularity = None
    if cursor_model.cursor_granularity:
        cursor_granularity = parse_duration(cursor_model.cursor_granularity)

    connector_state_converter: DateTimeStreamStateConverter = (
        CustomFormatConcurrentStreamStateConverter(
            datetime_format=datetime_format,
            input_datetime_formats=cursor_model.cursor_datetime_formats,
            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
            cursor_granularity=cursor_granularity,
        )
    )

    # Factory for the child cursors: everything but the per-partition arguments is pre-bound.
    create_child_cursor = partial(
        self.create_concurrent_cursor_from_datetime_based_cursor,
        state_manager=state_manager,
        model_type=model_type,
        component_definition=component_definition,
        stream_name=stream_name,
        stream_namespace=stream_namespace,
        config=config,
        message_repository=NoopMessageRepository(),
        stream_state_migrations=stream_state_migrations,
    )
    cursor_factory = ConcurrentCursorFactory(create_child_cursor)

    stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)

    return ConcurrentPerPartitionCursor(
        cursor_factory=cursor_factory,
        partition_router=partition_router,
        stream_name=stream_name,
        stream_namespace=stream_namespace,
        stream_state=stream_state,
        message_repository=self._message_repository,  # type: ignore
        connector_state_manager=state_manager,
        connector_state_converter=connector_state_converter,
        cursor_field=cursor_field,
    )
|
|
1374
|
+
|
|
1375
|
+
@staticmethod
def create_constant_backoff_strategy(
    model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
) -> ConstantBackoffStrategy:
    """Build a ``ConstantBackoffStrategy`` from its manifest model.

    :param model: The parsed ConstantBackoffStrategy model
    :param config: The connector config, passed through to the strategy
    :return: The configured strategy
    """
    strategy = ConstantBackoffStrategy(
        backoff_time_in_seconds=model.backoff_time_in_seconds,
        config=config,
        parameters=model.parameters or {},
    )
    return strategy
|
|
1384
|
+
|
|
1385
|
+
def create_cursor_pagination(
    self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any
) -> CursorPaginationStrategy:
    """Build a ``CursorPaginationStrategy`` from its manifest model.

    The strategy always receives a ``PaginationDecoderDecorator``: an incoming decoder
    that is not already one gets wrapped. The wrapped (inner) decoder must be
    supported for pagination.

    :param model: The parsed CursorPagination model
    :param config: The connector config
    :param decoder: Response decoder, either already decorated or bare
    :return: The configured pagination strategy
    :raises ValueError: if the inner decoder is not supported for pagination
    """
    already_decorated = isinstance(decoder, PaginationDecoderDecorator)
    inner_decoder = decoder.decoder if already_decorated else decoder
    pagination_decoder = (
        decoder if already_decorated else PaginationDecoderDecorator(decoder=decoder)
    )

    if not self._is_supported_decoder_for_pagination(inner_decoder):
        raise ValueError(
            self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
        )

    return CursorPaginationStrategy(
        cursor_value=model.cursor_value,
        decoder=pagination_decoder,
        page_size=model.page_size,
        stop_condition=model.stop_condition,
        config=config,
        parameters=model.parameters or {},
    )
|
|
1409
|
+
|
|
1410
|
+
def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any:
    """
    Instantiate the custom Python class named by the model's class_name.

    The constructor arguments start from the model's fields plus `config`, are overridden by
    any keyword arguments supplied by the caller, and are finally restricted to the names
    that appear in the custom class' type hints.

    :param model: The Pydantic model of the custom component being created
    :param config: The custom defined connector config
    :param kwargs: Arguments handed down by a parent component; they win on field collisions
    :return: The declarative component built from the Pydantic model to be used at runtime
    """
    component_class = self._get_class_from_fully_qualified_class_name(model.class_name)
    type_hints = get_type_hints(component_class)

    constructor_args = model.dict()
    constructor_args["config"] = config
    # Parent-provided arguments take precedence over the component's own definition
    constructor_args.update(kwargs)

    def _infer_missing_type(field_name: str, candidate: Any) -> None:
        # A dict without a "type" borrows one from the class' type hint for that field, if any
        if isinstance(candidate, dict) and "type" not in candidate and field_name in type_hints:
            inferred = self._derive_component_type_from_type_hints(type_hints.get(field_name))
            if inferred:
                candidate["type"] = inferred

    # Pydantic cannot parse a custom component's sub-fields into models because their types only
    # exist in the Python class. This second pass turns such sub-fields into declarative components.
    for field_name, field_value in constructor_args.items():
        _infer_missing_type(field_name, field_value)

        if self._is_component(field_value):
            constructor_args[field_name] = self._create_nested_component(
                model, field_name, field_value, config
            )
        elif isinstance(field_value, list):
            resolved_items = []
            for item in field_value:
                _infer_missing_type(field_name, item)
                if self._is_component(item):
                    item = self._create_nested_component(model, field_name, item, config)
                resolved_items.append(item)
            constructor_args[field_name] = resolved_items

    # Only pass what the custom class declares
    accepted_args = {
        name: constructor_args[name] for name in type_hints if name in constructor_args
    }
    return component_class(**accepted_args)
|
|
1469
|
+
|
|
1470
|
+
@staticmethod
|
|
1471
|
+
def _get_class_from_fully_qualified_class_name(
|
|
1472
|
+
full_qualified_class_name: str,
|
|
1473
|
+
) -> Any:
|
|
1474
|
+
"""Get a class from its fully qualified name.
|
|
1475
|
+
|
|
1476
|
+
If a custom components module is needed, we assume it is already registered - probably
|
|
1477
|
+
as `source_declarative_manifest.components` or `components`.
|
|
1478
|
+
|
|
1479
|
+
Args:
|
|
1480
|
+
full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName").
|
|
1481
|
+
|
|
1482
|
+
Returns:
|
|
1483
|
+
Any: The class object.
|
|
1484
|
+
|
|
1485
|
+
Raises:
|
|
1486
|
+
ValueError: If the class cannot be loaded.
|
|
1487
|
+
"""
|
|
1488
|
+
split = full_qualified_class_name.split(".")
|
|
1489
|
+
module_name_full = ".".join(split[:-1])
|
|
1490
|
+
class_name = split[-1]
|
|
1491
|
+
|
|
1492
|
+
try:
|
|
1493
|
+
module_ref = importlib.import_module(module_name_full)
|
|
1494
|
+
except ModuleNotFoundError as e:
|
|
1495
|
+
if split[0] == "source_declarative_manifest":
|
|
1496
|
+
# During testing, the modules containing the custom components are not moved to source_declarative_manifest. In order to run the test, add the source folder to your PYTHONPATH or add it runtime using sys.path.append
|
|
1497
|
+
try:
|
|
1498
|
+
import os
|
|
1499
|
+
|
|
1500
|
+
module_name_with_source_declarative_manifest = ".".join(split[1:-1])
|
|
1501
|
+
module_ref = importlib.import_module(
|
|
1502
|
+
module_name_with_source_declarative_manifest
|
|
1503
|
+
)
|
|
1504
|
+
except ModuleNotFoundError:
|
|
1505
|
+
raise ValueError(f"Could not load module `{module_name_full}`.") from e
|
|
1506
|
+
else:
|
|
1507
|
+
raise ValueError(f"Could not load module `{module_name_full}`.") from e
|
|
1508
|
+
|
|
1509
|
+
try:
|
|
1510
|
+
return getattr(module_ref, class_name)
|
|
1511
|
+
except AttributeError as e:
|
|
1512
|
+
raise ValueError(
|
|
1513
|
+
f"Could not load class `{class_name}` from module `{module_name_full}`.",
|
|
1514
|
+
) from e
|
|
1515
|
+
|
|
1516
|
+
@staticmethod
|
|
1517
|
+
def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]:
|
|
1518
|
+
interface = field_type
|
|
1519
|
+
while True:
|
|
1520
|
+
origin = get_origin(interface)
|
|
1521
|
+
if origin:
|
|
1522
|
+
# Unnest types until we reach the raw type
|
|
1523
|
+
# List[T] -> T
|
|
1524
|
+
# Optional[List[T]] -> T
|
|
1525
|
+
args = get_args(interface)
|
|
1526
|
+
interface = args[0]
|
|
1527
|
+
else:
|
|
1528
|
+
break
|
|
1529
|
+
if isinstance(interface, type) and not ModelToComponentFactory.is_builtin_type(interface):
|
|
1530
|
+
return interface.__name__
|
|
1531
|
+
return None
|
|
1532
|
+
|
|
1533
|
+
@staticmethod
|
|
1534
|
+
def is_builtin_type(cls: Optional[Type[Any]]) -> bool:
|
|
1535
|
+
if not cls:
|
|
1536
|
+
return False
|
|
1537
|
+
return cls.__module__ == "builtins"
|
|
1538
|
+
|
|
1539
|
+
@staticmethod
|
|
1540
|
+
def _extract_missing_parameters(error: TypeError) -> List[str]:
|
|
1541
|
+
parameter_search = re.search(r"keyword-only.*:\s(.*)", str(error))
|
|
1542
|
+
if parameter_search:
|
|
1543
|
+
return re.findall(r"\'(.+?)\'", parameter_search.group(1))
|
|
1544
|
+
else:
|
|
1545
|
+
return []
|
|
1546
|
+
|
|
1547
|
+
def _create_nested_component(
    self, model: Any, model_field: str, model_value: Any, config: Config
) -> Any:
    """
    Turn a sub-field of a custom component into a declarative component.

    :param model: The parent custom component model; its class_name is used in error messages
    :param model_field: The name of the parent field holding the value
    :param model_value: The mapping describing the subcomponent
    :param config: The connector config
    :return: The created component, or model_value itself when it declares no type
    :raises ValueError: If the type has no registered model, or if the constructor reports
        missing keyword-only arguments
    :raises TypeError: If creating the component fails with any other TypeError
    """
    type_name = model_value.get("type", None)
    if not type_name:
        # Without a type this is a plain dictionary, which is returned instead of a subcomponent
        return model_value

    model_type = self.TYPE_NAME_TO_MODEL.get(type_name, None)
    if not model_type:
        raise ValueError(
            f"Error creating custom component {model.class_name}. Subcomponent creation has not been implemented for '{type_name}'"
        )

    parsed_model = model_type.parse_obj(model_value)
    try:
        # Built-in components can receive fields from their parent (for example the DefaultPaginator
        # gets the HttpRequester url_base while a SimpleRetriever is built). Custom components are
        # created generically and cannot share fields that way, so any keyword-only argument of the
        # constructor may instead be supplied through the subcomponent's $parameters.
        model_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(parsed_model.__class__)
        keyword_only_names = inspect.getfullargspec(model_constructor).kwonlyargs
        declared_parameters = model_value.get("$parameters", {})
        forwarded_parameters = {}
        for name in keyword_only_names:
            if name in declared_parameters:
                forwarded_parameters[name] = declared_parameters[name]
        return self._create_component_from_model(
            model=parsed_model, config=config, **forwarded_parameters
        )
    except TypeError as error:
        missing_parameters = self._extract_missing_parameters(error)
        if not missing_parameters:
            raise TypeError(
                f"Error creating component '{type_name}' with parent custom component {model.class_name}: {error}"
            )
        parameters_to_provide = ", ".join(
            f"{type_name}.$parameters.{parameter}" for parameter in missing_parameters
        )
        raise ValueError(
            f"Error creating component '{type_name}' with parent custom component {model.class_name}: Please provide "
            + parameters_to_provide
        )
|
|
1595
|
+
|
|
1596
|
+
@staticmethod
|
|
1597
|
+
def _is_component(model_value: Any) -> bool:
|
|
1598
|
+
return isinstance(model_value, dict) and model_value.get("type") is not None
|
|
1599
|
+
|
|
1600
|
+
def create_datetime_based_cursor(
    self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any
) -> DatetimeBasedCursor:
    """
    Build a DatetimeBasedCursor from its declarative model.

    Start and end datetimes given as strings are passed through unchanged; anything else is
    first converted with create_min_max_datetime.

    :param model: The parsed DatetimeBasedCursor model
    :param config: The connector config
    :param kwargs: Accepted so callers may pass extra arguments; not used here
    :return: The datetime based cursor
    :raises ValueError: If the model is a data feed and also sets end_datetime or
        client side incremental
    """
    start_datetime: Union[str, MinMaxDatetime]
    if isinstance(model.start_datetime, str):
        start_datetime = model.start_datetime
    else:
        start_datetime = self.create_min_max_datetime(model.start_datetime, config)

    # Options that cannot be combined with a data feed are rejected before going any further
    if model.is_data_feed:
        if model.end_datetime:
            raise ValueError("Data feed does not support end_datetime")
        if model.is_client_side_incremental:
            raise ValueError(
                "`Client side incremental` cannot be applied with `data feed`. Choose only 1 from them."
            )

    end_datetime: Union[str, MinMaxDatetime, None] = None
    if model.end_datetime:
        if isinstance(model.end_datetime, str):
            end_datetime = model.end_datetime
        else:
            end_datetime = self.create_min_max_datetime(model.end_datetime, config)

    end_time_option = None
    if model.end_time_option:
        end_time_option = self._create_component_from_model(
            model.end_time_option, config, parameters=model.parameters or {}
        )

    start_time_option = None
    if model.start_time_option:
        start_time_option = self._create_component_from_model(
            model.start_time_option, config, parameters=model.parameters or {}
        )

    return DatetimeBasedCursor(
        cursor_field=model.cursor_field,
        cursor_datetime_formats=model.cursor_datetime_formats or [],
        cursor_granularity=model.cursor_granularity,
        datetime_format=model.datetime_format,
        end_datetime=end_datetime,
        start_datetime=start_datetime,
        step=model.step,
        end_time_option=end_time_option,
        lookback_window=model.lookback_window,
        start_time_option=start_time_option,
        partition_field_end=model.partition_field_end,
        partition_field_start=model.partition_field_start,
        message_repository=self._message_repository,
        is_compare_strictly=model.is_compare_strictly,
        config=config,
        parameters=model.parameters or {},
    )
|
|
1657
|
+
|
|
1658
|
+
def create_declarative_stream(
    self, model: DeclarativeStreamModel, config: Config, **kwargs: Any
) -> DeclarativeStream:
    """
    Build a DeclarativeStream, together with its retriever and schema loader, from its model.

    :param model: The parsed DeclarativeStream model
    :param config: The connector config
    :param kwargs: Accepted so callers may pass extra arguments; not used here
    :return: The declarative stream
    :raises ValueError: If client side incremental sync is requested with a merged slicer
        that is not one of the supported cursor types
    """
    # When constructing a declarative stream, we assemble the incremental_sync component and retriever's partition_router field
    # components if they exist into a single CartesianProductStreamSlicer. This is then passed back as an argument when constructing the
    # Retriever. This is done in the declarative stream not the retriever to support custom retrievers. The custom create methods in
    # the factory only support passing arguments to the component constructors, whereas this performs a merge of all slicers into one.
    combined_slicers = self._merge_stream_slicers(model=model, config=config)

    primary_key = model.primary_key.__root__ if model.primary_key else None
    stop_condition_on_cursor = (
        model.incremental_sync
        and hasattr(model.incremental_sync, "is_data_feed")
        and model.incremental_sync.is_data_feed
    )
    client_side_incremental_sync = None
    if (
        model.incremental_sync
        and hasattr(model.incremental_sync, "is_client_side_incremental")
        and model.incremental_sync.is_client_side_incremental
    ):
        supported_slicers = (
            DatetimeBasedCursor,
            GlobalSubstreamCursor,
            PerPartitionWithGlobalCursor,
        )
        if combined_slicers and not isinstance(combined_slicers, supported_slicers):
            raise ValueError(
                "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
            )
        # A substream cursor is reused as is; otherwise a cursor is created from the
        # incremental_sync model
        cursor = (
            combined_slicers
            if isinstance(
                combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
            )
            else self._create_component_from_model(model=model.incremental_sync, config=config)
        )

        client_side_incremental_sync = {"cursor": cursor}

    if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
        cursor_model = model.incremental_sync

        end_time_option = (
            self._create_component_from_model(
                cursor_model.end_time_option, config, parameters=cursor_model.parameters or {}
            )
            if cursor_model.end_time_option
            else None
        )
        start_time_option = (
            self._create_component_from_model(
                cursor_model.start_time_option, config, parameters=cursor_model.parameters or {}
            )
            if cursor_model.start_time_option
            else None
        )

        request_options_provider = DatetimeBasedRequestOptionsProvider(
            start_time_option=start_time_option,
            end_time_option=end_time_option,
            # The start field must come from partition_field_start; passing partition_field_end
            # here made the provider use the end field for both boundaries
            partition_field_start=cursor_model.partition_field_start,
            partition_field_end=cursor_model.partition_field_end,
            config=config,
            parameters=model.parameters or {},
        )
    elif model.incremental_sync and isinstance(
        model.incremental_sync, IncrementingCountCursorModel
    ):
        cursor_model: IncrementingCountCursorModel = model.incremental_sync  # type: ignore

        start_time_option = (
            self._create_component_from_model(
                cursor_model.start_value_option,  # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
                config,
                parameters=cursor_model.parameters or {},
            )
            if cursor_model.start_value_option  # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
            else None
        )

        # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but
        # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time
        partition_field_start = "start"

        request_options_provider = DatetimeBasedRequestOptionsProvider(
            start_time_option=start_time_option,
            partition_field_start=partition_field_start,
            config=config,
            parameters=model.parameters or {},
        )
    else:
        request_options_provider = None

    transformations = []
    if model.transformations:
        for transformation_model in model.transformations:
            transformations.append(
                self._create_component_from_model(model=transformation_model, config=config)
            )
    file_uploader = None
    if model.file_uploader:
        file_uploader = self._create_component_from_model(
            model=model.file_uploader, config=config
        )

    retriever = self._create_component_from_model(
        model=model.retriever,
        config=config,
        name=model.name,
        primary_key=primary_key,
        stream_slicer=combined_slicers,
        request_options_provider=request_options_provider,
        stop_condition_on_cursor=stop_condition_on_cursor,
        client_side_incremental_sync=client_side_incremental_sync,
        transformations=transformations,
        file_uploader=file_uploader,
        incremental_sync=model.incremental_sync,
    )
    cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None

    if model.state_migrations:
        state_transformations = [
            self._create_component_from_model(state_migration, config, declarative_stream=model)
            for state_migration in model.state_migrations
        ]
    else:
        state_transformations = []

    if model.schema_loader:
        schema_loader = self._create_component_from_model(
            model=model.schema_loader, config=config
        )
    else:
        # Without an explicit schema loader, fall back to the default one and make sure it
        # knows the stream name
        options = model.parameters or {}
        if "name" not in options:
            options["name"] = model.name
        schema_loader = DefaultSchemaLoader(config=config, parameters=options)

    return DeclarativeStream(
        name=model.name or "",
        primary_key=primary_key,
        retriever=retriever,
        schema_loader=schema_loader,
        stream_cursor_field=cursor_field or "",
        state_migrations=state_transformations,
        config=config,
        parameters=model.parameters or {},
    )
|
|
1807
|
+
|
|
1808
|
+
def _build_stream_slicer_from_partition_router(
    self,
    model: Union[
        AsyncRetrieverModel,
        CustomRetrieverModel,
        SimpleRetrieverModel,
    ],
    config: Config,
    stream_name: Optional[str] = None,
) -> Optional[PartitionRouter]:
    """
    Build the stream slicer described by a retriever's partition_router, if it has one.

    :param model: The retriever model
    :param config: The connector config
    :param stream_name: The stream name forwarded to the created routers ("" when not given)
    :return: A CartesianProductStreamSlicer for a list of routers, the single created router
        otherwise, or None when the retriever is not a simple/async retriever with a
        partition_router set
    """
    has_partition_router = (
        hasattr(model, "partition_router")
        and isinstance(model, SimpleRetrieverModel | AsyncRetrieverModel)
        and model.partition_router
    )
    if not has_partition_router:
        return None

    router_model = model.partition_router
    if not isinstance(router_model, list):
        return self._create_component_from_model(  # type: ignore[no-any-return] # a PartitionRouter is created because router_model is model.partition_router
            model=router_model, config=config, stream_name=stream_name or ""
        )

    # Several routers are combined into one slicer
    routers = []
    for single_router_model in router_model:
        routers.append(
            self._create_component_from_model(
                model=single_router_model, config=config, stream_name=stream_name or ""
            )
        )
    return CartesianProductStreamSlicer(routers, parameters={})
|
|
1839
|
+
|
|
1840
|
+
def _build_incremental_cursor(
    self,
    model: DeclarativeStreamModel,
    stream_slicer: Optional[PartitionRouter],
    config: Config,
) -> Optional[StreamSlicer]:
    """
    Build the cursor for a stream that declares incremental_sync.

    :param model: The stream model
    :param stream_slicer: The partition router built for the stream, if any
    :param config: The connector config
    :return: None without incremental_sync. With a partition router: a concurrent
        per-partition cursor for an AsyncRetriever, otherwise a GlobalSubstreamCursor or a
        PerPartitionWithGlobalCursor. Without one: a concurrent datetime cursor for an
        AsyncRetriever, otherwise the component created from the incremental_sync model.
    """
    incremental_sync_model = model.incremental_sync
    if not incremental_sync_model:
        return None

    uses_async_retriever = model.retriever.type == "AsyncRetriever"

    if not stream_slicer:
        if uses_async_retriever:
            # Known issue: a ConcurrentCursor is returned although it does not technically implement
            # the (low-code) StreamSlicer. Both implement stream_slices(), which is the primary
            # method needed for checkpointing.
            return self.create_concurrent_cursor_from_datetime_based_cursor(  # type: ignore
                model_type=DatetimeBasedCursorModel,
                component_definition=incremental_sync_model.__dict__,
                stream_name=model.name or "",
                stream_namespace=None,
                config=config or {},
                stream_state_migrations=model.state_migrations,
            )
        return self._create_component_from_model(model=incremental_sync_model, config=config)  # type: ignore[no-any-return] # a Cursor is created because the model is model.incremental_sync

    if uses_async_retriever:
        # Same known issue as above: the concurrent cursor is returned where a StreamSlicer is expected
        return self.create_concurrent_cursor_from_perpartition_cursor(  # type: ignore
            state_manager=self._connector_state_manager,
            model_type=DatetimeBasedCursorModel,
            component_definition=incremental_sync_model.__dict__,
            stream_name=model.name or "",
            stream_namespace=None,
            config=config or {},
            stream_state={},
            partition_router=stream_slicer,
        )

    cursor_component = self._create_component_from_model(
        model=incremental_sync_model, config=config
    )

    uses_global_cursor = (
        hasattr(incremental_sync_model, "global_substream_cursor")
        and incremental_sync_model.global_substream_cursor
    )
    if uses_global_cursor:
        return GlobalSubstreamCursor(
            stream_cursor=cursor_component, partition_router=stream_slicer
        )

    def _new_cursor() -> Any:
        # Creates another cursor from the same incremental_sync model each time it is called
        return self._create_component_from_model(model=incremental_sync_model, config=config)

    return PerPartitionWithGlobalCursor(
        cursor_factory=CursorFactory(_new_cursor),
        partition_router=stream_slicer,
        stream_cursor=cursor_component,
    )
|
|
1893
|
+
|
|
1894
|
+
def _build_resumable_cursor(
    self,
    model: Union[
        AsyncRetrieverModel,
        CustomRetrieverModel,
        SimpleRetrieverModel,
    ],
    stream_slicer: Optional[PartitionRouter],
) -> Optional[StreamSlicer]:
    """
    Build the cursor used by a stream that has no incremental_sync.

    :param model: The retriever model
    :param stream_slicer: The partition router built for the stream, if any
    :return: A PerPartitionCursor when there is a stream slicer, a ResumableFullRefreshCursor
        when there is none but the retriever has a paginator, otherwise None
    """
    if stream_slicer:
        # Full-Refresh sub-streams get one `ChildPartitionResumableFullRefreshCursor` per partition
        child_cursor_factory = CursorFactory(
            create_function=partial(ChildPartitionResumableFullRefreshCursor, {})
        )
        return PerPartitionCursor(
            cursor_factory=child_cursor_factory,
            partition_router=stream_slicer,
        )

    if hasattr(model, "paginator") and model.paginator:
        # Regular Full-Refresh streams use the high level `ResumableFullRefreshCursor`
        return ResumableFullRefreshCursor(parameters={})

    return None
|
|
1915
|
+
|
|
1916
|
+
def _merge_stream_slicers(
    self, model: DeclarativeStreamModel, config: Config
) -> Optional[StreamSlicer]:
    """
    Combine a stream's partition router and incremental_sync into a single slicer.

    :param model: The stream model
    :param config: The connector config
    :return: The incremental cursor when incremental_sync is set; otherwise the partition
        router itself when resumable full refresh is disabled, or the resumable cursor
        built around it
    :raises ValueError: For an AsyncRetriever with a cursor other than DatetimeBasedCursor,
        or with a partition router declared for which no stream slicer was built
    """
    retriever_model = model.retriever
    incremental_sync = model.incremental_sync

    stream_slicer = self._build_stream_slicer_from_partition_router(
        retriever_model, config, stream_name=model.name
    )

    if retriever_model.type == "AsyncRetriever" and incremental_sync:
        declares_partition_router = bool(retriever_model.partition_router)

        if incremental_sync.type != "DatetimeBasedCursor":
            # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with
            # the support of unordered slices (for example, when we trigger reports for January and
            # February, the report in February can be completed first). Once we have support for custom
            # concurrent cursor or have a new implementation available in the CDK, we can enable more
            # cursors here.
            raise ValueError(
                "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet."
            )

        if declares_partition_router and not stream_slicer:
            # Note that this development is also done in parallel to the per partition development which
            # once merged we could support here by calling create_concurrent_cursor_from_perpartition_cursor
            raise ValueError("Per partition state is not supported yet for AsyncRetriever.")

    if incremental_sync:
        return self._build_incremental_cursor(model, stream_slicer, config)

    if self._disable_resumable_full_refresh:
        return stream_slicer
    return self._build_resumable_cursor(retriever_model, stream_slicer)
|
|
1957
|
+
|
|
1958
|
+
def create_default_error_handler(
    self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
) -> DefaultErrorHandler:
    """
    Build a DefaultErrorHandler from its declarative model.

    :param model: The parsed DefaultErrorHandler model
    :param config: The connector config
    :param kwargs: Accepted so callers may pass extra arguments; not used here
    :return: The error handler with the model's backoff strategies and response filters;
        when the model declares response filters, an HttpResponseFilter built from the
        model's parameters is appended after them
    """
    backoff_strategies = [
        self._create_component_from_model(model=backoff_strategy_model, config=config)
        for backoff_strategy_model in model.backoff_strategies or []
    ]

    response_filters = [
        self._create_component_from_model(model=response_filter_model, config=config)
        for response_filter_model in model.response_filters or []
    ]
    if model.response_filters:
        # Only added when the model declares filters of its own
        response_filters.append(
            HttpResponseFilter(config=config, parameters=model.parameters or {})
        )

    return DefaultErrorHandler(
        backoff_strategies=backoff_strategies,
        max_retries=model.max_retries,
        response_filters=response_filters,
        config=config,
        parameters=model.parameters or {},
    )
|
|
1985
|
+
|
|
1986
|
+
def create_default_paginator(
    self,
    model: DefaultPaginatorModel,
    config: Config,
    *,
    url_base: str,
    decoder: Optional[Decoder] = None,
    cursor_used_for_stop_condition: Optional[DeclarativeCursor] = None,
) -> Union[DefaultPaginator, PaginatorTestReadDecorator]:
    """
    Build a DefaultPaginator from its declarative model.

    :param model: The parsed DefaultPaginator model
    :param config: The connector config
    :param url_base: The base URL handed to the paginator
    :param decoder: The decoder for paginated responses; a JsonDecoder is used when not given
    :param cursor_used_for_stop_condition: When given, the pagination strategy is wrapped so
        that this cursor provides a stop condition
    :return: The paginator, wrapped in a PaginatorTestReadDecorator when the factory limits
        the pages fetched per slice
    :raises ValueError: If a decoder is given that is not supported for pagination
    """
    if decoder and not self._is_supported_decoder_for_pagination(decoder):
        raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder)))
    inner_decoder = decoder if decoder else JsonDecoder(parameters={})
    decoder_to_use = PaginationDecoderDecorator(decoder=inner_decoder)

    page_size_option = None
    if model.page_size_option:
        page_size_option = self._create_component_from_model(
            model=model.page_size_option, config=config
        )

    page_token_option = None
    if model.page_token_option:
        page_token_option = self._create_component_from_model(
            model=model.page_token_option, config=config
        )

    pagination_strategy = self._create_component_from_model(
        model=model.pagination_strategy, config=config, decoder=decoder_to_use
    )
    if cursor_used_for_stop_condition:
        stop_condition = CursorStopCondition(cursor_used_for_stop_condition)
        pagination_strategy = StopConditionPaginationStrategyDecorator(
            pagination_strategy, stop_condition
        )

    paginator = DefaultPaginator(
        decoder=decoder_to_use,
        page_size_option=page_size_option,
        page_token_option=page_token_option,
        pagination_strategy=pagination_strategy,
        url_base=url_base,
        config=config,
        parameters=model.parameters or {},
    )
    if not self._limit_pages_fetched_per_slice:
        return paginator
    return PaginatorTestReadDecorator(paginator, self._limit_pages_fetched_per_slice)
|
|
2031
|
+
|
|
2032
|
+
def create_dpath_extractor(
    self,
    model: DpathExtractorModel,
    config: Config,
    decoder: Optional[Decoder] = None,
    **kwargs: Any,
) -> DpathExtractor:
    """
    Build a DpathExtractor from its declarative model.

    :param model: The parsed DpathExtractor model
    :param config: The connector config
    :param decoder: The decoder for responses; a JsonDecoder is used when not given
    :param kwargs: Accepted so callers may pass extra arguments; not used here
    :return: The extractor reading records at the model's field path
    """
    decoder_to_use = decoder if decoder else JsonDecoder(parameters={})
    # The extractor gets its own list rather than the model's field_path object
    model_field_path: List[Union[InterpolatedString, str]] = list(model.field_path)
    return DpathExtractor(
        decoder=decoder_to_use,
        field_path=model_field_path,
        config=config,
        parameters=model.parameters or {},
    )
|
|
2050
|
+
|
|
2051
|
+
def create_response_to_file_extractor(
    self,
    model: ResponseToFileExtractorModel,
    **kwargs: Any,
) -> ResponseToFileExtractor:
    """
    Build a ResponseToFileExtractor from its declarative model.

    :param model: The parsed ResponseToFileExtractor model
    :param kwargs: Accepted so callers may pass extra arguments; not used here
    :return: The extractor, given the model's parameters (an empty mapping when there are none)
    """
    extractor_parameters = model.parameters or {}
    return ResponseToFileExtractor(parameters=extractor_parameters)
|
|
2057
|
+
|
|
2058
|
+
@staticmethod
def create_exponential_backoff_strategy(
    model: ExponentialBackoffStrategyModel, config: Config
) -> ExponentialBackoffStrategy:
    """
    Build an ExponentialBackoffStrategy from its declarative model.

    :param model: The parsed ExponentialBackoffStrategy model
    :param config: The connector config
    :return: The strategy; a falsy factor on the model is replaced by 5
    """
    backoff_factor = model.factor or 5
    strategy_parameters = model.parameters or {}
    return ExponentialBackoffStrategy(
        factor=backoff_factor,
        parameters=strategy_parameters,
        config=config,
    )
|
|
2065
|
+
|
|
2066
|
+
def create_http_requester(
    self,
    model: HttpRequesterModel,
    config: Config,
    decoder: Decoder = JsonDecoder(parameters={}),
    *,
    name: str,
) -> HttpRequester:
    """Build an HttpRequester and the components it depends on.

    The authenticator is only created when the model declares one. The error
    handler comes from the model when present, otherwise a DefaultErrorHandler
    with no backoff strategies and no response filters is used. Caching is
    enabled only when the model requests it and the factory has not disabled it.

    Note: the default ``decoder`` is a single JsonDecoder instance created when
    the function is defined.
    """
    authenticator = None
    if model.authenticator:
        authenticator = self._create_component_from_model(
            model=model.authenticator,
            config=config,
            url_base=model.url_base,
            name=name,
            decoder=decoder,
        )

    if model.error_handler:
        error_handler = self._create_component_from_model(
            model=model.error_handler, config=config
        )
    else:
        error_handler = DefaultErrorHandler(
            backoff_strategies=[],
            response_filters=[],
            config=config,
            parameters=model.parameters or {},
        )

    api_budget = self._api_budget

    request_options_provider = InterpolatedRequestOptionsProvider(
        request_body_data=model.request_body_data,
        request_body_json=model.request_body_json,
        request_headers=model.request_headers,
        request_parameters=model.request_parameters,
        config=config,
        parameters=model.parameters or {},
    )

    assert model.use_cache is not None  # for mypy
    assert model.http_method is not None  # for mypy

    caching_enabled = model.use_cache and not self._disable_cache
    # Only ask the decoder about streaming when one was actually supplied.
    stream_response = decoder.is_stream_response() if decoder else False

    return HttpRequester(
        name=name,
        url_base=model.url_base,
        path=model.path,
        authenticator=authenticator,
        error_handler=error_handler,
        api_budget=api_budget,
        http_method=HttpMethod[model.http_method.value],
        request_options_provider=request_options_provider,
        config=config,
        disable_retries=self._disable_retries,
        parameters=model.parameters or {},
        message_repository=self._message_repository,
        use_cache=caching_enabled,
        decoder=decoder,
        stream_response=stream_response,
    )
|
|
2129
|
+
|
|
2130
|
+
@staticmethod
def create_http_response_filter(
    model: HttpResponseFilterModel, config: Config, **kwargs: Any
) -> HttpResponseFilter:
    """Build an HttpResponseFilter from its declarative model.

    Unset action and failure type become ``None``; unset string fields become
    empty strings; unset HTTP codes become an empty set.
    """
    action = ResponseAction(model.action.value) if model.action else None

    failure_type = None
    if model.failure_type:
        failure_type = FailureType(model.failure_type.value)

    # JSON schema notation has no set data type. The schema enforces an array of unique elements
    http_codes = set(model.http_codes or ())

    return HttpResponseFilter(
        action=action,
        failure_type=failure_type,
        error_message=model.error_message or "",
        error_message_contains=model.error_message_contains or "",
        http_codes=http_codes,
        predicate=model.predicate or "",
        config=config,
        parameters=model.parameters or {},
    )
|
|
2155
|
+
|
|
2156
|
+
@staticmethod
def create_inline_schema_loader(
    model: InlineSchemaLoaderModel, config: Config, **kwargs: Any
) -> InlineSchemaLoader:
    """Build an InlineSchemaLoader from ``model.schema_`` (empty dict when unset)."""
    inline_schema = model.schema_ or {}
    return InlineSchemaLoader(schema=inline_schema, parameters={})
|
|
2161
|
+
|
|
2162
|
+
def create_complex_field_type(
    self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any
) -> ComplexFieldType:
    """Build a ComplexFieldType, converting nested complex ``items`` into components."""
    items = model.items
    if isinstance(items, ComplexFieldTypeModel):
        # Nested complex types are built through the factory; anything else passes through.
        items = self._create_component_from_model(model=items, config=config)

    return ComplexFieldType(field_type=model.field_type, items=items)
|
|
2172
|
+
|
|
2173
|
+
def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap:
    """Build a TypesMap; a missing condition defaults to the string "True"."""
    target_type = model.target_type
    if isinstance(target_type, ComplexFieldTypeModel):
        # A complex target type is itself a model and must be turned into a component.
        target_type = self._create_component_from_model(model=target_type, config=config)

    condition = "True" if model.condition is None else model.condition

    return TypesMap(
        target_type=target_type,
        current_type=model.current_type,
        condition=condition,
    )
|
|
2185
|
+
|
|
2186
|
+
def create_schema_type_identifier(
    self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any
) -> SchemaTypeIdentifier:
    """Build a SchemaTypeIdentifier from its declarative model.

    Each declared types-mapping entry is built through the factory. A missing
    schema pointer becomes an empty list and a missing type pointer stays ``None``.
    """
    types_mapping = [
        self._create_component_from_model(types_map, config=config)
        for types_map in (model.types_mapping or [])
    ]

    schema_pointer: List[Union[InterpolatedString, str]] = (
        list(model.schema_pointer) if model.schema_pointer else []
    )
    key_pointer: List[Union[InterpolatedString, str]] = list(model.key_pointer)
    type_pointer: Optional[List[Union[InterpolatedString, str]]] = (
        list(model.type_pointer) if model.type_pointer else None
    )

    return SchemaTypeIdentifier(
        schema_pointer=schema_pointer,
        key_pointer=key_pointer,
        type_pointer=type_pointer,
        types_mapping=types_mapping,
        parameters=model.parameters or {},
    )
|
|
2212
|
+
|
|
2213
|
+
def create_dynamic_schema_loader(
    self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
) -> DynamicSchemaLoader:
    """Build a DynamicSchemaLoader with its retriever, transformations and type identifier.

    The retriever is created with an empty name, no primary key and no record
    transformations, using the slicer derived from the retriever model.
    """
    partition_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
    combined_slicers = self._build_resumable_cursor(model.retriever, partition_slicer)

    schema_transformations = [
        self._create_component_from_model(model=transformation_model, config=config)
        for transformation_model in (model.schema_transformations or [])
    ]

    retriever = self._create_component_from_model(
        model=model.retriever,
        config=config,
        name="",
        primary_key=None,
        stream_slicer=combined_slicers,
        transformations=[],
    )
    schema_type_identifier = self._create_component_from_model(
        model.schema_type_identifier, config=config, parameters=model.parameters or {}
    )

    return DynamicSchemaLoader(
        retriever=retriever,
        config=config,
        schema_transformations=schema_transformations,
        schema_type_identifier=schema_type_identifier,
        parameters=model.parameters or {},
    )
|
|
2244
|
+
|
|
2245
|
+
@staticmethod
def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> Decoder:
    """Return a new JsonDecoder with empty parameters; the model and config are not consulted."""
    json_decoder = JsonDecoder(parameters={})
    return json_decoder
|
|
2248
|
+
|
|
2249
|
+
def create_csv_decoder(self, model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder:
    """Build a CompositeRawDecoder around the CSV parser for ``model``.

    Responses are streamed unless the factory is emitting connector builder messages.
    """
    csv_parser = ModelToComponentFactory._get_parser(model, config)
    return CompositeRawDecoder(
        parser=csv_parser,
        stream_response=not self._emit_connector_builder_messages,
    )
|
|
2254
|
+
|
|
2255
|
+
def create_jsonl_decoder(
    self, model: JsonlDecoderModel, config: Config, **kwargs: Any
) -> Decoder:
    """Build a CompositeRawDecoder around the JSON-lines parser for ``model``.

    Responses are streamed unless the factory is emitting connector builder messages.
    """
    jsonl_parser = ModelToComponentFactory._get_parser(model, config)
    return CompositeRawDecoder(
        parser=jsonl_parser,
        stream_response=not self._emit_connector_builder_messages,
    )
|
|
2262
|
+
|
|
2263
|
+
def create_gzip_decoder(
    self, model: GzipDecoderModel, config: Config, **kwargs: Any
) -> Decoder:
    """Build the decoder used for (possibly) gzip-compressed responses.

    When emitting connector builder messages, the inner parser is used directly
    on a non-streamed response. Otherwise the gzip parser is selected by response
    headers, with the inner parser as the fallback.
    """
    gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config)  # type: ignore  # based on the model, we know this will be a GzipParser

    if self._emit_connector_builder_messages:
        # This is very surprising but if the response is not streamed,
        # CompositeRawDecoder calls response.content and the requests library actually uncompress the data as opposed to response.raw,
        # which uses urllib3 directly and does not uncompress the data.
        return CompositeRawDecoder(gzip_parser.inner_parser, False)

    # Header values for which the gzip parser is selected.
    compressed_response_types = {
        "gzip",
        "x-gzip",
        "gzip, deflate",
        "x-gzip, deflate",
        "application/zip",
        "application/gzip",
        "application/x-gzip",
        "application/x-zip-compressed",
    }
    header_rules = [
        ({"Content-Encoding", "Content-Type"}, compressed_response_types, gzip_parser)
    ]
    return CompositeRawDecoder.by_headers(
        header_rules,
        stream_response=True,
        fallback_parser=gzip_parser.inner_parser,
    )
|
|
2290
|
+
|
|
2291
|
+
@staticmethod
def create_incrementing_count_cursor(
    model: IncrementingCountCursorModel, config: Config, **kwargs: Any
) -> DatetimeBasedCursor:
    """Return a placeholder DatetimeBasedCursor for an IncrementingCountCursor model.

    Only ``model.cursor_field`` is taken from the model; the datetime format and
    start datetime are fixed placeholder values.
    """
    # This should not actually get used anywhere at runtime, but it is needed to pass checks
    # since models are still parsed into components and there is no runtime implementation of
    # an IncrementingCountCursor.
    # A known and expected issue with this stub is running a check with the declared
    # IncrementingCountCursor because it is run without ConcurrentCursor.
    placeholder_format = "%Y-%m-%d"
    placeholder_start = "2024-12-12"
    return DatetimeBasedCursor(
        cursor_field=model.cursor_field,
        datetime_format=placeholder_format,
        start_datetime=placeholder_start,
        config=config,
        parameters={},
    )
|
|
2306
|
+
|
|
2307
|
+
@staticmethod
def create_iterable_decoder(
    model: IterableDecoderModel, config: Config, **kwargs: Any
) -> IterableDecoder:
    """Return a new IterableDecoder with empty parameters; the model and config are not consulted."""
    iterable_decoder = IterableDecoder(parameters={})
    return iterable_decoder
|
|
2312
|
+
|
|
2313
|
+
@staticmethod
def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder:
    """Return a new XmlDecoder with empty parameters; the model and config are not consulted."""
    xml_decoder = XmlDecoder(parameters={})
    return xml_decoder
|
|
2316
|
+
|
|
2317
|
+
def create_zipfile_decoder(
    self, model: ZipfileDecoderModel, config: Config, **kwargs: Any
) -> ZipfileDecoder:
    """Build a ZipfileDecoder whose parser is derived from the nested ``model.decoder``."""
    inner_parser = ModelToComponentFactory._get_parser(model.decoder, config)
    return ZipfileDecoder(parser=inner_parser)
|
|
2321
|
+
|
|
2322
|
+
@staticmethod
def _get_parser(model: BaseModel, config: Config) -> Parser:
    """Return the Parser matching a decoder model.

    Gzip models recurse into their nested decoder model to build the inner parser.

    Raises:
        ValueError: if the model is a known decoder type without a parser
            (custom, iterable, XML, zipfile) or an unknown type.
    """
    if isinstance(model, JsonDecoderModel):
        # Note that the logic is a bit different from the JsonDecoder as there is some legacy that is maintained to return {} on error cases
        return JsonParser()
    if isinstance(model, JsonlDecoderModel):
        return JsonLineParser()
    if isinstance(model, CsvDecoderModel):
        return CsvParser(encoding=model.encoding, delimiter=model.delimiter)
    if isinstance(model, GzipDecoderModel):
        inner_parser = ModelToComponentFactory._get_parser(model.decoder, config)
        return GzipParser(inner_parser=inner_parser)

    parserless_models = (
        CustomDecoderModel,
        IterableDecoderModel,
        XmlDecoderModel,
        ZipfileDecoderModel,
    )
    if isinstance(model, parserless_models):
        raise ValueError(f"Decoder type {model} does not have parser associated to it")

    raise ValueError(f"Unknown decoder type {model}")
|
|
2341
|
+
|
|
2342
|
+
@staticmethod
def create_json_file_schema_loader(
    model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any
) -> JsonFileSchemaLoader:
    """Build a JsonFileSchemaLoader; a missing file path becomes an empty string."""
    schema_file_path = model.file_path or ""
    loader_parameters = model.parameters or {}
    return JsonFileSchemaLoader(
        file_path=schema_file_path,
        config=config,
        parameters=loader_parameters,
    )
|
|
2349
|
+
|
|
2350
|
+
@staticmethod
def create_jwt_authenticator(
    model: JwtAuthenticatorModel, config: Config, **kwargs: Any
) -> JwtAuthenticator:
    """Build a JwtAuthenticator from its declarative model.

    When the model omits headers or payload, default models are used: headers
    with ``typ="JWT"`` and no ``kid``/``cty``, and a payload with no
    ``iss``/``sub``/``aud``.
    """
    headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None)
    payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None)

    # Registered header and payload claims, grouped before being spread into the call.
    header_claims = {"kid": headers.kid, "typ": headers.typ, "cty": headers.cty}
    payload_claims = {"iss": payload.iss, "sub": payload.sub, "aud": payload.aud}

    return JwtAuthenticator(
        config=config,
        parameters=model.parameters or {},
        algorithm=JwtAlgorithm(model.algorithm.value),
        secret_key=model.secret_key,
        base64_encode_secret_key=model.base64_encode_secret_key,
        token_duration=model.token_duration,
        header_prefix=model.header_prefix,
        **header_claims,
        **payload_claims,
        additional_jwt_headers=model.additional_jwt_headers,
        additional_jwt_payload=model.additional_jwt_payload,
    )
|
|
2373
|
+
|
|
2374
|
+
def create_list_partition_router(
    self, model: ListPartitionRouterModel, config: Config, **kwargs: Any
) -> ListPartitionRouter:
    """Build a ListPartitionRouter, creating its request option only when the model declares one."""
    request_option = None
    if model.request_option:
        request_option = self._create_component_from_model(model.request_option, config)

    return ListPartitionRouter(
        cursor_field=model.cursor_field,
        request_option=request_option,
        values=model.values,
        config=config,
        parameters=model.parameters or {},
    )
|
|
2389
|
+
|
|
2390
|
+
@staticmethod
def create_min_max_datetime(
    model: MinMaxDatetimeModel, config: Config, **kwargs: Any
) -> MinMaxDatetime:
    """Build a MinMaxDatetime; unset format and bounds become empty strings."""
    datetime_format = model.datetime_format or ""
    lower_bound = model.min_datetime or ""
    upper_bound = model.max_datetime or ""
    return MinMaxDatetime(
        datetime=model.datetime,
        datetime_format=datetime_format,
        max_datetime=upper_bound,
        min_datetime=lower_bound,
        parameters=model.parameters or {},
    )
|
|
2401
|
+
|
|
2402
|
+
@staticmethod
def create_no_auth(model: NoAuthModel, config: Config, **kwargs: Any) -> NoAuth:
    """Build a NoAuth component with the model parameters (empty dict when unset)."""
    auth_parameters = model.parameters or {}
    return NoAuth(parameters=auth_parameters)
|
|
2405
|
+
|
|
2406
|
+
@staticmethod
def create_no_pagination(
    model: NoPaginationModel, config: Config, **kwargs: Any
) -> NoPagination:
    """Return a new NoPagination with empty parameters; the model and config are not consulted."""
    no_pagination = NoPagination(parameters={})
    return no_pagination
|
|
2411
|
+
|
|
2412
|
+
def create_oauth_authenticator(
    self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any
) -> DeclarativeOauth2Authenticator:
    """Build the OAuth2 authenticator described by ``model``.

    When the model declares a ``refresh_token_updater``, a
    DeclarativeSingleUseRefreshTokenOauth2Authenticator is returned and the
    string/mapping options are interpolated against ``config`` here. Otherwise a
    DeclarativeOauth2Authenticator is returned and receives the raw model values.
    """
    profile_assertion = None
    if model.profile_assertion:
        profile_assertion = self._create_component_from_model(
            model.profile_assertion, config=config
        )

    parameters = model.parameters or {}
    updater = model.refresh_token_updater

    def _interpolate(template: Any) -> Any:
        # Evaluate a single interpolated string against the connector config.
        return InterpolatedString.create(template, parameters=parameters).eval(config)

    def _interpolate_mapping(mapping: Any) -> Any:
        # Evaluate an interpolated mapping; an unset mapping is treated as empty.
        return InterpolatedMapping(mapping or {}, parameters=parameters).eval(config)

    if updater:
        # Client credentials are only interpolated when set; falsy values pass through unchanged.
        client_id = _interpolate(model.client_id) if model.client_id else model.client_id
        client_secret = (
            _interpolate(model.client_secret) if model.client_secret else model.client_secret
        )
        # ignore type error because fixing it would have a lot of dependencies, revisit later
        return DeclarativeSingleUseRefreshTokenOauth2Authenticator(  # type: ignore
            config,
            _interpolate(model.token_refresh_endpoint),  # type: ignore
            access_token_name=_interpolate(model.access_token_name or "access_token"),
            refresh_token_name=updater.refresh_token_name,
            expires_in_name=_interpolate(model.expires_in_name or "expires_in"),
            client_id_name=_interpolate(model.client_id_name or "client_id"),
            client_id=client_id,
            client_secret_name=_interpolate(model.client_secret_name or "client_secret"),
            client_secret=client_secret,
            access_token_config_path=updater.access_token_config_path,
            refresh_token_config_path=updater.refresh_token_config_path,
            token_expiry_date_config_path=updater.token_expiry_date_config_path,
            grant_type_name=_interpolate(model.grant_type_name or "grant_type"),
            grant_type=_interpolate(model.grant_type or "refresh_token"),
            refresh_request_body=_interpolate_mapping(model.refresh_request_body),
            refresh_request_headers=_interpolate_mapping(model.refresh_request_headers),
            scopes=model.scopes,
            token_expiry_date_format=model.token_expiry_date_format,
            message_repository=self._message_repository,
            refresh_token_error_status_codes=updater.refresh_token_error_status_codes,
            refresh_token_error_key=updater.refresh_token_error_key,
            refresh_token_error_values=updater.refresh_token_error_values,
        )

    # ignore type error because fixing it would have a lot of dependencies, revisit later
    return DeclarativeOauth2Authenticator(  # type: ignore
        access_token_name=model.access_token_name or "access_token",
        access_token_value=model.access_token_value,
        client_id_name=model.client_id_name or "client_id",
        client_id=model.client_id,
        client_secret_name=model.client_secret_name or "client_secret",
        client_secret=model.client_secret,
        expires_in_name=model.expires_in_name or "expires_in",
        grant_type_name=model.grant_type_name or "grant_type",
        grant_type=model.grant_type or "refresh_token",
        refresh_request_body=model.refresh_request_body,
        refresh_request_headers=model.refresh_request_headers,
        refresh_token_name=model.refresh_token_name or "refresh_token",
        refresh_token=model.refresh_token,
        scopes=model.scopes,
        token_expiry_date=model.token_expiry_date,
        token_expiry_date_format=model.token_expiry_date_format,
        # The flag is derived from whether an expiry date format was provided.
        token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format),
        token_refresh_endpoint=model.token_refresh_endpoint,
        config=config,
        parameters=parameters,
        message_repository=self._message_repository,
        profile_assertion=profile_assertion,
        use_profile_assertion=model.use_profile_assertion,
    )
|
|
2500
|
+
|
|
2501
|
+
def create_offset_increment(
    self, model: OffsetIncrementModel, config: Config, decoder: Decoder, **kwargs: Any
) -> OffsetIncrement:
    """Build an OffsetIncrement pagination strategy.

    The decoder is wrapped in a PaginationDecoderDecorator unless it already is one.

    Raises:
        ValueError: if the underlying (unwrapped) decoder is not supported for pagination.
    """
    already_wrapped = isinstance(decoder, PaginationDecoderDecorator)
    if already_wrapped:
        inner_decoder = decoder.decoder
        pagination_decoder = decoder
    else:
        inner_decoder = decoder
        pagination_decoder = PaginationDecoderDecorator(decoder=decoder)

    # Support is checked against the unwrapped decoder, not the decorator.
    if not self._is_supported_decoder_for_pagination(inner_decoder):
        raise ValueError(
            self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
        )

    return OffsetIncrement(
        page_size=model.page_size,
        config=config,
        decoder=pagination_decoder,
        inject_on_first_request=model.inject_on_first_request or False,
        parameters=model.parameters or {},
    )
|
|
2524
|
+
|
|
2525
|
+
@staticmethod
def create_page_increment(
    model: PageIncrementModel, config: Config, **kwargs: Any
) -> PageIncrement:
    """Build a PageIncrement; unset start page defaults to 0 and first-request injection to False."""
    first_page = model.start_from_page or 0
    inject_on_first_request = model.inject_on_first_request or False
    return PageIncrement(
        page_size=model.page_size,
        config=config,
        start_from_page=first_page,
        inject_on_first_request=inject_on_first_request,
        parameters=model.parameters or {},
    )
|
|
2536
|
+
|
|
2537
|
+
def create_parent_stream_config(
    self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
) -> ParentStreamConfig:
    """Build a ParentStreamConfig, including its parent stream and optional request option.

    Extra keyword arguments are forwarded when building the parent stream.

    Raises:
        ValueError: if any ``lazy_read_pointer`` segment contains the ``*`` wildcard.
    """
    declarative_stream = self._create_component_from_model(
        model.stream, config=config, **kwargs
    )

    request_option = None
    if model.request_option:
        request_option = self._create_component_from_model(
            model.request_option, config=config
        )

    # An unset pointer is treated as an empty path.
    lazy_read_pointer: List[Union[InterpolatedString, str]] = (
        list(model.lazy_read_pointer) if model.lazy_read_pointer else []
    )
    if any("*" in segment for segment in lazy_read_pointer):
        raise ValueError(
            "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed."
        )

    return ParentStreamConfig(
        parent_key=model.parent_key,
        request_option=request_option,
        stream=declarative_stream,
        partition_field=model.partition_field,
        config=config,
        incremental_dependency=model.incremental_dependency or False,
        parameters=model.parameters or {},
        extra_fields=model.extra_fields,
        lazy_read_pointer=lazy_read_pointer,
    )
|
|
2569
|
+
|
|
2570
|
+
@staticmethod
def create_record_filter(
    model: RecordFilterModel, config: Config, **kwargs: Any
) -> RecordFilter:
    """Build a RecordFilter; a missing condition becomes an empty string."""
    filter_condition = model.condition or ""
    filter_parameters = model.parameters or {}
    return RecordFilter(
        condition=filter_condition,
        config=config,
        parameters=filter_parameters,
    )
|
|
2577
|
+
|
|
2578
|
+
@staticmethod
def create_request_path(model: RequestPathModel, config: Config, **kwargs: Any) -> RequestPath:
    """Return a new RequestPath with empty parameters; the model and config are not consulted."""
    request_path = RequestPath(parameters={})
    return request_path
|
|
2581
|
+
|
|
2582
|
+
@staticmethod
def create_request_option(
    model: RequestOptionModel, config: Config, **kwargs: Any
) -> RequestOption:
    """Build a RequestOption from its declarative model.

    ``field_name`` and each ``field_path`` segment are wrapped as interpolated
    strings when set, and are ``None`` otherwise. Parameters are read from the
    ``parameters`` keyword argument, defaulting to an empty dict.
    """

    def _parameters() -> Any:
        # Looked up per use, so a missing key yields a new empty dict each time.
        return kwargs.get("parameters", {})

    inject_into = RequestOptionType(model.inject_into.value)

    field_path: Optional[List[Union[InterpolatedString, str]]] = None
    if model.field_path:
        field_path = [
            InterpolatedString.create(segment, parameters=_parameters())
            for segment in model.field_path
        ]

    field_name = None
    if model.field_name:
        field_name = InterpolatedString.create(model.field_name, parameters=_parameters())

    return RequestOption(
        field_name=field_name,
        field_path=field_path,
        inject_into=inject_into,
        parameters=_parameters(),
    )
|
|
2606
|
+
|
|
2607
|
+
def create_record_selector(
    self,
    model: RecordSelectorModel,
    config: Config,
    *,
    name: str,
    transformations: List[RecordTransformation] | None = None,
    decoder: Decoder | None = None,
    client_side_incremental_sync: Dict[str, Any] | None = None,
    file_uploader: Optional[FileUploader] = None,
    **kwargs: Any,
) -> RecordSelector:
    """Build a RecordSelector with its extractor, record filter and schema normalization.

    When ``client_side_incremental_sync`` is provided, the record filter is
    replaced by a ClientSideIncrementalRecordFilterDecorator (carrying over the
    model's filter condition, if any) and transformations are forced to run
    before filtering.

    Raises:
        AssertionError: if ``model.transform_before_filtering`` is ``None``.
    """
    extractor = self._create_component_from_model(
        model=model.extractor, decoder=decoder, config=config
    )
    record_filter = (
        self._create_component_from_model(model.record_filter, config=config)
        if model.record_filter
        else None
    )

    assert model.transform_before_filtering is not None  # for mypy

    transform_before_filtering = model.transform_before_filtering
    if client_side_incremental_sync:
        record_filter = ClientSideIncrementalRecordFilterDecorator(
            config=config,
            # Normalise unset parameters to an empty mapping, as the other components here do.
            parameters=model.parameters or {},
            condition=model.record_filter.condition
            if (model.record_filter and hasattr(model.record_filter, "condition"))
            else None,
            **client_side_incremental_sync,
        )
        transform_before_filtering = True

    # A built-in normalization enum maps to a TypeTransformer; anything else is built as a component.
    schema_normalization = (
        TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization])
        if isinstance(model.schema_normalization, SchemaNormalizationModel)
        else self._create_component_from_model(model.schema_normalization, config=config)  # type: ignore[arg-type] # custom normalization model expected here
    )

    return RecordSelector(
        extractor=extractor,
        name=name,
        config=config,
        record_filter=record_filter,
        transformations=transformations or [],
        file_uploader=file_uploader,
        schema_normalization=schema_normalization,
        parameters=model.parameters or {},
        transform_before_filtering=transform_before_filtering,
    )
|
|
2659
|
+
|
|
2660
|
+
@staticmethod
def create_remove_fields(
    model: RemoveFieldsModel, config: Config, **kwargs: Any
) -> RemoveFields:
    """Build a RemoveFields transformation; a missing condition becomes an empty string."""
    removal_condition = model.condition or ""
    return RemoveFields(
        field_pointers=model.field_pointers,
        condition=removal_condition,
        parameters={},
    )
|
|
2667
|
+
|
|
2668
|
+
def create_selective_authenticator(
    self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any
) -> DeclarativeAuthenticator:
    """Build every candidate authenticator and hand them to SelectiveAuthenticator.

    Extra keyword arguments are forwarded to SelectiveAuthenticator.
    """
    built_authenticators = {}
    for auth_name, auth_model in model.authenticators.items():
        built_authenticators[auth_name] = self._create_component_from_model(
            model=auth_model, config=config
        )

    # SelectiveAuthenticator will return instance of DeclarativeAuthenticator or raise ValueError error
    return SelectiveAuthenticator(  # type: ignore[abstract]
        config=config,
        authenticators=built_authenticators,
        authenticator_selection_path=model.authenticator_selection_path,
        **kwargs,
    )
|
|
2682
|
+
|
|
2683
|
+
@staticmethod
def create_legacy_session_token_authenticator(
    model: LegacySessionTokenAuthenticatorModel, config: Config, *, url_base: str, **kwargs: Any
) -> LegacySessionTokenAuthenticator:
    """Build a LegacySessionTokenAuthenticator targeting ``url_base``.

    Unset username, password, session token and session-token response key
    become empty strings.
    """
    username = model.username or ""
    password = model.password or ""
    session_token = model.session_token or ""
    session_token_response_key = model.session_token_response_key or ""

    return LegacySessionTokenAuthenticator(
        api_url=url_base,
        header=model.header,
        login_url=model.login_url,
        password=password,
        session_token=session_token,
        session_token_response_key=session_token_response_key,
        username=username,
        validate_session_url=model.validate_session_url,
        config=config,
        parameters=model.parameters or {},
    )
|
|
2699
|
+
|
|
2700
|
+
def create_simple_retriever(
    self,
    model: SimpleRetrieverModel,
    config: Config,
    *,
    name: str,
    primary_key: Optional[Union[str, List[str], List[List[str]]]],
    stream_slicer: Optional[StreamSlicer],
    request_options_provider: Optional[RequestOptionsProvider] = None,
    stop_condition_on_cursor: bool = False,
    client_side_incremental_sync: Optional[Dict[str, Any]] = None,
    transformations: List[RecordTransformation],
    file_uploader: Optional[FileUploader] = None,
    incremental_sync: Optional[
        Union[
            IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel
        ]
    ] = None,
    **kwargs: Any,
) -> SimpleRetriever:
    """Assemble a retriever (lazy, test-read or plain) from a `SimpleRetrieverModel`.

    Steps, in order:
      1. Build the decoder (defaulting to `JsonDecoder`), requester and record selector.
      2. Resolve `url_base` from the requester model when it has that attribute,
         otherwise from the built requester.
      3. Pick the cursor and the request options provider from `stream_slicer`.
      4. Build the paginator (defaulting to `NoPagination`).
      5. Return a `LazySimpleRetriever` when the partition router is a substream
         router with at least one `lazy_read_pointer` and the stream has no state;
         otherwise a `SimpleRetrieverTestReadDecorator` when slice limiting or
         connector-builder messages are enabled; otherwise a `SimpleRetriever`.

    Raises:
        ValueError: in the lazy path, when `incremental_sync` is not a
            `DatetimeBasedCursor`, when it defines `step` or `cursor_granularity`,
            or when the model's decoder is not a `JsonDecoder`.
    """
    decoder = (
        self._create_component_from_model(model=model.decoder, config=config)
        if model.decoder
        else JsonDecoder(parameters={})
    )
    requester = self._create_component_from_model(
        model=model.requester, decoder=decoder, config=config, name=name
    )
    record_selector = self._create_component_from_model(
        model=model.record_selector,
        name=name,
        config=config,
        decoder=decoder,
        transformations=transformations,
        client_side_incremental_sync=client_side_incremental_sync,
        file_uploader=file_uploader,
    )
    url_base = (
        model.requester.url_base
        if hasattr(model.requester, "url_base")
        else requester.get_url_base()
    )

    # Define cursor only if per partition or common incremental support is needed
    cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None

    # An exact-type check is sufficient: anything that is not precisely a DatetimeBasedCursor
    # (None, another slicer, or a subclass) takes the first branch. The former additional
    # `not isinstance(...)` test was implied by this one.
    if type(stream_slicer) is not DatetimeBasedCursor:
        # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
        # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
        # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
        # request_options_provider
        request_options_provider = stream_slicer or DefaultRequestOptionsProvider(parameters={})
    elif not request_options_provider:
        request_options_provider = DefaultRequestOptionsProvider(parameters={})

    stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})

    cursor_used_for_stop_condition = cursor if stop_condition_on_cursor else None
    paginator = (
        self._create_component_from_model(
            model=model.paginator,
            config=config,
            url_base=url_base,
            decoder=decoder,
            cursor_used_for_stop_condition=cursor_used_for_stop_condition,
        )
        if model.paginator
        else NoPagination(parameters={})
    )

    ignore_stream_slicer_parameters_on_paginated_requests = (
        model.ignore_stream_slicer_parameters_on_paginated_requests or False
    )

    # Lazy path: substream router with a lazy_read_pointer and no saved state for this stream.
    if (
        model.partition_router
        and isinstance(model.partition_router, SubstreamPartitionRouterModel)
        and not bool(self._connector_state_manager.get_stream_state(name, None))
        and any(
            parent_stream_config.lazy_read_pointer
            for parent_stream_config in model.partition_router.parent_stream_configs
        )
    ):
        if incremental_sync:
            if incremental_sync.type != "DatetimeBasedCursor":
                raise ValueError(
                    f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}."
                )

            elif incremental_sync.step or incremental_sync.cursor_granularity:
                raise ValueError(
                    f"Found more than one slice per parent. LazySimpleRetriever only supports single slice read for stream - {name}."
                )

        if model.decoder and model.decoder.type != "JsonDecoder":
            raise ValueError(
                f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}."
            )

        return LazySimpleRetriever(
            name=name,
            paginator=paginator,
            primary_key=primary_key,
            requester=requester,
            record_selector=record_selector,
            stream_slicer=stream_slicer,
            request_option_provider=request_options_provider,
            cursor=cursor,
            config=config,
            ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
            parameters=model.parameters or {},
        )

    # Test-read path: cap the number of slices (5 when no explicit limit is configured).
    if self._limit_slices_fetched or self._emit_connector_builder_messages:
        return SimpleRetrieverTestReadDecorator(
            name=name,
            paginator=paginator,
            primary_key=primary_key,
            requester=requester,
            record_selector=record_selector,
            stream_slicer=stream_slicer,
            request_option_provider=request_options_provider,
            cursor=cursor,
            config=config,
            maximum_number_of_slices=self._limit_slices_fetched or 5,
            ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
            parameters=model.parameters or {},
        )
    return SimpleRetriever(
        name=name,
        paginator=paginator,
        primary_key=primary_key,
        requester=requester,
        record_selector=record_selector,
        stream_slicer=stream_slicer,
        request_option_provider=request_options_provider,
        cursor=cursor,
        config=config,
        ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
        parameters=model.parameters or {},
    )
|
|
2844
|
+
|
|
2845
|
+
def create_state_delegating_stream(
    self,
    model: StateDelegatingStreamModel,
    config: Config,
    has_parent_state: Optional[bool] = None,
    **kwargs: Any,
) -> DeclarativeStream:
    """Create either the incremental or the full-refresh stream of a delegating model.

    The incremental stream is chosen when the connector state manager returns a
    truthy state for `model.name` or when `has_parent_state` is truthy; the
    full-refresh stream is chosen otherwise.

    Raises:
        ValueError: if the delegating model and both nested streams do not all
            share the same name.
    """
    names_are_consistent = (
        model.full_refresh_stream.name == model.name
        and model.name == model.incremental_stream.name
    )
    if not names_are_consistent:
        raise ValueError(
            f"state_delegating_stream, full_refresh_stream name and incremental_stream must have equal names. Instead has {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}."
        )

    if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state:
        stream_model = model.incremental_stream
    else:
        stream_model = model.full_refresh_stream

    return self._create_component_from_model(stream_model, config=config, **kwargs)  # type: ignore[no-any-return]  # Will be created DeclarativeStream as stream_model is stream description
|
|
2867
|
+
|
|
2868
|
+
def _create_async_job_status_mapping(
    self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any
) -> Mapping[str, AsyncJobStatus]:
    """Invert the model's "CDK status -> API statuses" lists into "API status -> CDK status".

    Every key of `model.dict()` except `"type"` is treated as a CDK status name
    whose value is an iterable of API statuses. Each API status is mapped to the
    `AsyncJobStatus` returned by `_get_async_job_status` for that name.

    Raises:
        ValueError: if the same API status is listed more than once. The message
            names the CDK status that already owns it as well as the one that
            tried to claim it again.
    """
    api_status_to_cdk_status = {}
    # CDK status *name* that first claimed each API status; kept only to build an
    # accurate error message (the mapping above stores enum members, not names).
    claimed_by = {}
    for cdk_status, api_statuses in model.dict().items():
        if cdk_status == "type":
            # This is an element of the dict because of the typing of the CDK but it is not a CDK status
            continue

        for status in api_statuses:
            if status in api_status_to_cdk_status:
                raise ValueError(
                    f"API status {status} is already set for CDK status {claimed_by[status]} "
                    f"and cannot also be mapped to {cdk_status}. Please ensure API statuses are only provided once"
                )
            api_status_to_cdk_status[status] = self._get_async_job_status(cdk_status)
            claimed_by[status] = cdk_status
    return api_status_to_cdk_status
|
|
2884
|
+
|
|
2885
|
+
def _get_async_job_status(self, status: str) -> AsyncJobStatus:
    """Translate a CDK status name into its `AsyncJobStatus` member.

    Recognised names are "running", "completed", "failed" and "timeout".

    Raises:
        ValueError: for any other value.
    """
    if status == "running":
        return AsyncJobStatus.RUNNING
    if status == "completed":
        return AsyncJobStatus.COMPLETED
    if status == "failed":
        return AsyncJobStatus.FAILED
    if status == "timeout":
        return AsyncJobStatus.TIMED_OUT
    raise ValueError(f"Unsupported CDK status {status}")
|
|
2897
|
+
|
|
2898
|
+
def create_async_retriever(
    self,
    model: AsyncRetrieverModel,
    config: Config,
    *,
    name: str,
    primary_key: Optional[
        Union[str, List[str], List[List[str]]]
    ],  # this seems to be needed to match create_simple_retriever
    stream_slicer: Optional[StreamSlicer],
    client_side_incremental_sync: Optional[Dict[str, Any]] = None,
    transformations: List[RecordTransformation],
    **kwargs: Any,
) -> AsyncRetriever:
    """Assemble an `AsyncRetriever` (create job / poll / download) from its model.

    Builds the creation, polling and download requesters plus the optional abort,
    delete and download-target requesters, wraps them in an
    `AsyncHttpJobRepository`, and exposes that repository through an
    `AsyncJobPartitionRouter` used as the retriever's stream slicer.

    `primary_key` and `kwargs` are accepted but not read in this method.
    """

    def _get_download_retriever() -> SimpleRetrieverTestReadDecorator | SimpleRetriever:
        # Closure: reads `download_extractor`, `decoder`, `download_requester` and
        # `job_download_components_name`, all of which are assigned further down in the
        # enclosing function before this helper is called.
        record_selector = RecordSelector(
            extractor=download_extractor,
            name=name,
            record_filter=None,
            transformations=transformations,
            schema_normalization=TypeTransformer(TransformConfig.NoTransform),
            config=config,
            parameters={},
        )
        # NOTE(review): the download paginator receives `decoder`, not `download_decoder` — confirm this is intended.
        paginator = (
            self._create_component_from_model(
                model=model.download_paginator,
                decoder=decoder,
                config=config,
                url_base="",
            )
            if model.download_paginator
            else NoPagination(parameters={})
        )
        # Falls back to 5 slices when no explicit limit is configured.
        maximum_number_of_slices = self._limit_slices_fetched or 5

        # Test-read mode: use the slice-limited decorator.
        if self._limit_slices_fetched or self._emit_connector_builder_messages:
            return SimpleRetrieverTestReadDecorator(
                requester=download_requester,
                record_selector=record_selector,
                primary_key=None,
                name=job_download_components_name,
                paginator=paginator,
                config=config,
                parameters={},
                maximum_number_of_slices=maximum_number_of_slices,
            )

        return SimpleRetriever(
            requester=download_requester,
            record_selector=record_selector,
            primary_key=None,
            name=job_download_components_name,
            paginator=paginator,
            config=config,
            parameters={},
        )

    def _get_job_timeout() -> datetime.timedelta:
        # `polling_job_timeout` is interpolated against `config` and converted to an int
        # number of minutes; it stays None when the model does not set it.
        user_defined_timeout: Optional[int] = (
            int(
                InterpolatedString.create(
                    str(model.polling_job_timeout),
                    parameters={},
                ).eval(config)
            )
            if model.polling_job_timeout
            else None
        )

        # check for user defined timeout during the test read or 15 minutes
        test_read_timeout = datetime.timedelta(minutes=user_defined_timeout or 15)
        # default value for non-connector builder is 60 minutes.
        default_sync_timeout = datetime.timedelta(minutes=user_defined_timeout or 60)

        return (
            test_read_timeout if self._emit_connector_builder_messages else default_sync_timeout
        )

    # Decoder shared by the record selector and by every requester except the download one.
    decoder = (
        self._create_component_from_model(model=model.decoder, config=config)
        if model.decoder
        else JsonDecoder(parameters={})
    )
    record_selector = self._create_component_from_model(
        model=model.record_selector,
        config=config,
        decoder=decoder,
        name=name,
        transformations=transformations,
        client_side_incremental_sync=client_side_incremental_sync,
    )
    stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})
    creation_requester = self._create_component_from_model(
        model=model.creation_requester,
        decoder=decoder,
        config=config,
        name=f"job creation - {name}",
    )
    polling_requester = self._create_component_from_model(
        model=model.polling_requester,
        decoder=decoder,
        config=config,
        name=f"job polling - {name}",
    )
    job_download_components_name = f"job download - {name}"
    # The download step has its own decoder, defaulting to JSON.
    download_decoder = (
        self._create_component_from_model(model=model.download_decoder, config=config)
        if model.download_decoder
        else JsonDecoder(parameters={})
    )
    # NOTE(review): the first branch forwards `model.parameters` as-is (possibly None) while the
    # fallback uses `model.parameters or {}` — confirm the asymmetry is intended.
    download_extractor = (
        self._create_component_from_model(
            model=model.download_extractor,
            config=config,
            decoder=download_decoder,
            parameters=model.parameters,
        )
        if model.download_extractor
        else DpathExtractor(
            [],
            config=config,
            decoder=download_decoder,
            parameters=model.parameters or {},
        )
    )
    download_requester = self._create_component_from_model(
        model=model.download_requester,
        decoder=download_decoder,
        config=config,
        name=job_download_components_name,
    )
    # Must come after the download_* locals above: the helper closes over them.
    download_retriever = _get_download_retriever()
    # Abort, delete and download-target requesters are optional; None when not configured.
    abort_requester = (
        self._create_component_from_model(
            model=model.abort_requester,
            decoder=decoder,
            config=config,
            name=f"job abort - {name}",
        )
        if model.abort_requester
        else None
    )
    delete_requester = (
        self._create_component_from_model(
            model=model.delete_requester,
            decoder=decoder,
            config=config,
            name=f"job delete - {name}",
        )
        if model.delete_requester
        else None
    )
    download_target_requester = (
        self._create_component_from_model(
            model=model.download_target_requester,
            decoder=decoder,
            config=config,
            name=f"job extract_url - {name}",
        )
        if model.download_target_requester
        else None
    )
    status_extractor = self._create_component_from_model(
        model=model.status_extractor, decoder=decoder, config=config, name=name
    )
    download_target_extractor = self._create_component_from_model(
        model=model.download_target_extractor,
        decoder=decoder,
        config=config,
        name=name,
    )

    job_repository: AsyncJobRepository = AsyncHttpJobRepository(
        creation_requester=creation_requester,
        polling_requester=polling_requester,
        download_retriever=download_retriever,
        download_target_requester=download_target_requester,
        abort_requester=abort_requester,
        delete_requester=delete_requester,
        status_extractor=status_extractor,
        status_mapping=self._create_async_job_status_mapping(model.status_mapping, config),
        download_target_extractor=download_target_extractor,
        job_timeout=_get_job_timeout(),
    )

    # The orchestrator is created lazily, once per call of the factory with the stream slices.
    async_job_partition_router = AsyncJobPartitionRouter(
        job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
            job_repository,
            stream_slices,
            self._job_tracker,
            self._message_repository,
            has_bulk_parent=False,
            # FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk
        ),
        stream_slicer=stream_slicer,
        config=config,
        parameters=model.parameters or {},
    )

    return AsyncRetriever(
        record_selector=record_selector,
        stream_slicer=async_job_partition_router,
        config=config,
        parameters=model.parameters or {},
    )
|
|
3104
|
+
|
|
3105
|
+
@staticmethod
def create_spec(model: SpecModel, config: Config, **kwargs: Any) -> Spec:
    """Build a `Spec` from its model.

    Copies the connection specification, documentation URL and advanced-auth
    settings from the model; the component always gets empty parameters.
    `config` and `kwargs` are not read.
    """
    spec_fields = {
        "connection_specification": model.connection_specification,
        "documentation_url": model.documentation_url,
        "advanced_auth": model.advanced_auth,
    }
    return Spec(parameters={}, **spec_fields)
|
|
3113
|
+
|
|
3114
|
+
def create_substream_partition_router(
    self, model: SubstreamPartitionRouterModel, config: Config, **kwargs: Any
) -> SubstreamPartitionRouter:
    """Build a `SubstreamPartitionRouter` with one built config per parent stream.

    Each parent stream config model is created through
    `_create_message_repository_substream_wrapper`, receiving the same
    `config` and `kwargs`. A missing or empty list yields no parent configs.
    """
    built_parent_configs = []
    for parent_config_model in model.parent_stream_configs or []:
        built_parent_configs.append(
            self._create_message_repository_substream_wrapper(
                model=parent_config_model, config=config, **kwargs
            )
        )

    return SubstreamPartitionRouter(
        parent_stream_configs=built_parent_configs,
        parameters=model.parameters or {},
        config=config,
    )
|
|
3133
|
+
|
|
3134
|
+
def _create_message_repository_substream_wrapper(
    self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
) -> Any:
    """Create a parent stream config using a dedicated factory for substreams.

    The dedicated `ModelToComponentFactory` copies this factory's limits and
    flags, and wraps this factory's message repository in a
    `LogAppenderMessageRepositoryDecorator` that tags messages as coming from a
    substream / auxiliary HTTP request.
    """
    substream_log_context = {
        "airbyte_cdk": {"stream": {"is_substream": True}},
        "http": {"is_auxiliary": True},
    }
    decorated_repository = LogAppenderMessageRepositoryDecorator(
        substream_log_context,
        self._message_repository,
        self._evaluate_log_level(self._emit_connector_builder_messages),
    )
    substream_factory = ModelToComponentFactory(
        limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice,
        limit_slices_fetched=self._limit_slices_fetched,
        emit_connector_builder_messages=self._emit_connector_builder_messages,
        disable_retries=self._disable_retries,
        disable_cache=self._disable_cache,
        message_repository=decorated_repository,
    )

    # This flag will be used exclusively for StateDelegatingStream when a parent stream is created
    if model.incremental_dependency:
        has_parent_state = bool(
            self._connector_state_manager.get_stream_state(kwargs.get("stream_name", ""), None)
        )
    else:
        has_parent_state = False

    return substream_factory._create_component_from_model(
        model=model, config=config, has_parent_state=has_parent_state, **kwargs
    )
|
|
3159
|
+
|
|
3160
|
+
@staticmethod
def create_wait_time_from_header(
    model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any
) -> WaitTimeFromHeaderBackoffStrategy:
    """Build a `WaitTimeFromHeaderBackoffStrategy` from its model.

    Header, regex and the maximum waiting time are forwarded from the model
    (the maximum stays `None` when the model does not set it); parameters
    default to an empty mapping.
    """
    # Forwarding the attribute directly is equivalent to "value if not None else None".
    max_wait_seconds = model.max_waiting_time_in_seconds
    return WaitTimeFromHeaderBackoffStrategy(
        header=model.header,
        parameters=model.parameters or {},
        config=config,
        regex=model.regex,
        max_waiting_time_in_seconds=max_wait_seconds,
    )
|
|
3173
|
+
|
|
3174
|
+
@staticmethod
def create_wait_until_time_from_header(
    model: WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any
) -> WaitUntilTimeFromHeaderBackoffStrategy:
    """Build a `WaitUntilTimeFromHeaderBackoffStrategy` from its model.

    Header, minimum wait and regex come straight from the model; parameters
    default to an empty mapping. `kwargs` is not read.
    """
    strategy_parameters = model.parameters or {}
    return WaitUntilTimeFromHeaderBackoffStrategy(
        header=model.header,
        parameters=strategy_parameters,
        config=config,
        min_wait=model.min_wait,
        regex=model.regex,
    )
|
|
3185
|
+
|
|
3186
|
+
def get_message_repository(self) -> MessageRepository:
    """Return the message repository held by this factory."""
    repository = self._message_repository
    return repository
|
|
3188
|
+
|
|
3189
|
+
def _evaluate_log_level(self, emit_connector_builder_messages: bool) -> Level:
    """Return `Level.DEBUG` when connector-builder messages are emitted, else `Level.INFO`."""
    if emit_connector_builder_messages:
        return Level.DEBUG
    return Level.INFO
|
|
3191
|
+
|
|
3192
|
+
@staticmethod
def create_components_mapping_definition(
    model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any
) -> ComponentMappingDefinition:
    """Build a `ComponentMappingDefinition` with interpolated value and field path.

    The model's value and every element of its field path are wrapped in
    `InterpolatedString`; the declared value type name is resolved through
    `_json_schema_type_name_to_type`.
    """
    interpolated_value = InterpolatedString.create(
        model.value, parameters=model.parameters or {}
    )

    interpolated_path = []
    for path_segment in model.field_path:
        interpolated_path.append(
            InterpolatedString.create(path_segment, parameters=model.parameters or {})
        )

    resolved_value_type = ModelToComponentFactory._json_schema_type_name_to_type(model.value_type)
    return ComponentMappingDefinition(
        field_path=interpolated_path,  # type: ignore[arg-type] # field_path can be str and InterpolatedString
        value=interpolated_value,
        value_type=resolved_value_type,
        parameters=model.parameters or {},
    )
|
|
3209
|
+
|
|
3210
|
+
def create_http_components_resolver(
    self, model: HttpComponentsResolverModel, config: Config
) -> Any:
    """Build an `HttpComponentsResolver` with its retriever and component mappings.

    The retriever is created with an empty name, no primary key and no
    transformations. It uses the partition-router slicer when that is truthy
    and the resumable-cursor result otherwise.
    """
    stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
    combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)

    retriever = self._create_component_from_model(
        model=model.retriever,
        config=config,
        name="",
        primary_key=None,
        stream_slicer=stream_slicer or combined_slicers,
        transformations=[],
    )

    components_mapping = []
    for mapping_model in model.components_mapping:
        mapped_type = ModelToComponentFactory._json_schema_type_name_to_type(
            mapping_model.value_type
        )
        components_mapping.append(
            self._create_component_from_model(
                model=mapping_model,
                value_type=mapped_type,
                config=config,
            )
        )

    return HttpComponentsResolver(
        retriever=retriever,
        config=config,
        components_mapping=components_mapping,
        parameters=model.parameters or {},
    )
|
|
3242
|
+
|
|
3243
|
+
@staticmethod
def create_stream_config(
    model: StreamConfigModel, config: Config, **kwargs: Any
) -> StreamConfig:
    """Build a `StreamConfig` from its model.

    The configs pointer is copied into a new list (empty when the model has
    none); parameters default to an empty mapping.
    """
    model_configs_pointer: List[Union[InterpolatedString, str]] = (
        list(model.configs_pointer) if model.configs_pointer else []
    )
    stream_config_parameters = model.parameters or {}

    return StreamConfig(
        configs_pointer=model_configs_pointer,
        parameters=stream_config_parameters,
    )
|
|
3255
|
+
|
|
3256
|
+
def create_config_components_resolver(
    self, model: ConfigComponentsResolverModel, config: Config
) -> Any:
    """Build a `ConfigComponentsResolver` with its stream config and component mappings.

    The stream config is created from `model.stream_config` with the model's
    parameters (or an empty mapping); each mapping definition is created with
    its value type resolved through `_json_schema_type_name_to_type`.
    """
    stream_config = self._create_component_from_model(
        model.stream_config, config=config, parameters=model.parameters or {}
    )

    components_mapping = []
    for mapping_model in model.components_mapping:
        mapped_type = ModelToComponentFactory._json_schema_type_name_to_type(
            mapping_model.value_type
        )
        components_mapping.append(
            self._create_component_from_model(
                model=mapping_model,
                value_type=mapped_type,
                config=config,
            )
        )

    return ConfigComponentsResolver(
        stream_config=stream_config,
        config=config,
        components_mapping=components_mapping,
        parameters=model.parameters or {},
    )
|
|
3280
|
+
|
|
3281
|
+
# Template for the error raised when a decoder cannot be used with pagination; format it
# with `decoder_type`. Adjacent string literals are joined verbatim, so each sentence
# carries its own trailing space to keep the sentences from running together.
_UNSUPPORTED_DECODER_ERROR = (
    "Specified decoder of {decoder_type} is not supported for pagination. "
    "Please set as `JsonDecoder`, `XmlDecoder`, or a `CompositeRawDecoder` with an inner_parser of `JsonParser` or `GzipParser` instead. "
    "If using `GzipParser`, please ensure that the lowest level inner_parser is a `JsonParser`."
)
|
|
3286
|
+
|
|
3287
|
+
def _is_supported_decoder_for_pagination(self, decoder: Decoder) -> bool:
    """Tell whether `decoder` can be used together with pagination.

    `JsonDecoder` and `XmlDecoder` are always supported. A
    `CompositeRawDecoder` is supported when its parser passes
    `_is_supported_parser_for_pagination`. Anything else is not supported.
    """
    if isinstance(decoder, (JsonDecoder, XmlDecoder)):
        return True
    if isinstance(decoder, CompositeRawDecoder):
        return self._is_supported_parser_for_pagination(decoder.parser)
    return False
|
|
3294
|
+
|
|
3295
|
+
def _is_supported_parser_for_pagination(self, parser: Parser) -> bool:
    """Tell whether `parser` can be used together with pagination.

    A `JsonParser` is supported. A `GzipParser` is supported only when its
    direct `inner_parser` is a `JsonParser`. Anything else is not supported.
    """
    if isinstance(parser, JsonParser):
        return True
    if isinstance(parser, GzipParser):
        return isinstance(parser.inner_parser, JsonParser)
    return False
|
|
3302
|
+
|
|
3303
|
+
def create_http_api_budget(
    self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any
) -> HttpAPIBudget:
    """Build an `HttpAPIBudget` from its model.

    Every policy model is turned into a component. Header names default to
    "ratelimit-reset" / "ratelimit-remaining" and the rate-limit status codes
    default to `[429]` when the model leaves them unset or empty.
    """
    policies = []
    for policy_model in model.policies:
        policies.append(self._create_component_from_model(model=policy_model, config=config))

    reset_header = model.ratelimit_reset_header or "ratelimit-reset"
    remaining_header = model.ratelimit_remaining_header or "ratelimit-remaining"
    ratelimit_status_codes = model.status_codes_for_ratelimit_hit or [429]

    return HttpAPIBudget(
        policies=policies,
        ratelimit_reset_header=reset_header,
        ratelimit_remaining_header=remaining_header,
        status_codes_for_ratelimit_hit=ratelimit_status_codes,
    )
|
|
3317
|
+
|
|
3318
|
+
def create_fixed_window_call_rate_policy(
    self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any
) -> FixedWindowCallRatePolicy:
    """Build a `FixedWindowCallRatePolicy` from its model.

    Matchers are created from the model's matcher models, the period is parsed
    with `parse_duration`, and the call limit is forwarded unchanged.
    """
    matchers = []
    for matcher_model in model.matchers:
        matchers.append(self._create_component_from_model(model=matcher_model, config=config))

    # Set the initial reset timestamp to 10 days from now.
    # This value will be updated by the first request.
    initial_reset_ts = datetime.datetime.now() + datetime.timedelta(days=10)
    window_period = parse_duration(model.period)

    return FixedWindowCallRatePolicy(
        next_reset_ts=initial_reset_ts,
        period=window_period,
        call_limit=model.call_limit,
        matchers=matchers,
    )
|
|
3334
|
+
|
|
3335
|
+
def create_file_uploader(
    self, model: FileUploaderModel, config: Config, **kwargs: Any
) -> FileUploader:
    """Build a `FileUploader` with its requester and download-target extractor.

    Both sub-components are created under the fixed name "File Uploader" and
    receive the extra `kwargs`. A falsy `filename_extractor` is passed on as
    `None`.
    """
    name = "File Uploader"
    shared_arguments = {"config": config, "name": name, **kwargs}

    requester = self._create_component_from_model(model=model.requester, **shared_arguments)
    download_target_extractor = self._create_component_from_model(
        model=model.download_target_extractor, **shared_arguments
    )

    return FileUploader(
        requester=requester,
        download_target_extractor=download_target_extractor,
        config=config,
        parameters=model.parameters or {},
        filename_extractor=model.filename_extractor or None,
    )
|
|
3358
|
+
|
|
3359
|
+
def create_moving_window_call_rate_policy(
    self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any
) -> MovingWindowCallRatePolicy:
    """Build a `MovingWindowCallRatePolicy` from its rate and matcher models."""
    rates = []
    for rate_model in model.rates:
        rates.append(self._create_component_from_model(model=rate_model, config=config))

    matchers = []
    for matcher_model in model.matchers:
        matchers.append(self._create_component_from_model(model=matcher_model, config=config))

    return MovingWindowCallRatePolicy(rates=rates, matchers=matchers)
|
|
3373
|
+
|
|
3374
|
+
def create_unlimited_call_rate_policy(
    self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any
) -> UnlimitedCallRatePolicy:
    """Build an `UnlimitedCallRatePolicy` from its matcher models."""
    matchers = []
    for matcher_model in model.matchers:
        matchers.append(self._create_component_from_model(model=matcher_model, config=config))

    return UnlimitedCallRatePolicy(matchers=matchers)
|
|
3385
|
+
|
|
3386
|
+
def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate:
    """Build a `Rate` from its model.

    The limit is stringified, interpolated against `config` and converted to
    an int; the interval is parsed with `parse_duration`.
    """
    limit_template = InterpolatedString.create(str(model.limit), parameters={})
    resolved_limit = int(limit_template.eval(config=config))
    return Rate(limit=resolved_limit, interval=parse_duration(model.interval))
|
|
3392
|
+
|
|
3393
|
+
def create_http_request_matcher(
    self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any
) -> HttpRequestRegexMatcher:
    """Build an `HttpRequestRegexMatcher` by copying the matching criteria from the model.

    `config` and `kwargs` are not read.
    """
    matching_criteria = {
        "method": model.method,
        "url_base": model.url_base,
        "url_path_pattern": model.url_path_pattern,
        "params": model.params,
        "headers": model.headers,
    }
    return HttpRequestRegexMatcher(**matching_criteria)
|
|
3403
|
+
|
|
3404
|
+
def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None:
    """Create an API budget from `component_definition` and store it on the factory.

    The definition is built through `create_component` with
    `HTTPAPIBudgetModel` as the model type; the result is kept in
    `self._api_budget`.
    """
    api_budget = self.create_component(
        model_type=HTTPAPIBudgetModel,
        component_definition=component_definition,
        config=config,
    )
    self._api_budget = api_budget
|