airbyte-cdk 0.0.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/__init__.py +358 -0
- airbyte_cdk/cli/__init__.py +1 -0
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
- airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
- airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
- airbyte_cdk/config_observation.py +104 -0
- airbyte_cdk/connector.py +123 -0
- airbyte_cdk/connector_builder/README.md +53 -0
- airbyte_cdk/connector_builder/__init__.py +3 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
- airbyte_cdk/connector_builder/main.py +107 -0
- airbyte_cdk/connector_builder/models.py +73 -0
- airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
- airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
- airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
- airbyte_cdk/connector_builder/test_reader/types.py +83 -0
- airbyte_cdk/destinations/__init__.py +8 -0
- airbyte_cdk/destinations/destination.py +154 -0
- airbyte_cdk/destinations/vector_db_based/README.md +37 -0
- airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
- airbyte_cdk/destinations/vector_db_based/config.py +298 -0
- airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
- airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
- airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
- airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
- airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
- airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
- airbyte_cdk/entrypoint.py +414 -0
- airbyte_cdk/exception_handler.py +56 -0
- airbyte_cdk/logger.py +109 -0
- airbyte_cdk/models/__init__.py +72 -0
- airbyte_cdk/models/airbyte_protocol.py +88 -0
- airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
- airbyte_cdk/models/well_known_types.py +5 -0
- airbyte_cdk/py.typed +0 -0
- airbyte_cdk/sources/__init__.py +26 -0
- airbyte_cdk/sources/abstract_source.py +326 -0
- airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
- airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
- airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
- airbyte_cdk/sources/config.py +27 -0
- airbyte_cdk/sources/connector_state_manager.py +161 -0
- airbyte_cdk/sources/declarative/__init__.py +3 -0
- airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
- airbyte_cdk/sources/declarative/async_job/job.py +52 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
- airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
- airbyte_cdk/sources/declarative/async_job/status.py +24 -0
- airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
- airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
- airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
- airbyte_cdk/sources/declarative/auth/token.py +267 -0
- airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
- airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
- airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
- airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
- airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
- airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
- airbyte_cdk/sources/declarative/declarative_source.py +36 -0
- airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
- airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
- airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
- airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
- airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
- airbyte_cdk/sources/declarative/exceptions.py +9 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
- airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
- airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
- airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
- airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
- airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
- airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
- airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
- airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
- airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
- airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
- airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
- airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
- airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
- airbyte_cdk/sources/declarative/models/__init__.py +2 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
- airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
- airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
- airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
- airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
- airbyte_cdk/sources/declarative/requesters/README.md +56 -0
- airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
- airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
- airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
- airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
- airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
- airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
- airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
- airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
- airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
- airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
- airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
- airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
- airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
- airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
- airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
- airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
- airbyte_cdk/sources/declarative/spec/spec.py +48 -0
- airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
- airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
- airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
- airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
- airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
- airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
- airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
- airbyte_cdk/sources/declarative/types.py +25 -0
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
- airbyte_cdk/sources/file_based/README.md +152 -0
- airbyte_cdk/sources/file_based/__init__.py +24 -0
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
- airbyte_cdk/sources/file_based/config/__init__.py +0 -0
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
- airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
- airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
- airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
- airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
- airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
- airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
- airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
- airbyte_cdk/sources/file_based/exceptions.py +159 -0
- airbyte_cdk/sources/file_based/file_based_source.py +466 -0
- airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
- airbyte_cdk/sources/file_based/file_record_data.py +22 -0
- airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
- airbyte_cdk/sources/file_based/remote_file.py +18 -0
- airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
- airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
- airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
- airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
- airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
- airbyte_cdk/sources/file_based/types.py +10 -0
- airbyte_cdk/sources/http_config.py +10 -0
- airbyte_cdk/sources/http_logger.py +55 -0
- airbyte_cdk/sources/message/__init__.py +19 -0
- airbyte_cdk/sources/message/repository.py +137 -0
- airbyte_cdk/sources/source.py +95 -0
- airbyte_cdk/sources/specs/transfer_modes.py +26 -0
- airbyte_cdk/sources/streams/__init__.py +8 -0
- airbyte_cdk/sources/streams/availability_strategy.py +84 -0
- airbyte_cdk/sources/streams/call_rate.py +704 -0
- airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
- airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
- airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
- airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
- airbyte_cdk/sources/streams/concurrent/README.md +7 -0
- airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
- airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
- airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
- airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
- airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
- airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
- airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
- airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
- airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
- airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
- airbyte_cdk/sources/streams/core.py +703 -0
- airbyte_cdk/sources/streams/http/__init__.py +10 -0
- airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
- airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
- airbyte_cdk/sources/streams/http/exceptions.py +61 -0
- airbyte_cdk/sources/streams/http/http.py +673 -0
- airbyte_cdk/sources/streams/http/http_client.py +531 -0
- airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
- airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
- airbyte_cdk/sources/streams/utils/__init__.py +3 -0
- airbyte_cdk/sources/types.py +169 -0
- airbyte_cdk/sources/utils/__init__.py +7 -0
- airbyte_cdk/sources/utils/casing.py +12 -0
- airbyte_cdk/sources/utils/files_directory.py +15 -0
- airbyte_cdk/sources/utils/record_helper.py +53 -0
- airbyte_cdk/sources/utils/schema_helpers.py +230 -0
- airbyte_cdk/sources/utils/slice_logger.py +57 -0
- airbyte_cdk/sources/utils/transform.py +277 -0
- airbyte_cdk/sources/utils/types.py +7 -0
- airbyte_cdk/sql/__init__.py +0 -0
- airbyte_cdk/sql/_util/__init__.py +0 -0
- airbyte_cdk/sql/_util/hashing.py +34 -0
- airbyte_cdk/sql/_util/name_normalizers.py +92 -0
- airbyte_cdk/sql/constants.py +32 -0
- airbyte_cdk/sql/exceptions.py +235 -0
- airbyte_cdk/sql/secrets.py +123 -0
- airbyte_cdk/sql/shared/__init__.py +15 -0
- airbyte_cdk/sql/shared/catalog_providers.py +145 -0
- airbyte_cdk/sql/shared/sql_processor.py +786 -0
- airbyte_cdk/sql/types.py +160 -0
- airbyte_cdk/test/__init__.py +7 -0
- airbyte_cdk/test/catalog_builder.py +81 -0
- airbyte_cdk/test/entrypoint_wrapper.py +250 -0
- airbyte_cdk/test/mock_http/__init__.py +6 -0
- airbyte_cdk/test/mock_http/matcher.py +41 -0
- airbyte_cdk/test/mock_http/mocker.py +185 -0
- airbyte_cdk/test/mock_http/request.py +103 -0
- airbyte_cdk/test/mock_http/response.py +28 -0
- airbyte_cdk/test/mock_http/response_builder.py +237 -0
- airbyte_cdk/test/state_builder.py +33 -0
- airbyte_cdk/test/utils/__init__.py +1 -0
- airbyte_cdk/test/utils/data.py +24 -0
- airbyte_cdk/test/utils/http_mocking.py +16 -0
- airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
- airbyte_cdk/test/utils/reading.py +26 -0
- airbyte_cdk/utils/__init__.py +10 -0
- airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
- airbyte_cdk/utils/analytics_message.py +25 -0
- airbyte_cdk/utils/constants.py +5 -0
- airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
- airbyte_cdk/utils/datetime_helpers.py +499 -0
- airbyte_cdk/utils/event_timing.py +85 -0
- airbyte_cdk/utils/is_cloud_environment.py +18 -0
- airbyte_cdk/utils/mapping_helpers.py +162 -0
- airbyte_cdk/utils/message_utils.py +26 -0
- airbyte_cdk/utils/oneof_option_config.py +33 -0
- airbyte_cdk/utils/print_buffer.py +75 -0
- airbyte_cdk/utils/schema_inferrer.py +270 -0
- airbyte_cdk/utils/slice_hasher.py +37 -0
- airbyte_cdk/utils/spec_schema_transformations.py +26 -0
- airbyte_cdk/utils/stream_status_utils.py +43 -0
- airbyte_cdk/utils/traced_exception.py +145 -0
- airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
- airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
- airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
- airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
- airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
- airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"""Contains functions to compile custom code from text."""
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
from collections.abc import Mapping
|
|
7
|
+
from types import ModuleType
|
|
8
|
+
from typing import Any, cast
|
|
9
|
+
|
|
10
|
+
from typing_extensions import Literal
|
|
11
|
+
|
|
12
|
+
# Names of the checksum algorithms accepted for validating injected code.
ChecksumType = Literal["md5", "sha256"]

# Maps each supported checksum name to the `hashlib` constructor implementing it.
CHECKSUM_FUNCTIONS = {
    "md5": hashlib.md5,
    "sha256": hashlib.sha256,
}

# Names under which the components module is looked up and registered in `sys.modules`.
COMPONENTS_MODULE_NAME = "components"
SDM_COMPONENTS_MODULE_NAME = "source_declarative_manifest.components"

# Config keys for injected content. Only the two `components_py` keys are read in this
# module; `INJECTED_MANIFEST` is presumably consumed elsewhere (TODO confirm).
INJECTED_MANIFEST = "__injected_declarative_manifest"
INJECTED_COMPONENTS_PY = "__injected_components_py"
INJECTED_COMPONENTS_PY_CHECKSUMS = "__injected_components_py_checksums"

# Environment variable that must equal 'true' (case-insensitive) to allow custom code.
ENV_VAR_ALLOW_CUSTOM_CODE = "AIRBYTE_ENABLE_UNSAFE_CODE"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class AirbyteCodeTamperedError(Exception):
    """Signal that the connector's components module failed its checksum comparison.

    The mismatch is treated as fatal because it may indicate that the code was
    tampered with.
    """
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class AirbyteCustomCodeNotPermittedError(Exception):
    """Signal an attempt to run custom code in an environment that does not allow it."""

    def __init__(self) -> None:
        # The message is fixed; callers construct this exception without arguments.
        message = (
            "Custom connector code is not permitted in this environment. "
            "If you need to run custom code, please ask your administrator to set the `AIRBYTE_ENABLE_UNSAFE_CODE` "
            "environment variable to 'true' in your Airbyte environment. "
            "If you see this message in Airbyte Cloud, your workspace does not allow executing "
            "custom connector code."
        )
        super().__init__(message)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _hash_text(input_text: str, hash_type: str = "md5") -> str:
    """Compute the hexadecimal digest of `input_text` using the named algorithm.

    `hash_type` must be a key of `CHECKSUM_FUNCTIONS`; any other value raises `KeyError`
    from the lookup. An empty `input_text` raises `ValueError`.
    """
    if not input_text:
        raise ValueError("Hash input text cannot be empty.")

    digest_factory = CHECKSUM_FUNCTIONS[hash_type]
    # Passing the data to the constructor is equivalent to creating the hash and calling `update`.
    return digest_factory(input_text.encode()).hexdigest()
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def custom_code_execution_permitted() -> bool:
    """Report whether custom code may be executed in the current environment.

    Execution is allowed only when the `AIRBYTE_ENABLE_UNSAFE_CODE` environment variable
    equals 'true', compared case-insensitively. An unset variable counts as not permitted.
    """
    flag_value = os.environ.get(ENV_VAR_ALLOW_CUSTOM_CODE, "")
    return flag_value.lower() == "true"
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def validate_python_code(
|
|
64
|
+
code_text: str,
|
|
65
|
+
checksums: dict[str, str] | None,
|
|
66
|
+
) -> None:
|
|
67
|
+
"""Validate the provided Python code text against the provided checksums.
|
|
68
|
+
|
|
69
|
+
Currently we fail if no checksums are provided, although this may change in the future.
|
|
70
|
+
"""
|
|
71
|
+
if not code_text:
|
|
72
|
+
# No code provided, nothing to validate.
|
|
73
|
+
return
|
|
74
|
+
|
|
75
|
+
if not checksums:
|
|
76
|
+
raise ValueError(f"A checksum is required to validate the code. Received: {checksums}")
|
|
77
|
+
|
|
78
|
+
for checksum_type, checksum in checksums.items():
|
|
79
|
+
if checksum_type not in CHECKSUM_FUNCTIONS:
|
|
80
|
+
raise ValueError(
|
|
81
|
+
f"Unsupported checksum type: {checksum_type}. Supported checksum types are: {CHECKSUM_FUNCTIONS.keys()}"
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
calculated_checksum = _hash_text(code_text, checksum_type)
|
|
85
|
+
if calculated_checksum != checksum:
|
|
86
|
+
raise AirbyteCodeTamperedError(
|
|
87
|
+
f"{checksum_type} checksum does not match."
|
|
88
|
+
+ str(
|
|
89
|
+
{
|
|
90
|
+
"expected_checksum": checksum,
|
|
91
|
+
"actual_checksum": calculated_checksum,
|
|
92
|
+
"code_text": code_text,
|
|
93
|
+
}
|
|
94
|
+
),
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def get_registered_components_module(
    config: Mapping[str, Any] | None,
) -> ModuleType | None:
    """Resolve the components module for the given config.

    When the config carries custom `components.py` text, that text is validated,
    executed and registered in `sys.modules` (so manifest declarations referencing its
    classes can import it later), and the resulting module is returned. This path raises
    `AirbyteCustomCodeNotPermittedError` if custom code execution is not permitted.

    Otherwise an already-registered module is looked up in `sys.modules`, preferring
    `source_declarative_manifest.components` over `components`.

    Returns `None` when no components text is provided and neither module is registered.
    """
    if config and config.get(INJECTED_COMPONENTS_PY, None):
        if custom_code_execution_permitted():
            # Build a new module from the injected Python text and register it.
            return register_components_module_from_string(
                components_py_text=config[INJECTED_COMPONENTS_PY],
                checksums=config.get(INJECTED_COMPONENTS_PY_CHECKSUMS, None),
            )
        raise AirbyteCustomCodeNotPermittedError

    # No injected code: fall back to whichever known module name is already registered.
    for module_name in (SDM_COMPONENTS_MODULE_NAME, COMPONENTS_MODULE_NAME):
        if module_name in sys.modules:
            return cast(ModuleType, sys.modules.get(module_name))

    # Neither module is in `sys.modules` and INJECTED_COMPONENTS_PY was not in config.
    return None
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def register_components_module_from_string(
    components_py_text: str,
    checksums: dict[str, Any] | None,
) -> ModuleType:
    """Build the components module from Python source text and register it.

    The text is first validated against `checksums` (validation errors propagate), then
    executed in the namespace of a fresh module. That module is stored in `sys.modules`
    under both `source_declarative_manifest.components` and `components`, and returned.
    """
    # Validation comes first so nothing is executed unless the checksums match.
    validate_python_code(code_text=components_py_text, checksums=checksums)

    module = ModuleType(name=COMPONENTS_MODULE_NAME)

    # NOTE: `exec` runs the provided text as-is. This function does not itself check
    # whether custom code execution is permitted; callers must do so beforehand.
    exec(components_py_text, module.__dict__)

    # Register under both names so either import path resolves to the same module object.
    for alias in (SDM_COMPONENTS_MODULE_NAME, COMPONENTS_MODULE_NAME):
        sys.modules[alias] = module

    return module
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class CircularReferenceException(Exception):
    """
    Signals that resolving a manifest reference led back to that same reference.
    """

    def __init__(self, reference: str) -> None:
        # The offending reference is embedded in the message handed to Exception.
        message = f"Circular reference found: {reference}"
        super().__init__(message)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class UndefinedReferenceException(Exception):
    """
    Signals that a manifest refers to a reference that is not defined.
    """

    def __init__(self, path: str, reference: str) -> None:
        # Both the reference and the path it was requested from go into the message.
        message = f"Undefined reference {reference} from {path}"
        super().__init__(message)
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
import copy
|
|
6
|
+
import typing
|
|
7
|
+
from typing import Any, Mapping
|
|
8
|
+
|
|
9
|
+
# Key under which a component declares parameters to be propagated to its subcomponents.
PARAMETERS_STR = "$parameters"


# Default component type to insert when a subcomponent omits "type".
# Keys have the form "<ParentComponentType>.<field_name>"; values are the type to assume.
DEFAULT_MODEL_TYPES: Mapping[str, str] = {
    # CompositeErrorHandler
    "CompositeErrorHandler.error_handlers": "DefaultErrorHandler",
    # CursorPagination
    "CursorPagination.decoder": "JsonDecoder",
    # DatetimeBasedCursor
    "DatetimeBasedCursor.end_datetime": "MinMaxDatetime",
    "DatetimeBasedCursor.end_time_option": "RequestOption",
    "DatetimeBasedCursor.start_datetime": "MinMaxDatetime",
    "DatetimeBasedCursor.start_time_option": "RequestOption",
    # CustomIncrementalSync
    "CustomIncrementalSync.end_datetime": "MinMaxDatetime",
    "CustomIncrementalSync.end_time_option": "RequestOption",
    "CustomIncrementalSync.start_datetime": "MinMaxDatetime",
    "CustomIncrementalSync.start_time_option": "RequestOption",
    # DeclarativeSource
    "DeclarativeSource.check": "CheckStream",
    "DeclarativeSource.spec": "Spec",
    "DeclarativeSource.streams": "DeclarativeStream",
    # DeclarativeStream
    "DeclarativeStream.retriever": "SimpleRetriever",
    "DeclarativeStream.schema_loader": "JsonFileSchemaLoader",
    # DynamicDeclarativeStream
    "DynamicDeclarativeStream.stream_template": "DeclarativeStream",
    # The type name must match the "ConfigComponentsResolver.*" keys below (plural "Components").
    "DynamicDeclarativeStream.components_resolver": "ConfigComponentsResolver",
    # HttpComponentsResolver
    "HttpComponentsResolver.retriever": "SimpleRetriever",
    "HttpComponentsResolver.components_mapping": "ComponentMappingDefinition",
    # ConfigComponentsResolver
    "ConfigComponentsResolver.stream_config": "StreamConfig",
    "ConfigComponentsResolver.components_mapping": "ComponentMappingDefinition",
    # DefaultErrorHandler
    "DefaultErrorHandler.response_filters": "HttpResponseFilter",
    # DefaultPaginator
    "DefaultPaginator.decoder": "JsonDecoder",
    "DefaultPaginator.page_size_option": "RequestOption",
    # DpathExtractor
    "DpathExtractor.decoder": "JsonDecoder",
    # HttpRequester
    "HttpRequester.error_handler": "DefaultErrorHandler",
    # ListPartitionRouter
    "ListPartitionRouter.request_option": "RequestOption",
    # ParentStreamConfig
    "ParentStreamConfig.request_option": "RequestOption",
    "ParentStreamConfig.stream": "DeclarativeStream",
    # RecordSelector
    "RecordSelector.extractor": "DpathExtractor",
    "RecordSelector.record_filter": "RecordFilter",
    # SimpleRetriever
    "SimpleRetriever.paginator": "NoPagination",
    "SimpleRetriever.record_selector": "RecordSelector",
    "SimpleRetriever.requester": "HttpRequester",
    # SubstreamPartitionRouter
    "SubstreamPartitionRouter.parent_stream_configs": "ParentStreamConfig",
    # AddFields
    "AddFields.fields": "AddedFieldDefinition",
    # CustomPartitionRouter
    "CustomPartitionRouter.parent_stream_configs": "ParentStreamConfig",
    # DynamicSchemaLoader
    "DynamicSchemaLoader.retriever": "SimpleRetriever",
    # SchemaTypeIdentifier
    "SchemaTypeIdentifier.types_map": "TypesMap",
}
|
|
75
|
+
|
|
76
|
+
# Registry used for custom components (those declaring class_name) so that a missing type can be
# filled in automatically. Keys have the form "<ParentComponentType>.<field_name>".
# This is meant as a short-term fix: once the migration is complete, both type and class_name
# should be required on every custom component.
CUSTOM_COMPONENTS_MAPPING: Mapping[str, str] = {
    # CompositeErrorHandler
    "CompositeErrorHandler.backoff_strategies": "CustomBackoffStrategy",
    # DeclarativeStream
    "DeclarativeStream.retriever": "CustomRetriever",
    "DeclarativeStream.transformations": "CustomTransformation",
    # DefaultErrorHandler
    "DefaultErrorHandler.backoff_strategies": "CustomBackoffStrategy",
    # DefaultPaginator
    "DefaultPaginator.pagination_strategy": "CustomPaginationStrategy",
    # HttpRequester
    "HttpRequester.authenticator": "CustomAuthenticator",
    "HttpRequester.error_handler": "CustomErrorHandler",
    # RecordSelector
    "RecordSelector.extractor": "CustomRecordExtractor",
    # SimpleRetriever
    "SimpleRetriever.partition_router": "CustomPartitionRouter",
}
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class ManifestComponentTransformer:
    """Fills in default component types and propagates `$parameters` through a declarative component tree."""

    def propagate_types_and_parameters(
        self,
        parent_field_identifier: str,
        declarative_component: Mapping[str, Any],
        parent_parameters: Mapping[str, Any],
    ) -> Mapping[str, Any]:
        """
        Recursively transforms the specified declarative component and subcomponents to propagate parameters and insert the
        default component type if it was not already present. The resulting transformed components are a deep copy of the input
        components, not an in-place transformation.

        A field that is explicitly set on a component (including falsy values such as `False`, `0` or `""`) takes precedence
        over a parameter of the same name; only missing or `None` fields are filled from parameters.

        :param declarative_component: The current component that is having type and parameters added
        :param parent_field_identifier: The name of the field of the current component coming from the parent component
        :param parent_parameters: The parameters set on parent components defined before the current component
        :return: A deep copy of the transformed component with types and parameters persisted to it
        """
        propagated_component = dict(copy.deepcopy(declarative_component))
        if "type" not in propagated_component:
            # If the component has class_name we assume that this is a reference to a custom component. This is a slight change to
            # existing behavior because we originally allowed for either class or type to be specified. After the pydantic migration,
            # class_name will only be a valid field on custom components and this change reflects that. I checked, and we currently
            # have no low-code connectors that use class_name except for custom components.
            if "class_name" in propagated_component:
                found_type = CUSTOM_COMPONENTS_MAPPING.get(parent_field_identifier)
            else:
                found_type = DEFAULT_MODEL_TYPES.get(parent_field_identifier)
            if found_type:
                propagated_component["type"] = found_type

        # When there is no resolved type, we're not processing a component (likely a regular object) and don't need to propagate
        # parameters. When the type refers to a json schema, we're not processing a component either. This check is currently
        # imperfect as there could be json schemas that are not objects, but we believe this is not likely in our case because:
        # * records are Mapping so objects hence SchemaLoader root should be an object
        # * connection_specification is a Mapping
        if "type" not in propagated_component or self._is_json_schema_object(propagated_component):
            return propagated_component

        # Combines parameters defined at the current level with parameters from parent components. Parameters at the current
        # level take precedence. An explicit `$parameters: null` is treated the same as no parameters.
        component_parameters = propagated_component.pop(PARAMETERS_STR, {}) or {}
        current_parameters = {**copy.deepcopy(parent_parameters), **component_parameters}

        # Parameters should be applied to the current component fields with the existing field taking precedence over parameters
        # if both exist. Only a missing (or None) field is filled in, so explicit falsy values are not overwritten.
        for parameter_key, parameter_value in current_parameters.items():
            if propagated_component.get(parameter_key) is None:
                propagated_component[parameter_key] = parameter_value

        for field_name, field_value in propagated_component.items():
            if not isinstance(field_value, (dict, list)):
                continue

            # We exclude propagating a parameter that matches the current field name because that would result in an infinite
            # cycle. Presence is tracked separately from the value so that a falsy parameter is restored afterwards as well.
            had_parameter = field_name in current_parameters
            excluded_parameter = current_parameters.pop(field_name, None)
            parent_type_field_identifier = f"{propagated_component.get('type')}.{field_name}"

            if isinstance(field_value, dict):
                propagated_component[field_name] = self.propagate_types_and_parameters(
                    parent_type_field_identifier, field_value, current_parameters
                )
            else:
                # The list belongs to the deep copy made above, so replacing its elements does not touch the caller's input.
                for i, element in enumerate(field_value):
                    if isinstance(element, dict):
                        field_value[i] = self.propagate_types_and_parameters(
                            parent_type_field_identifier, element, current_parameters
                        )

            if had_parameter:
                current_parameters[field_name] = excluded_parameter

        if current_parameters:
            propagated_component[PARAMETERS_STR] = current_parameters
        return propagated_component

    @staticmethod
    def _is_json_schema_object(propagated_component: Mapping[str, Any]) -> bool:
        """Return True when the component's "type" is the JSON schema type "object"."""
        return propagated_component.get("type") == "object"
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from typing import Any, Mapping, Set, Tuple, Union
|
|
7
|
+
|
|
8
|
+
from airbyte_cdk.sources.declarative.parsers.custom_exceptions import (
|
|
9
|
+
CircularReferenceException,
|
|
10
|
+
UndefinedReferenceException,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
REF_TAG = "$ref"


class ManifestReferenceResolver:
    """
    An incoming manifest can contain references to values previously defined.
    This parser will dereference these values to produce a complete ConnectionDefinition.

    References can be defined using a #/<arg> string.
    ```
    key: 1234
    reference: "#/key"
    ```
    will produce the following definition:
    ```
    key: 1234
    reference: 1234
    ```
    This also works with objects:
    ```
    key_value_pairs:
      k1: v1
      k2: v2
    same_key_value_pairs: "#/key_value_pairs"
    ```
    will produce the following definition:
    ```
    key_value_pairs:
      k1: v1
      k2: v2
    same_key_value_pairs:
      k1: v1
      k2: v2
    ```

    The $ref keyword can be used to refer to an object and enhance it with additional key-value pairs
    ```
    key_value_pairs:
      k1: v1
      k2: v2
    same_key_value_pairs:
      $ref: "#/key_value_pairs"
      k3: v3
    ```
    will produce the following definition:
    ```
    key_value_pairs:
      k1: v1
      k2: v2
    same_key_value_pairs:
      k1: v1
      k2: v2
      k3: v3
    ```

    References can also point to nested values.
    Nested references are ambiguous because one could define a key containing `/`.
    In this example, we want to refer to the limit key in the dict object:
    ```
    dict:
      limit: 50
    limit_ref: "#/dict/limit"
    ```
    will produce the following definition:
    ```
    dict:
      limit: 50
    limit_ref: 50
    ```

    whereas here we want to access the `nested/path` value.
    ```
    nested:
      path: "first one"
    nested/path: "uh oh"
    value: "#/nested/path"
    ```
    will produce the following definition:
    ```
    nested:
      path: "first one"
    nested/path: "uh oh"
    value: "uh oh"
    ```

    To resolve the ambiguity, we try looking for the reference key at the top level, and then traverse the structs downward
    until we find a key with the given path, or until there is nothing to traverse.
    """

    def preprocess_manifest(self, manifest: Mapping[str, Any]) -> Mapping[str, Any]:
        """
        Resolve every reference found in the manifest.

        :param manifest: incoming manifest that could have references to previously defined components
        :return: the evaluated manifest in which references have been replaced by the values they point to
        :raises CircularReferenceException: when a reference is encountered again while it is still being resolved
        :raises UndefinedReferenceException: when a reference points to a location that cannot be read
        """
        return self._evaluate_node(manifest, manifest, set())  # type: ignore[no-any-return]

    def _evaluate_node(self, node: Any, manifest: Mapping[str, Any], visited: Set[Any]) -> Any:
        """
        Recursively evaluate a node of the manifest.

        :param node: the value to evaluate (dict, list, reference string or any other value)
        :param manifest: the complete manifest, used to look up referenced values
        :param visited: references currently being resolved, used to detect cycles
        :return: the node with all references resolved
        """
        if isinstance(node, dict):
            evaluated_dict = {
                k: self._evaluate_node(v, manifest, visited)
                for k, v in node.items()
                if not self._is_ref_key(k)
            }
            if REF_TAG in node:
                # The node includes a $ref key, so we splat the referenced value(s) into the evaluated dict
                evaluated_ref = self._evaluate_node(node[REF_TAG], manifest, visited)
                if not isinstance(evaluated_ref, dict):
                    # A $ref resolving to a non-dict replaces the whole node; sibling keys are dropped
                    return evaluated_ref
                else:
                    # The values defined on the component take precedence over the reference values
                    return evaluated_ref | evaluated_dict
            else:
                return evaluated_dict
        elif isinstance(node, list):
            return [self._evaluate_node(v, manifest, visited) for v in node]
        elif self._is_ref(node):
            if node in visited:
                raise CircularReferenceException(node)
            # Track the reference only while it is being resolved so that it can legitimately be used again elsewhere
            visited.add(node)
            ret = self._evaluate_node(self._lookup_ref_value(node, manifest), manifest, visited)
            visited.remove(node)
            return ret
        else:
            return node

    def _lookup_ref_value(self, ref: str, manifest: Mapping[str, Any]) -> Any:
        """
        Return the raw (not yet evaluated) value a reference string points to.

        :param ref: reference string of the form "#/<path>"
        :param manifest: the complete manifest
        :raises ValueError: when the reference does not start with "#/"
        :raises UndefinedReferenceException: when the path cannot be read from the manifest
        """
        ref_match = re.match(r"#/(.*)", ref)
        if not ref_match:
            raise ValueError(f"Invalid reference format {ref}")
        path = ref_match.groups()[0]
        try:
            return self._read_ref_value(path, manifest)
        except (AttributeError, KeyError, IndexError, TypeError) as error:
            # TypeError is raised when the path walks into a value that cannot be subscripted with the next
            # component (e.g. "#/key/foo" where `key` is a number), which is also an undefined reference.
            raise UndefinedReferenceException(path, ref) from error

    @staticmethod
    def _is_ref(node: Any) -> bool:
        """Return True when the node is a string starting with "#/"."""
        return isinstance(node, str) and node.startswith("#/")

    @staticmethod
    def _is_ref_key(key: str) -> bool:
        """Return True when the key is the `$ref` tag."""
        return bool(key == REF_TAG)

    @staticmethod
    def _read_ref_value(ref: str, manifest_node: Mapping[str, Any]) -> Any:
        """
        Read the value at the referenced location of the manifest.

        References are ambiguous because one could define a key containing `/`
        In this example, we want to refer to the `limit` key in the `dict` object:
            dict:
                limit: 50
            limit_ref: "#/dict/limit"

        Whereas here we want to access the `nested/path` value.
            nested:
                path: "first one"
            nested/path: "uh oh"
            value: "#/nested/path"

        To resolve the ambiguity, we try looking for the reference key at the top level, and then traverse the structs downward
        until we find a key with the given path, or until there is nothing to traverse.

        Consider the path foo/bar/baz. To resolve the ambiguity, we first try 'foo/bar/baz' in its entirety as a top-level key. If this
        fails, we try 'foo' as the top-level key, and if this succeeds, pass 'bar/baz' on as the key to be tried at the next level.
        """
        while ref:
            try:
                return manifest_node[ref]
            except (KeyError, TypeError):
                # The remaining path is not a key at this level: descend one component and retry with the rest
                head, ref = _parse_path(ref)
                manifest_node = manifest_node[head]  # type: ignore # Couldn't figure out how to fix this since manifest_node can get reassigned into other types like lists
        return manifest_node
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _parse_path(ref: str) -> Tuple[Union[str, int], str]:
|
|
189
|
+
"""
|
|
190
|
+
Return the next path component, together with the rest of the path.
|
|
191
|
+
|
|
192
|
+
A path component may be a string key, or an int index.
|
|
193
|
+
|
|
194
|
+
>>> _parse_path("foo/bar")
|
|
195
|
+
"foo", "bar"
|
|
196
|
+
>>> _parse_path("foo/7/8/bar")
|
|
197
|
+
"foo", "7/8/bar"
|
|
198
|
+
>>> _parse_path("7/8/bar")
|
|
199
|
+
7, "8/bar"
|
|
200
|
+
>>> _parse_path("8/bar")
|
|
201
|
+
8, "bar"
|
|
202
|
+
>>> _parse_path("8foo/bar")
|
|
203
|
+
"8foo", "bar"
|
|
204
|
+
"""
|
|
205
|
+
match = re.match(r"([^/]*)/?(.*)", ref)
|
|
206
|
+
if match:
|
|
207
|
+
first, rest = match.groups()
|
|
208
|
+
try:
|
|
209
|
+
return int(first), rest
|
|
210
|
+
except ValueError:
|
|
211
|
+
return first, rest
|
|
212
|
+
else:
|
|
213
|
+
raise ValueError(f"Invalid path {ref} specified")
|