airbyte-cdk 0.0.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/__init__.py +358 -0
- airbyte_cdk/cli/__init__.py +1 -0
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
- airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
- airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
- airbyte_cdk/config_observation.py +104 -0
- airbyte_cdk/connector.py +123 -0
- airbyte_cdk/connector_builder/README.md +53 -0
- airbyte_cdk/connector_builder/__init__.py +3 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
- airbyte_cdk/connector_builder/main.py +107 -0
- airbyte_cdk/connector_builder/models.py +73 -0
- airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
- airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
- airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
- airbyte_cdk/connector_builder/test_reader/types.py +83 -0
- airbyte_cdk/destinations/__init__.py +8 -0
- airbyte_cdk/destinations/destination.py +154 -0
- airbyte_cdk/destinations/vector_db_based/README.md +37 -0
- airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
- airbyte_cdk/destinations/vector_db_based/config.py +298 -0
- airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
- airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
- airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
- airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
- airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
- airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
- airbyte_cdk/entrypoint.py +414 -0
- airbyte_cdk/exception_handler.py +56 -0
- airbyte_cdk/logger.py +109 -0
- airbyte_cdk/models/__init__.py +72 -0
- airbyte_cdk/models/airbyte_protocol.py +88 -0
- airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
- airbyte_cdk/models/well_known_types.py +5 -0
- airbyte_cdk/py.typed +0 -0
- airbyte_cdk/sources/__init__.py +26 -0
- airbyte_cdk/sources/abstract_source.py +326 -0
- airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
- airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
- airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
- airbyte_cdk/sources/config.py +27 -0
- airbyte_cdk/sources/connector_state_manager.py +161 -0
- airbyte_cdk/sources/declarative/__init__.py +3 -0
- airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
- airbyte_cdk/sources/declarative/async_job/job.py +52 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
- airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
- airbyte_cdk/sources/declarative/async_job/status.py +24 -0
- airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
- airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
- airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
- airbyte_cdk/sources/declarative/auth/token.py +267 -0
- airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
- airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
- airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
- airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
- airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
- airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
- airbyte_cdk/sources/declarative/declarative_source.py +36 -0
- airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
- airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
- airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
- airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
- airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
- airbyte_cdk/sources/declarative/exceptions.py +9 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
- airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
- airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
- airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
- airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
- airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
- airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
- airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
- airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
- airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
- airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
- airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
- airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
- airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
- airbyte_cdk/sources/declarative/models/__init__.py +2 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
- airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
- airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
- airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
- airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
- airbyte_cdk/sources/declarative/requesters/README.md +56 -0
- airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
- airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
- airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
- airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
- airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
- airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
- airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
- airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
- airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
- airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
- airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
- airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
- airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
- airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
- airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
- airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
- airbyte_cdk/sources/declarative/spec/spec.py +48 -0
- airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
- airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
- airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
- airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
- airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
- airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
- airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
- airbyte_cdk/sources/declarative/types.py +25 -0
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
- airbyte_cdk/sources/file_based/README.md +152 -0
- airbyte_cdk/sources/file_based/__init__.py +24 -0
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
- airbyte_cdk/sources/file_based/config/__init__.py +0 -0
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
- airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
- airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
- airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
- airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
- airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
- airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
- airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
- airbyte_cdk/sources/file_based/exceptions.py +159 -0
- airbyte_cdk/sources/file_based/file_based_source.py +466 -0
- airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
- airbyte_cdk/sources/file_based/file_record_data.py +22 -0
- airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
- airbyte_cdk/sources/file_based/remote_file.py +18 -0
- airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
- airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
- airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
- airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
- airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
- airbyte_cdk/sources/file_based/types.py +10 -0
- airbyte_cdk/sources/http_config.py +10 -0
- airbyte_cdk/sources/http_logger.py +55 -0
- airbyte_cdk/sources/message/__init__.py +19 -0
- airbyte_cdk/sources/message/repository.py +137 -0
- airbyte_cdk/sources/source.py +95 -0
- airbyte_cdk/sources/specs/transfer_modes.py +26 -0
- airbyte_cdk/sources/streams/__init__.py +8 -0
- airbyte_cdk/sources/streams/availability_strategy.py +84 -0
- airbyte_cdk/sources/streams/call_rate.py +704 -0
- airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
- airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
- airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
- airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
- airbyte_cdk/sources/streams/concurrent/README.md +7 -0
- airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
- airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
- airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
- airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
- airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
- airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
- airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
- airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
- airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
- airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
- airbyte_cdk/sources/streams/core.py +703 -0
- airbyte_cdk/sources/streams/http/__init__.py +10 -0
- airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
- airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
- airbyte_cdk/sources/streams/http/exceptions.py +61 -0
- airbyte_cdk/sources/streams/http/http.py +673 -0
- airbyte_cdk/sources/streams/http/http_client.py +531 -0
- airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
- airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
- airbyte_cdk/sources/streams/utils/__init__.py +3 -0
- airbyte_cdk/sources/types.py +169 -0
- airbyte_cdk/sources/utils/__init__.py +7 -0
- airbyte_cdk/sources/utils/casing.py +12 -0
- airbyte_cdk/sources/utils/files_directory.py +15 -0
- airbyte_cdk/sources/utils/record_helper.py +53 -0
- airbyte_cdk/sources/utils/schema_helpers.py +230 -0
- airbyte_cdk/sources/utils/slice_logger.py +57 -0
- airbyte_cdk/sources/utils/transform.py +277 -0
- airbyte_cdk/sources/utils/types.py +7 -0
- airbyte_cdk/sql/__init__.py +0 -0
- airbyte_cdk/sql/_util/__init__.py +0 -0
- airbyte_cdk/sql/_util/hashing.py +34 -0
- airbyte_cdk/sql/_util/name_normalizers.py +92 -0
- airbyte_cdk/sql/constants.py +32 -0
- airbyte_cdk/sql/exceptions.py +235 -0
- airbyte_cdk/sql/secrets.py +123 -0
- airbyte_cdk/sql/shared/__init__.py +15 -0
- airbyte_cdk/sql/shared/catalog_providers.py +145 -0
- airbyte_cdk/sql/shared/sql_processor.py +786 -0
- airbyte_cdk/sql/types.py +160 -0
- airbyte_cdk/test/__init__.py +7 -0
- airbyte_cdk/test/catalog_builder.py +81 -0
- airbyte_cdk/test/entrypoint_wrapper.py +250 -0
- airbyte_cdk/test/mock_http/__init__.py +6 -0
- airbyte_cdk/test/mock_http/matcher.py +41 -0
- airbyte_cdk/test/mock_http/mocker.py +185 -0
- airbyte_cdk/test/mock_http/request.py +103 -0
- airbyte_cdk/test/mock_http/response.py +28 -0
- airbyte_cdk/test/mock_http/response_builder.py +237 -0
- airbyte_cdk/test/state_builder.py +33 -0
- airbyte_cdk/test/utils/__init__.py +1 -0
- airbyte_cdk/test/utils/data.py +24 -0
- airbyte_cdk/test/utils/http_mocking.py +16 -0
- airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
- airbyte_cdk/test/utils/reading.py +26 -0
- airbyte_cdk/utils/__init__.py +10 -0
- airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
- airbyte_cdk/utils/analytics_message.py +25 -0
- airbyte_cdk/utils/constants.py +5 -0
- airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
- airbyte_cdk/utils/datetime_helpers.py +499 -0
- airbyte_cdk/utils/event_timing.py +85 -0
- airbyte_cdk/utils/is_cloud_environment.py +18 -0
- airbyte_cdk/utils/mapping_helpers.py +162 -0
- airbyte_cdk/utils/message_utils.py +26 -0
- airbyte_cdk/utils/oneof_option_config.py +33 -0
- airbyte_cdk/utils/print_buffer.py +75 -0
- airbyte_cdk/utils/schema_inferrer.py +270 -0
- airbyte_cdk/utils/slice_hasher.py +37 -0
- airbyte_cdk/utils/spec_schema_transformations.py +26 -0
- airbyte_cdk/utils/stream_status_utils.py +43 -0
- airbyte_cdk/utils/traced_exception.py +145 -0
- airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
- airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
- airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
- airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
- airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
- airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
airbyte_cdk/logger.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
import logging.config
|
|
8
|
+
from typing import Any, Callable, Mapping, Optional, Tuple
|
|
9
|
+
|
|
10
|
+
import orjson
|
|
11
|
+
|
|
12
|
+
from airbyte_cdk.models import (
|
|
13
|
+
AirbyteLogMessage,
|
|
14
|
+
AirbyteMessage,
|
|
15
|
+
AirbyteMessageSerializer,
|
|
16
|
+
Level,
|
|
17
|
+
Type,
|
|
18
|
+
)
|
|
19
|
+
from airbyte_cdk.utils import PrintBuffer
|
|
20
|
+
from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
|
|
21
|
+
|
|
22
|
+
# Module-level stream object handed to the console handler below; it is created
# once, at import time, with flush_interval=0.1.
PRINT_BUFFER = PrintBuffer(flush_interval=0.1)

# Dictionary passed to logging.config.dictConfig by init_logger().
# It declares a single "console" StreamHandler that writes to PRINT_BUFFER,
# formats records with AirbyteLogFormatter, and is attached to the root logger.
# Loggers that already exist are left enabled.
LOGGING_CONFIG = dict(
    version=1,
    disable_existing_loggers=False,
    formatters=dict(
        # "()" tells dictConfig to instantiate the named factory as the formatter.
        airbyte={"()": "airbyte_cdk.logger.AirbyteLogFormatter", "format": "%(message)s"},
    ),
    handlers=dict(
        # "class" is a Python keyword, so this entry stays a dict literal.
        console={
            "class": "logging.StreamHandler",
            "stream": PRINT_BUFFER,
            "formatter": "airbyte",
        },
    ),
    root=dict(handlers=["console"]),
)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def init_logger(name: Optional[str] = None) -> logging.Logger:
    """Return the logger called ``name``, set to INFO, after applying LOGGING_CONFIG.

    :param name: logger name; ``None`` selects the root logger.
    :return: the requested ``logging.Logger`` with its level set to ``logging.INFO``.

    Note that ``logging.config.dictConfig(LOGGING_CONFIG)`` is applied on every call,
    not only the first one.
    """
    requested_logger = logging.getLogger(name)
    requested_logger.setLevel(logging.INFO)
    logging.config.dictConfig(LOGGING_CONFIG)
    return requested_logger
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def lazy_log(logger: logging.Logger, level: int, lazy_log_provider: Callable[[], str]) -> None:
    """
    Log a message whose text is only computed when it will actually be emitted.

    :param logger: logger that decides whether ``level`` is enabled and receives the message.
    :param level: numeric logging level (e.g. ``logging.DEBUG``).
    :param lazy_log_provider: zero-argument callable returning the message; it is invoked
        only when ``logger`` is enabled for ``level``.
    """
    if not logger.isEnabledFor(level):
        # Level disabled: skip the (possibly expensive) message construction entirely.
        return
    logger.log(level, lazy_log_provider())
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class AirbyteLogFormatter(logging.Formatter):
    """Output log records using AirbyteMessage

    DEBUG records are rendered as a plain JSON object with ``type``, ``message`` and
    ``data`` keys (``data`` holds the values passed through ``extra=``). Every other
    level is wrapped in an ``AirbyteMessage`` of type ``LOG`` and serialized to JSON.
    In both cases the text goes through ``filter_secrets`` before being returned
    (presumably to mask registered secret values; see ``airbyte_secrets_utils``).
    """

    # Transforming Python log levels to Airbyte protocol log levels
    level_mapping = {
        logging.FATAL: Level.FATAL,
        logging.ERROR: Level.ERROR,
        logging.WARNING: Level.WARN,
        logging.INFO: Level.INFO,
        logging.DEBUG: Level.DEBUG,
    }

    # Attribute names that are never developer-supplied extras: everything a bare
    # LogRecord carries, plus "message" and "asctime", which logging.Formatter.format()
    # adds to a record when it formats it (the logging module itself rejects both
    # names as ``extra`` keys). Computed once instead of on every DEBUG record.
    _non_extra_attrs = frozenset(
        vars(logging.LogRecord("", 0, "", 0, None, None, None))
    ) | {"message", "asctime"}

    def format(self, record: logging.LogRecord) -> str:
        """Return a JSON representation of the log message

        :param record: the record to render.
        :return: a JSON string; its shape depends on the record's level (see class docstring).
        """
        # Levels outside the mapping (custom or NOTSET) fall back to INFO. The fallback
        # is the enum member, matching the type of every mapped value.
        airbyte_level = self.level_mapping.get(record.levelno, Level.INFO)
        if airbyte_level == Level.DEBUG:
            extras = self.extract_extra_args_from_record(record)
            debug_dict = {"type": "DEBUG", "message": record.getMessage(), "data": extras}
            return filter_secrets(json.dumps(debug_dict))
        else:
            message = super().format(record)
            message = filter_secrets(message)
            log_message = AirbyteMessage(
                type=Type.LOG, log=AirbyteLogMessage(level=airbyte_level, message=message)
            )
            return orjson.dumps(AirbyteMessageSerializer.dump(log_message)).decode()

    @staticmethod
    def extract_extra_args_from_record(record: logging.LogRecord) -> Mapping[str, Any]:
        """
        The python logger conflates default args with extra args. We compare against the
        attributes of an empty log record to isolate fields passed to the log record via
        extra by the developer.

        :param record: the record to inspect.
        :return: mapping of each extra attribute name to ``str()`` of its value.
        """
        non_extra = AirbyteLogFormatter._non_extra_attrs
        return {key: str(value) for key, value in vars(record).items() if key not in non_extra}
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def log_by_prefix(msg: str, default_level: str) -> Tuple[int, str]:
    """Custom method, which takes log level from first word of message

    :param msg: raw log line, optionally starting with one of
        FATAL / ERROR / WARN / INFO / DEBUG / TRACE.
    :param default_level: level name used when ``msg`` does not start with a known prefix.
    :return: ``(numeric logging level, message)``. When a prefix is recognised it is
        removed and the remaining words are re-joined with single spaces, so runs of
        whitespace in the rest of the line are collapsed. Otherwise ``msg`` is returned
        unchanged.
    """
    valid_log_types = ["FATAL", "ERROR", "WARN", "INFO", "DEBUG", "TRACE"]
    split_line = msg.split()
    first_word = next(iter(split_line), None)
    if first_word in valid_log_types:
        level_name = first_word
        rendered_message = " ".join(split_line[1:])
    else:
        level_name = default_level
        rendered_message = msg

    log_level = logging.getLevelName(level_name)
    if not isinstance(log_level, int):
        # getLevelName() answers "Level <name>" (a str) for names it does not know.
        # Python has no built-in TRACE level, so map it to the closest one (DEBUG);
        # any other unknown name falls back to INFO so callers always get an int
        # they can pass to Logger.log().
        log_level = logging.DEBUG if level_name == "TRACE" else logging.INFO

    return log_level, rendered_message
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# The earlier versions of airbyte-cdk (<=0.28.0) had the airbyte_protocol python classes
|
|
2
|
+
# declared inline in the airbyte-cdk code. However, somewhere around Feb 2023 the
|
|
3
|
+
# Airbyte Protocol moved to its own repo/PyPi package, called airbyte-protocol-models.
|
|
4
|
+
# This directory including the airbyte_protocol.py and well_known_types.py files
|
|
5
|
+
# are just wrappers on top of that stand-alone package which do some namespacing magic
|
|
6
|
+
# to make the airbyte_protocol python classes available to the airbyte-cdk consumer as part
|
|
7
|
+
# of airbyte-cdk rather than a standalone package.
|
|
8
|
+
from .airbyte_protocol import (
|
|
9
|
+
AdvancedAuth,
|
|
10
|
+
AirbyteAnalyticsTraceMessage,
|
|
11
|
+
AirbyteCatalog,
|
|
12
|
+
AirbyteConnectionStatus,
|
|
13
|
+
AirbyteControlConnectorConfigMessage,
|
|
14
|
+
AirbyteControlMessage,
|
|
15
|
+
AirbyteErrorTraceMessage,
|
|
16
|
+
AirbyteEstimateTraceMessage,
|
|
17
|
+
AirbyteGlobalState,
|
|
18
|
+
AirbyteLogMessage,
|
|
19
|
+
AirbyteMessage,
|
|
20
|
+
AirbyteProtocol,
|
|
21
|
+
AirbyteRecordMessage,
|
|
22
|
+
AirbyteRecordMessageFileReference,
|
|
23
|
+
AirbyteStateBlob,
|
|
24
|
+
AirbyteStateMessage,
|
|
25
|
+
AirbyteStateStats,
|
|
26
|
+
AirbyteStateType,
|
|
27
|
+
AirbyteStream,
|
|
28
|
+
AirbyteStreamState,
|
|
29
|
+
AirbyteStreamStatus,
|
|
30
|
+
AirbyteStreamStatusReason,
|
|
31
|
+
AirbyteStreamStatusReasonType,
|
|
32
|
+
AirbyteStreamStatusTraceMessage,
|
|
33
|
+
AirbyteTraceMessage,
|
|
34
|
+
AuthFlowType,
|
|
35
|
+
ConfiguredAirbyteCatalog,
|
|
36
|
+
ConfiguredAirbyteStream,
|
|
37
|
+
ConnectorSpecification,
|
|
38
|
+
DestinationSyncMode,
|
|
39
|
+
EstimateType,
|
|
40
|
+
FailureType,
|
|
41
|
+
Level,
|
|
42
|
+
OAuthConfigSpecification,
|
|
43
|
+
OauthConnectorInputSpecification,
|
|
44
|
+
OrchestratorType,
|
|
45
|
+
State,
|
|
46
|
+
Status,
|
|
47
|
+
StreamDescriptor,
|
|
48
|
+
SyncMode,
|
|
49
|
+
TraceType,
|
|
50
|
+
Type,
|
|
51
|
+
)
|
|
52
|
+
from .airbyte_protocol_serializers import (
|
|
53
|
+
AirbyteMessageSerializer,
|
|
54
|
+
AirbyteStateMessageSerializer,
|
|
55
|
+
AirbyteStreamStateSerializer,
|
|
56
|
+
ConfiguredAirbyteCatalogSerializer,
|
|
57
|
+
ConfiguredAirbyteStreamSerializer,
|
|
58
|
+
ConnectorSpecificationSerializer,
|
|
59
|
+
)
|
|
60
|
+
from .well_known_types import (
|
|
61
|
+
BinaryData,
|
|
62
|
+
Boolean,
|
|
63
|
+
Date,
|
|
64
|
+
Integer,
|
|
65
|
+
Model,
|
|
66
|
+
Number,
|
|
67
|
+
String,
|
|
68
|
+
TimestampWithoutTimezone,
|
|
69
|
+
TimestampWithTimezone,
|
|
70
|
+
TimeWithoutTimezone,
|
|
71
|
+
TimeWithTimezone,
|
|
72
|
+
)
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
from dataclasses import InitVar, dataclass
|
|
6
|
+
from typing import Annotated, Any, Dict, List, Mapping, Optional, Union
|
|
7
|
+
|
|
8
|
+
from airbyte_protocol_dataclasses.models import * # noqa: F403 # Allow '*'
|
|
9
|
+
from serpyco_rs.metadata import Alias
|
|
10
|
+
|
|
11
|
+
# ruff: noqa: F405 # ignore fuzzy import issues with 'import *'
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class AirbyteStateBlob:
    """
    Free-form state container whose attributes are defined entirely by the caller.

    Used to "mimic" pydantic Basemodel with ConfigDict(extra='allow') option: any mapping passed
    positionally and any keyword argument becomes an instance attribute, so the set of attributes
    is only known at runtime.

    Attributes:
        kwargs (InitVar[Mapping[str, Any]]): Init-only declaration; the values actually stored on
            the instance are whatever was passed to ``__init__``.

    Methods:
        __init__(*args: Any, **kwargs: Any) -> None:
            Copies every positional mapping into the instance dictionary (in order), then assigns
            each keyword argument as an attribute.

        __eq__(other: object) -> bool:
            Two blobs are equal when their instance dictionaries are equal.
            Any object that is not an `AirbyteStateBlob` compares as `False`.
    """

    kwargs: InitVar[Mapping[str, Any]]

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Positional arguments are mappings merged straight into the instance dictionary;
        # later mappings win over earlier ones on key collisions.
        instance_attributes = vars(self)
        for mapping in args:
            instance_attributes.update(mapping)
        # Keyword arguments go through regular attribute assignment.
        for name, attribute_value in kwargs.items():
            setattr(self, name, attribute_value)

    def __eq__(self, other: object) -> bool:
        if isinstance(other, AirbyteStateBlob):
            return bool(self.__dict__ == other.__dict__)
        return False
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# The following dataclasses have been redeclared to include the new version of AirbyteStateBlob
|
|
54
|
+
# Per-stream state: a stream descriptor plus an optional state blob.
# Declared in this module so that `stream_state` is typed with the AirbyteStateBlob defined above.
@dataclass
class AirbyteStreamState:
    # Required; StreamDescriptor comes from the star import at the top of the module.
    stream_descriptor: StreamDescriptor  # type: ignore [name-defined]
    # Optional; defaults to None when no state is provided.
    stream_state: Optional[AirbyteStateBlob] = None
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# Global state: a list of per-stream states plus an optional shared blob.
# Declared in this module so that it uses this module's AirbyteStreamState and AirbyteStateBlob.
@dataclass
class AirbyteGlobalState:
    # Required list of per-stream states.
    stream_states: List[AirbyteStreamState]
    # Optional; defaults to None.
    shared_state: Optional[AirbyteStateBlob] = None
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# State message declared in this module so that `stream` and `global_` use this module's
# AirbyteStreamState and AirbyteGlobalState. Every field is optional and defaults to None.
@dataclass
class AirbyteStateMessage:
    type: Optional[AirbyteStateType] = None  # type: ignore [name-defined]
    stream: Optional[AirbyteStreamState] = None
    # The Python attribute is `global_`; the Alias metadata maps it to the name "global".
    global_: Annotated[AirbyteGlobalState | None, Alias("global")] = (
        None  # "global" is a reserved keyword in python ⇒ Alias is used for (de-)serialization
    )
    data: Optional[Dict[str, Any]] = None
    sourceStats: Optional[AirbyteStateStats] = None  # type: ignore [name-defined]
    destinationStats: Optional[AirbyteStateStats] = None  # type: ignore [name-defined]
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# Top-level message declared in this module so that `state` uses this module's AirbyteStateMessage.
# `type` is the only required field; every payload field is optional and defaults to None.
@dataclass
class AirbyteMessage:
    type: Type  # type: ignore [name-defined]
    log: Optional[AirbyteLogMessage] = None  # type: ignore [name-defined]
    spec: Optional[ConnectorSpecification] = None  # type: ignore [name-defined]
    connectionStatus: Optional[AirbyteConnectionStatus] = None  # type: ignore [name-defined]
    catalog: Optional[AirbyteCatalog] = None  # type: ignore [name-defined]
    record: Optional[AirbyteRecordMessage] = None  # type: ignore [name-defined]
    state: Optional[AirbyteStateMessage] = None
    trace: Optional[AirbyteTraceMessage] = None  # type: ignore [name-defined]
    control: Optional[AirbyteControlMessage] = None  # type: ignore [name-defined]
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
|
2
|
+
from typing import Any, Dict
|
|
3
|
+
|
|
4
|
+
from serpyco_rs import CustomType, Serializer
|
|
5
|
+
|
|
6
|
+
from .airbyte_protocol import ( # type: ignore[attr-defined] # all classes are imported to airbyte_protocol via *
|
|
7
|
+
AirbyteMessage,
|
|
8
|
+
AirbyteStateBlob,
|
|
9
|
+
AirbyteStateMessage,
|
|
10
|
+
AirbyteStreamState,
|
|
11
|
+
ConfiguredAirbyteCatalog,
|
|
12
|
+
ConfiguredAirbyteStream,
|
|
13
|
+
ConnectorSpecification,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class AirbyteStateBlobType(CustomType[AirbyteStateBlob, Dict[str, Any]]):
    """Custom serpyco_rs type that converts between `AirbyteStateBlob` and a plain dictionary."""

    def serialize(self, value: AirbyteStateBlob) -> Dict[str, Any]:
        """Return a shallow copy of the blob's instance dictionary."""
        # cant use orjson.dumps() directly because private attributes are excluded, e.g. "__ab_full_refresh_sync_complete"
        return dict(value.__dict__)

    def deserialize(self, value: Dict[str, Any]) -> AirbyteStateBlob:
        """Build an `AirbyteStateBlob` whose attributes are the entries of `value`."""
        blob = AirbyteStateBlob(value)
        return blob

    def get_json_schema(self) -> Dict[str, Any]:
        """Describe the serialized form as a JSON object with no further constraints."""
        schema: Dict[str, Any] = {"type": "object"}
        return schema
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def custom_type_resolver(t: type) -> CustomType[AirbyteStateBlob, Dict[str, Any]] | None:
    """Return a new `AirbyteStateBlobType` when `t` is exactly `AirbyteStateBlob`, else `None`."""
    if t is AirbyteStateBlob:
        return AirbyteStateBlobType()
    return None
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Module-level serializers, all built with omit_none=True.
# The first three are built with custom_type_resolver, which supplies AirbyteStateBlobType
# when the requested type is AirbyteStateBlob.
AirbyteStreamStateSerializer = Serializer(
    AirbyteStreamState, omit_none=True, custom_type_resolver=custom_type_resolver
)
AirbyteStateMessageSerializer = Serializer(
    AirbyteStateMessage, omit_none=True, custom_type_resolver=custom_type_resolver
)
AirbyteMessageSerializer = Serializer(
    AirbyteMessage, omit_none=True, custom_type_resolver=custom_type_resolver
)
# The remaining serializers are built without a custom type resolver.
ConfiguredAirbyteCatalogSerializer = Serializer(ConfiguredAirbyteCatalog, omit_none=True)
ConfiguredAirbyteStreamSerializer = Serializer(ConfiguredAirbyteStream, omit_none=True)
ConnectorSpecificationSerializer = Serializer(ConnectorSpecification, omit_none=True)
|
airbyte_cdk/py.typed
ADDED
|
File without changes
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
import dpath.options
|
|
6
|
+
|
|
7
|
+
from .abstract_source import AbstractSource
|
|
8
|
+
from .config import BaseConfig
|
|
9
|
+
from .source import Source
|
|
10
|
+
|
|
11
|
+
# As part of the CDK sources, we do not control what the APIs return and it is possible that a key is empty.
|
|
12
|
+
# Reasons why we are doing this at the airbyte_cdk level:
|
|
13
|
+
# * As of today, all the use cases should allow for empty keys
|
|
14
|
+
# * Cases as of 2023-08-31: oauth/session token provider responses, extractor, transformation and substream
|
|
15
|
+
# * The behavior is explicit at the package level and not hidden in every package that needs dpath.options.ALLOW_EMPTY_STRING_KEYS = True
|
|
16
|
+
# There is a downside in enforcing this option preemptively in the module __init__.py: the runtime code will import dpath even though it
|
|
17
|
+
# might not need dpath leading to longer initialization time.
|
|
18
|
+
# There is a downside in using dpath as a library since the options are global: if we have two pieces of code that want different options,
|
|
19
|
+
# this will not be thread-safe.
|
|
20
|
+
dpath.options.ALLOW_EMPTY_STRING_KEYS = True
|
|
21
|
+
|
|
22
|
+
__all__ = [
|
|
23
|
+
"AbstractSource",
|
|
24
|
+
"BaseConfig",
|
|
25
|
+
"Source",
|
|
26
|
+
]
|
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
from typing import (
|
|
8
|
+
Any,
|
|
9
|
+
Dict,
|
|
10
|
+
Iterable,
|
|
11
|
+
Iterator,
|
|
12
|
+
List,
|
|
13
|
+
Mapping,
|
|
14
|
+
MutableMapping,
|
|
15
|
+
Optional,
|
|
16
|
+
Tuple,
|
|
17
|
+
Union,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
from airbyte_cdk.exception_handler import generate_failed_streams_error_message
|
|
21
|
+
from airbyte_cdk.models import (
|
|
22
|
+
AirbyteCatalog,
|
|
23
|
+
AirbyteConnectionStatus,
|
|
24
|
+
AirbyteMessage,
|
|
25
|
+
AirbyteStateMessage,
|
|
26
|
+
AirbyteStreamStatus,
|
|
27
|
+
ConfiguredAirbyteCatalog,
|
|
28
|
+
ConfiguredAirbyteStream,
|
|
29
|
+
FailureType,
|
|
30
|
+
Status,
|
|
31
|
+
StreamDescriptor,
|
|
32
|
+
)
|
|
33
|
+
from airbyte_cdk.models import Type as MessageType
|
|
34
|
+
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
|
35
|
+
from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
|
|
36
|
+
from airbyte_cdk.sources.source import Source
|
|
37
|
+
from airbyte_cdk.sources.streams import Stream
|
|
38
|
+
from airbyte_cdk.sources.streams.core import StreamData
|
|
39
|
+
from airbyte_cdk.sources.streams.http.http import HttpStream
|
|
40
|
+
from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
|
|
41
|
+
from airbyte_cdk.sources.utils.schema_helpers import InternalConfig, split_config
|
|
42
|
+
from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger, SliceLogger
|
|
43
|
+
from airbyte_cdk.utils.event_timing import create_timer
|
|
44
|
+
from airbyte_cdk.utils.stream_status_utils import (
|
|
45
|
+
as_airbyte_message as stream_status_as_airbyte_message,
|
|
46
|
+
)
|
|
47
|
+
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
|
48
|
+
|
|
49
|
+
# Single module-level repository instance; AbstractSource.message_repository returns it by default.
_default_message_repository = InMemoryMessageRepository()
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class AbstractSource(Source, ABC):
|
|
53
|
+
"""
|
|
54
|
+
Abstract base class for an Airbyte Source. Consumers should implement any abstract methods
|
|
55
|
+
in this class to create an Airbyte Specification compliant Source.
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
@abstractmethod
|
|
59
|
+
def check_connection(
|
|
60
|
+
self, logger: logging.Logger, config: Mapping[str, Any]
|
|
61
|
+
) -> Tuple[bool, Optional[Any]]:
|
|
62
|
+
"""
|
|
63
|
+
:param logger: source logger
|
|
64
|
+
:param config: The user-provided configuration as specified by the source's spec.
|
|
65
|
+
This usually contains information required to check connection e.g. tokens, secrets and keys etc.
|
|
66
|
+
:return: A tuple of (boolean, error). If boolean is true, then the connection check is successful
|
|
67
|
+
and we can connect to the underlying data source using the provided configuration.
|
|
68
|
+
Otherwise, the input config cannot be used to connect to the underlying data source,
|
|
69
|
+
and the "error" object should describe what went wrong.
|
|
70
|
+
The error object will be cast to string to display the problem to the user.
|
|
71
|
+
"""
|
|
72
|
+
|
|
73
|
+
@abstractmethod
|
|
74
|
+
def streams(self, config: Mapping[str, Any]) -> List[Stream]:
|
|
75
|
+
"""
|
|
76
|
+
:param config: The user-provided configuration as specified by the source's spec.
|
|
77
|
+
Any stream construction related operation should happen here.
|
|
78
|
+
:return: A list of the streams in this source connector.
|
|
79
|
+
"""
|
|
80
|
+
|
|
81
|
+
# Stream name to instance map for applying output object transformation
|
|
82
|
+
_stream_to_instance_map: Dict[str, Stream] = {}
|
|
83
|
+
_slice_logger: SliceLogger = DebugSliceLogger()
|
|
84
|
+
|
|
85
|
+
def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:
|
|
86
|
+
"""Implements the Discover operation from the Airbyte Specification.
|
|
87
|
+
See https://docs.airbyte.com/understanding-airbyte/airbyte-protocol/#discover.
|
|
88
|
+
"""
|
|
89
|
+
streams = [stream.as_airbyte_stream() for stream in self.streams(config=config)]
|
|
90
|
+
return AirbyteCatalog(streams=streams)
|
|
91
|
+
|
|
92
|
+
def check(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteConnectionStatus:
|
|
93
|
+
"""Implements the Check Connection operation from the Airbyte Specification.
|
|
94
|
+
See https://docs.airbyte.com/understanding-airbyte/airbyte-protocol/#check.
|
|
95
|
+
"""
|
|
96
|
+
check_succeeded, error = self.check_connection(logger, config)
|
|
97
|
+
if not check_succeeded:
|
|
98
|
+
return AirbyteConnectionStatus(status=Status.FAILED, message=repr(error))
|
|
99
|
+
return AirbyteConnectionStatus(status=Status.SUCCEEDED)
|
|
100
|
+
|
|
101
|
+
def read(
|
|
102
|
+
self,
|
|
103
|
+
logger: logging.Logger,
|
|
104
|
+
config: Mapping[str, Any],
|
|
105
|
+
catalog: ConfiguredAirbyteCatalog,
|
|
106
|
+
state: Optional[List[AirbyteStateMessage]] = None,
|
|
107
|
+
) -> Iterator[AirbyteMessage]:
|
|
108
|
+
"""Implements the Read operation from the Airbyte Specification. See https://docs.airbyte.com/understanding-airbyte/airbyte-protocol/."""
|
|
109
|
+
logger.info(f"Starting syncing {self.name}")
|
|
110
|
+
config, internal_config = split_config(config)
|
|
111
|
+
# TODO assert all streams exist in the connector
|
|
112
|
+
# get the streams once in case the connector needs to make any queries to generate them
|
|
113
|
+
stream_instances = {s.name: s for s in self.streams(config)}
|
|
114
|
+
state_manager = ConnectorStateManager(state=state)
|
|
115
|
+
self._stream_to_instance_map = stream_instances
|
|
116
|
+
|
|
117
|
+
stream_name_to_exception: MutableMapping[str, AirbyteTracedException] = {}
|
|
118
|
+
|
|
119
|
+
with create_timer(self.name) as timer:
|
|
120
|
+
for configured_stream in catalog.streams:
|
|
121
|
+
stream_instance = stream_instances.get(configured_stream.stream.name)
|
|
122
|
+
is_stream_exist = bool(stream_instance)
|
|
123
|
+
try:
|
|
124
|
+
# Used direct reference to `stream_instance` instead of `is_stream_exist` to avoid mypy type checking errors
|
|
125
|
+
if not stream_instance:
|
|
126
|
+
if not self.raise_exception_on_missing_stream:
|
|
127
|
+
yield stream_status_as_airbyte_message(
|
|
128
|
+
configured_stream.stream, AirbyteStreamStatus.INCOMPLETE
|
|
129
|
+
)
|
|
130
|
+
continue
|
|
131
|
+
|
|
132
|
+
error_message = (
|
|
133
|
+
f"The stream '{configured_stream.stream.name}' in your connection configuration was not found in the source. "
|
|
134
|
+
f"Refresh the schema in your replication settings and remove this stream from future sync attempts."
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
# Use configured_stream as stream_instance to support references in error handling.
|
|
138
|
+
stream_instance = configured_stream.stream
|
|
139
|
+
|
|
140
|
+
raise AirbyteTracedException(
|
|
141
|
+
message="A stream listed in your configuration was not found in the source. Please check the logs for more "
|
|
142
|
+
"details.",
|
|
143
|
+
internal_message=error_message,
|
|
144
|
+
failure_type=FailureType.config_error,
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
timer.start_event(f"Syncing stream {configured_stream.stream.name}")
|
|
148
|
+
logger.info(f"Marking stream {configured_stream.stream.name} as STARTED")
|
|
149
|
+
yield stream_status_as_airbyte_message(
|
|
150
|
+
configured_stream.stream, AirbyteStreamStatus.STARTED
|
|
151
|
+
)
|
|
152
|
+
yield from self._read_stream(
|
|
153
|
+
logger=logger,
|
|
154
|
+
stream_instance=stream_instance,
|
|
155
|
+
configured_stream=configured_stream,
|
|
156
|
+
state_manager=state_manager,
|
|
157
|
+
internal_config=internal_config,
|
|
158
|
+
)
|
|
159
|
+
logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
|
|
160
|
+
yield stream_status_as_airbyte_message(
|
|
161
|
+
configured_stream.stream, AirbyteStreamStatus.COMPLETE
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
except Exception as e:
|
|
165
|
+
yield from self._emit_queued_messages()
|
|
166
|
+
logger.exception(
|
|
167
|
+
f"Encountered an exception while reading stream {configured_stream.stream.name}"
|
|
168
|
+
)
|
|
169
|
+
logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
|
|
170
|
+
yield stream_status_as_airbyte_message(
|
|
171
|
+
configured_stream.stream, AirbyteStreamStatus.INCOMPLETE
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
stream_descriptor = StreamDescriptor(name=configured_stream.stream.name)
|
|
175
|
+
|
|
176
|
+
if isinstance(e, AirbyteTracedException):
|
|
177
|
+
traced_exception = e
|
|
178
|
+
info_message = f"Stopping sync on error from stream {configured_stream.stream.name} because {self.name} does not support continuing syncs on error."
|
|
179
|
+
else:
|
|
180
|
+
traced_exception = self._serialize_exception(
|
|
181
|
+
stream_descriptor, e, stream_instance=stream_instance
|
|
182
|
+
)
|
|
183
|
+
info_message = f"{self.name} does not support continuing syncs on error from stream {configured_stream.stream.name}"
|
|
184
|
+
|
|
185
|
+
yield traced_exception.as_sanitized_airbyte_message(
|
|
186
|
+
stream_descriptor=stream_descriptor
|
|
187
|
+
)
|
|
188
|
+
stream_name_to_exception[stream_instance.name] = traced_exception # type: ignore # use configured_stream if stream_instance is None
|
|
189
|
+
if self.stop_sync_on_stream_failure:
|
|
190
|
+
logger.info(info_message)
|
|
191
|
+
break
|
|
192
|
+
finally:
|
|
193
|
+
# Finish read event only if the stream instance exists;
|
|
194
|
+
# otherwise, there's no need as it never started
|
|
195
|
+
if is_stream_exist:
|
|
196
|
+
timer.finish_event()
|
|
197
|
+
logger.info(f"Finished syncing {configured_stream.stream.name}")
|
|
198
|
+
logger.info(timer.report())
|
|
199
|
+
|
|
200
|
+
if len(stream_name_to_exception) > 0:
|
|
201
|
+
error_message = generate_failed_streams_error_message(
|
|
202
|
+
{key: [value] for key, value in stream_name_to_exception.items()}
|
|
203
|
+
)
|
|
204
|
+
logger.info(error_message)
|
|
205
|
+
# We still raise at least one exception when a stream raises an exception because the platform currently relies
|
|
206
|
+
# on a non-zero exit code to determine if a sync attempt has failed. We also raise the exception as a config_error
|
|
207
|
+
# type because this combined error isn't actionable, but rather the previously emitted individual errors.
|
|
208
|
+
raise AirbyteTracedException(
|
|
209
|
+
message=error_message, failure_type=FailureType.config_error
|
|
210
|
+
)
|
|
211
|
+
logger.info(f"Finished syncing {self.name}")
|
|
212
|
+
|
|
213
|
+
@staticmethod
|
|
214
|
+
def _serialize_exception(
|
|
215
|
+
stream_descriptor: StreamDescriptor, e: Exception, stream_instance: Optional[Stream] = None
|
|
216
|
+
) -> AirbyteTracedException:
|
|
217
|
+
display_message = stream_instance.get_error_display_message(e) if stream_instance else None
|
|
218
|
+
if display_message:
|
|
219
|
+
return AirbyteTracedException.from_exception(
|
|
220
|
+
e, message=display_message, stream_descriptor=stream_descriptor
|
|
221
|
+
)
|
|
222
|
+
return AirbyteTracedException.from_exception(e, stream_descriptor=stream_descriptor)
|
|
223
|
+
|
|
224
|
+
@property
|
|
225
|
+
def raise_exception_on_missing_stream(self) -> bool:
|
|
226
|
+
return False
|
|
227
|
+
|
|
228
|
+
def _read_stream(
|
|
229
|
+
self,
|
|
230
|
+
logger: logging.Logger,
|
|
231
|
+
stream_instance: Stream,
|
|
232
|
+
configured_stream: ConfiguredAirbyteStream,
|
|
233
|
+
state_manager: ConnectorStateManager,
|
|
234
|
+
internal_config: InternalConfig,
|
|
235
|
+
) -> Iterator[AirbyteMessage]:
|
|
236
|
+
if internal_config.page_size and isinstance(stream_instance, HttpStream):
|
|
237
|
+
logger.info(
|
|
238
|
+
f"Setting page size for {stream_instance.name} to {internal_config.page_size}"
|
|
239
|
+
)
|
|
240
|
+
stream_instance.page_size = internal_config.page_size
|
|
241
|
+
logger.debug(
|
|
242
|
+
f"Syncing configured stream: {configured_stream.stream.name}",
|
|
243
|
+
extra={
|
|
244
|
+
"sync_mode": configured_stream.sync_mode,
|
|
245
|
+
"primary_key": configured_stream.primary_key,
|
|
246
|
+
"cursor_field": configured_stream.cursor_field,
|
|
247
|
+
},
|
|
248
|
+
)
|
|
249
|
+
stream_instance.log_stream_sync_configuration()
|
|
250
|
+
|
|
251
|
+
stream_name = configured_stream.stream.name
|
|
252
|
+
stream_state = state_manager.get_stream_state(stream_name, stream_instance.namespace)
|
|
253
|
+
|
|
254
|
+
# This is a hack. Existing full refresh streams that are converted into resumable full refresh need to discard
|
|
255
|
+
# the state because the terminal state for a full refresh sync is not compatible with substream resumable full
|
|
256
|
+
# refresh state. This is only required when running live traffic regression testing since the platform normally
|
|
257
|
+
# handles whether to pass state
|
|
258
|
+
if stream_state == {"__ab_no_cursor_state_message": True}:
|
|
259
|
+
stream_state = {}
|
|
260
|
+
|
|
261
|
+
if "state" in dir(stream_instance):
|
|
262
|
+
stream_instance.state = stream_state # type: ignore # we check that state in the dir(stream_instance)
|
|
263
|
+
logger.info(f"Setting state of {self.name} stream to {stream_state}")
|
|
264
|
+
|
|
265
|
+
record_iterator = stream_instance.read(
|
|
266
|
+
configured_stream,
|
|
267
|
+
logger,
|
|
268
|
+
self._slice_logger,
|
|
269
|
+
stream_state,
|
|
270
|
+
state_manager,
|
|
271
|
+
internal_config,
|
|
272
|
+
)
|
|
273
|
+
|
|
274
|
+
record_counter = 0
|
|
275
|
+
logger.info(f"Syncing stream: {stream_name} ")
|
|
276
|
+
for record_data_or_message in record_iterator:
|
|
277
|
+
record = self._get_message(record_data_or_message, stream_instance)
|
|
278
|
+
if record.type == MessageType.RECORD:
|
|
279
|
+
record_counter += 1
|
|
280
|
+
if record_counter == 1:
|
|
281
|
+
logger.info(f"Marking stream {stream_name} as RUNNING")
|
|
282
|
+
# If we just read the first record of the stream, emit the transition to the RUNNING state
|
|
283
|
+
yield stream_status_as_airbyte_message(
|
|
284
|
+
configured_stream.stream, AirbyteStreamStatus.RUNNING
|
|
285
|
+
)
|
|
286
|
+
yield from self._emit_queued_messages()
|
|
287
|
+
yield record
|
|
288
|
+
|
|
289
|
+
logger.info(f"Read {record_counter} records from {stream_name} stream")
|
|
290
|
+
|
|
291
|
+
def _emit_queued_messages(self) -> Iterable[AirbyteMessage]:
|
|
292
|
+
if self.message_repository:
|
|
293
|
+
yield from self.message_repository.consume_queue()
|
|
294
|
+
return
|
|
295
|
+
|
|
296
|
+
def _get_message(
|
|
297
|
+
self, record_data_or_message: Union[StreamData, AirbyteMessage], stream: Stream
|
|
298
|
+
) -> AirbyteMessage:
|
|
299
|
+
"""
|
|
300
|
+
Converts the input to an AirbyteMessage if it is a StreamData. Returns the input as is if it is already an AirbyteMessage
|
|
301
|
+
"""
|
|
302
|
+
match record_data_or_message:
|
|
303
|
+
case AirbyteMessage():
|
|
304
|
+
return record_data_or_message
|
|
305
|
+
case _:
|
|
306
|
+
return stream_data_to_airbyte_message(
|
|
307
|
+
stream.name,
|
|
308
|
+
record_data_or_message,
|
|
309
|
+
stream.transformer,
|
|
310
|
+
stream.get_json_schema(),
|
|
311
|
+
)
|
|
312
|
+
|
|
313
|
+
@property
|
|
314
|
+
def message_repository(self) -> Union[None, MessageRepository]:
|
|
315
|
+
return _default_message_repository
|
|
316
|
+
|
|
317
|
+
@property
|
|
318
|
+
def stop_sync_on_stream_failure(self) -> bool:
|
|
319
|
+
"""
|
|
320
|
+
WARNING: This function is in-development which means it is subject to change. Use at your own risk.
|
|
321
|
+
|
|
322
|
+
By default, when a source encounters an exception while syncing a stream, it will emit an error trace message and then
|
|
323
|
+
continue syncing the next stream. This can be overwritten on a per-source basis so that the source will stop the sync
|
|
324
|
+
on the first error seen and emit a single error trace message for that stream.
|
|
325
|
+
"""
|
|
326
|
+
return False
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
2
|
+
"""The concurrent source model replaces the legacy Source model.
|
|
3
|
+
|
|
4
|
+
The concurrent source model is a new way to build sources in the Airbyte CDK. It is designed to
|
|
5
|
+
be more ergonomic and performant than the legacy Source model.
|
|
6
|
+
|
|
7
|
+
To implement a source using the concurrent source model, check out the submodules in this package.
|
|
8
|
+
"""
|