airbyte-cdk 0.72.1__py3-none-any.whl → 6.17.1.dev0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/__init__.py +355 -6
- airbyte_cdk/cli/__init__.py +1 -0
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
- airbyte_cdk/cli/source_declarative_manifest/_run.py +230 -0
- airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
- airbyte_cdk/config_observation.py +29 -10
- airbyte_cdk/connector.py +24 -24
- airbyte_cdk/connector_builder/README.md +53 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +37 -11
- airbyte_cdk/connector_builder/main.py +45 -13
- airbyte_cdk/connector_builder/message_grouper.py +189 -50
- airbyte_cdk/connector_builder/models.py +3 -2
- airbyte_cdk/destinations/__init__.py +4 -3
- airbyte_cdk/destinations/destination.py +54 -20
- airbyte_cdk/destinations/vector_db_based/README.md +37 -0
- airbyte_cdk/destinations/vector_db_based/config.py +40 -17
- airbyte_cdk/destinations/vector_db_based/document_processor.py +56 -17
- airbyte_cdk/destinations/vector_db_based/embedder.py +57 -15
- airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
- airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
- airbyte_cdk/destinations/vector_db_based/writer.py +24 -5
- airbyte_cdk/entrypoint.py +153 -44
- airbyte_cdk/exception_handler.py +21 -3
- airbyte_cdk/logger.py +30 -44
- airbyte_cdk/models/__init__.py +13 -2
- airbyte_cdk/models/airbyte_protocol.py +86 -1
- airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
- airbyte_cdk/models/file_transfer_record_message.py +13 -0
- airbyte_cdk/models/well_known_types.py +1 -1
- airbyte_cdk/sources/__init__.py +5 -1
- airbyte_cdk/sources/abstract_source.py +125 -79
- airbyte_cdk/sources/concurrent_source/__init__.py +7 -2
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +102 -36
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +29 -36
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +94 -10
- airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +20 -14
- airbyte_cdk/sources/config.py +3 -2
- airbyte_cdk/sources/connector_state_manager.py +49 -83
- airbyte_cdk/sources/declarative/async_job/job.py +52 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +497 -0
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +75 -0
- airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
- airbyte_cdk/sources/declarative/async_job/status.py +24 -0
- airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
- airbyte_cdk/sources/declarative/auth/__init__.py +2 -3
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
- airbyte_cdk/sources/declarative/auth/jwt.py +191 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +60 -20
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +10 -2
- airbyte_cdk/sources/declarative/auth/token.py +28 -10
- airbyte_cdk/sources/declarative/auth/token_provider.py +9 -8
- airbyte_cdk/sources/declarative/checks/check_stream.py +16 -8
- airbyte_cdk/sources/declarative/checks/connection_checker.py +4 -2
- airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +490 -0
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +4 -0
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +26 -6
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +1185 -85
- airbyte_cdk/sources/declarative/declarative_source.py +5 -2
- airbyte_cdk/sources/declarative/declarative_stream.py +95 -9
- airbyte_cdk/sources/declarative/decoders/__init__.py +23 -2
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
- airbyte_cdk/sources/declarative/decoders/decoder.py +11 -4
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +92 -5
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +12 -1
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +29 -24
- airbyte_cdk/sources/declarative/extractors/http_selector.py +4 -5
- airbyte_cdk/sources/declarative/extractors/record_extractor.py +2 -3
- airbyte_cdk/sources/declarative/extractors/record_filter.py +63 -8
- airbyte_cdk/sources/declarative/extractors/record_selector.py +85 -26
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +177 -0
- airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
- airbyte_cdk/sources/declarative/incremental/__init__.py +31 -3
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +346 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +156 -48
- airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +350 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +173 -74
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
- airbyte_cdk/sources/declarative/interpolation/filters.py +27 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +23 -5
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +12 -8
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +13 -6
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +21 -6
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +9 -3
- airbyte_cdk/sources/declarative/interpolation/jinja.py +72 -37
- airbyte_cdk/sources/declarative/interpolation/macros.py +72 -17
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +193 -52
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
- airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
- airbyte_cdk/sources/declarative/models/__init__.py +1 -1
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +1319 -603
- airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +2 -2
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +26 -4
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +26 -15
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +1759 -225
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +24 -4
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +39 -9
- airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
- airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +15 -3
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +222 -39
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +19 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +19 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +4 -2
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +41 -9
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +29 -14
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +5 -13
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +32 -16
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +46 -56
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +6 -32
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +119 -41
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +228 -0
- airbyte_cdk/sources/declarative/requesters/http_requester.py +98 -344
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +105 -46
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +14 -8
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +19 -8
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +53 -21
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +42 -19
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +25 -12
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +13 -10
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +26 -13
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +15 -2
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +91 -0
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +31 -14
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +27 -15
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +63 -10
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +1 -1
- airbyte_cdk/sources/declarative/requesters/requester.py +9 -17
- airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
- airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
- airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
- airbyte_cdk/sources/declarative/retrievers/__init__.py +6 -2
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +100 -0
- airbyte_cdk/sources/declarative/retrievers/retriever.py +1 -3
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +229 -73
- airbyte_cdk/sources/declarative/schema/__init__.py +14 -1
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +5 -3
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +236 -0
- airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +8 -8
- airbyte_cdk/sources/declarative/spec/spec.py +12 -5
- airbyte_cdk/sources/declarative/stream_slicers/__init__.py +1 -2
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +88 -0
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +9 -14
- airbyte_cdk/sources/declarative/transformations/add_fields.py +19 -11
- airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
- airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +13 -10
- airbyte_cdk/sources/declarative/transformations/transformation.py +5 -5
- airbyte_cdk/sources/declarative/types.py +19 -110
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +31 -10
- airbyte_cdk/sources/embedded/base_integration.py +16 -5
- airbyte_cdk/sources/embedded/catalog.py +16 -4
- airbyte_cdk/sources/embedded/runner.py +19 -3
- airbyte_cdk/sources/embedded/tools.py +5 -2
- airbyte_cdk/sources/file_based/README.md +152 -0
- airbyte_cdk/sources/file_based/__init__.py +24 -0
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +22 -6
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +46 -10
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +47 -10
- airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
- airbyte_cdk/sources/file_based/config/csv_format.py +29 -10
- airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +16 -4
- airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
- airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
- airbyte_cdk/sources/file_based/config/unstructured_format.py +13 -5
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
- airbyte_cdk/sources/file_based/exceptions.py +18 -15
- airbyte_cdk/sources/file_based/file_based_source.py +140 -33
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +69 -5
- airbyte_cdk/sources/file_based/file_types/__init__.py +14 -1
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +75 -24
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +116 -34
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +37 -0
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +24 -8
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +60 -18
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +141 -41
- airbyte_cdk/sources/file_based/remote_file.py +1 -1
- airbyte_cdk/sources/file_based/schema_helpers.py +38 -10
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +50 -13
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +67 -27
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +14 -23
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +54 -18
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +21 -9
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +27 -10
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +147 -45
- airbyte_cdk/sources/http_logger.py +8 -3
- airbyte_cdk/sources/message/__init__.py +7 -1
- airbyte_cdk/sources/message/repository.py +18 -4
- airbyte_cdk/sources/source.py +42 -38
- airbyte_cdk/sources/streams/__init__.py +2 -2
- airbyte_cdk/sources/streams/availability_strategy.py +54 -3
- airbyte_cdk/sources/streams/call_rate.py +64 -21
- airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
- airbyte_cdk/sources/{declarative/incremental → streams/checkpoint}/cursor.py +17 -14
- airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
- airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
- airbyte_cdk/sources/streams/concurrent/README.md +7 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +7 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +84 -75
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +30 -2
- airbyte_cdk/sources/streams/concurrent/cursor.py +298 -42
- airbyte_cdk/sources/streams/concurrent/default_stream.py +12 -3
- airbyte_cdk/sources/streams/concurrent/exceptions.py +3 -0
- airbyte_cdk/sources/streams/concurrent/helpers.py +14 -3
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +12 -3
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +10 -3
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -16
- airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +15 -5
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +109 -17
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +90 -72
- airbyte_cdk/sources/streams/core.py +412 -87
- airbyte_cdk/sources/streams/http/__init__.py +2 -1
- airbyte_cdk/sources/streams/http/availability_strategy.py +12 -101
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
- airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
- airbyte_cdk/sources/streams/http/exceptions.py +27 -7
- airbyte_cdk/sources/streams/http/http.py +369 -246
- airbyte_cdk/sources/streams/http/http_client.py +531 -0
- airbyte_cdk/sources/streams/http/rate_limiting.py +76 -12
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +28 -9
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +90 -35
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
- airbyte_cdk/sources/types.py +154 -0
- airbyte_cdk/sources/utils/record_helper.py +36 -21
- airbyte_cdk/sources/utils/schema_helpers.py +13 -6
- airbyte_cdk/sources/utils/slice_logger.py +4 -1
- airbyte_cdk/sources/utils/transform.py +54 -20
- airbyte_cdk/sql/_util/hashing.py +34 -0
- airbyte_cdk/sql/_util/name_normalizers.py +92 -0
- airbyte_cdk/sql/constants.py +32 -0
- airbyte_cdk/sql/exceptions.py +235 -0
- airbyte_cdk/sql/secrets.py +123 -0
- airbyte_cdk/sql/shared/__init__.py +15 -0
- airbyte_cdk/sql/shared/catalog_providers.py +145 -0
- airbyte_cdk/sql/shared/sql_processor.py +786 -0
- airbyte_cdk/sql/types.py +160 -0
- airbyte_cdk/test/catalog_builder.py +70 -18
- airbyte_cdk/test/entrypoint_wrapper.py +117 -42
- airbyte_cdk/test/mock_http/__init__.py +1 -1
- airbyte_cdk/test/mock_http/matcher.py +6 -0
- airbyte_cdk/test/mock_http/mocker.py +57 -10
- airbyte_cdk/test/mock_http/request.py +19 -3
- airbyte_cdk/test/mock_http/response.py +3 -1
- airbyte_cdk/test/mock_http/response_builder.py +32 -16
- airbyte_cdk/test/state_builder.py +18 -10
- airbyte_cdk/test/utils/__init__.py +1 -0
- airbyte_cdk/test/utils/data.py +24 -0
- airbyte_cdk/test/utils/http_mocking.py +16 -0
- airbyte_cdk/test/utils/manifest_only_fixtures.py +60 -0
- airbyte_cdk/test/utils/reading.py +26 -0
- airbyte_cdk/utils/__init__.py +2 -1
- airbyte_cdk/utils/airbyte_secrets_utils.py +5 -3
- airbyte_cdk/utils/analytics_message.py +10 -2
- airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
- airbyte_cdk/utils/event_timing.py +10 -10
- airbyte_cdk/utils/mapping_helpers.py +3 -1
- airbyte_cdk/utils/message_utils.py +20 -11
- airbyte_cdk/utils/print_buffer.py +75 -0
- airbyte_cdk/utils/schema_inferrer.py +198 -28
- airbyte_cdk/utils/slice_hasher.py +30 -0
- airbyte_cdk/utils/spec_schema_transformations.py +6 -3
- airbyte_cdk/utils/stream_status_utils.py +8 -1
- airbyte_cdk/utils/traced_exception.py +61 -21
- airbyte_cdk-6.17.1.dev0.dist-info/METADATA +109 -0
- airbyte_cdk-6.17.1.dev0.dist-info/RECORD +350 -0
- {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.17.1.dev0.dist-info}/WHEEL +1 -2
- airbyte_cdk-6.17.1.dev0.dist-info/entry_points.txt +3 -0
- airbyte_cdk/sources/declarative/create_partial.py +0 -92
- airbyte_cdk/sources/declarative/parsers/class_types_registry.py +0 -102
- airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py +0 -64
- airbyte_cdk/sources/declarative/requesters/error_handlers/response_action.py +0 -16
- airbyte_cdk/sources/declarative/requesters/error_handlers/response_status.py +0 -68
- airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py +0 -114
- airbyte_cdk/sources/deprecated/base_source.py +0 -94
- airbyte_cdk/sources/deprecated/client.py +0 -99
- airbyte_cdk/sources/singer/__init__.py +0 -8
- airbyte_cdk/sources/singer/singer_helpers.py +0 -304
- airbyte_cdk/sources/singer/source.py +0 -186
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -23
- airbyte_cdk/sources/streams/http/auth/__init__.py +0 -17
- airbyte_cdk/sources/streams/http/auth/core.py +0 -29
- airbyte_cdk/sources/streams/http/auth/oauth.py +0 -113
- airbyte_cdk/sources/streams/http/auth/token.py +0 -47
- airbyte_cdk/sources/streams/utils/stream_helper.py +0 -40
- airbyte_cdk/sources/utils/catalog_helpers.py +0 -22
- airbyte_cdk/sources/utils/schema_models.py +0 -84
- airbyte_cdk-0.72.1.dist-info/METADATA +0 -243
- airbyte_cdk-0.72.1.dist-info/RECORD +0 -466
- airbyte_cdk-0.72.1.dist-info/top_level.txt +0 -3
- source_declarative_manifest/main.py +0 -29
- unit_tests/connector_builder/__init__.py +0 -3
- unit_tests/connector_builder/test_connector_builder_handler.py +0 -871
- unit_tests/connector_builder/test_message_grouper.py +0 -713
- unit_tests/connector_builder/utils.py +0 -27
- unit_tests/destinations/test_destination.py +0 -243
- unit_tests/singer/test_singer_helpers.py +0 -56
- unit_tests/singer/test_singer_source.py +0 -112
- unit_tests/sources/__init__.py +0 -0
- unit_tests/sources/concurrent_source/__init__.py +0 -3
- unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +0 -106
- unit_tests/sources/declarative/__init__.py +0 -3
- unit_tests/sources/declarative/auth/__init__.py +0 -3
- unit_tests/sources/declarative/auth/test_oauth.py +0 -331
- unit_tests/sources/declarative/auth/test_selective_authenticator.py +0 -39
- unit_tests/sources/declarative/auth/test_session_token_auth.py +0 -182
- unit_tests/sources/declarative/auth/test_token_auth.py +0 -200
- unit_tests/sources/declarative/auth/test_token_provider.py +0 -73
- unit_tests/sources/declarative/checks/__init__.py +0 -3
- unit_tests/sources/declarative/checks/test_check_stream.py +0 -146
- unit_tests/sources/declarative/decoders/__init__.py +0 -0
- unit_tests/sources/declarative/decoders/test_json_decoder.py +0 -16
- unit_tests/sources/declarative/external_component.py +0 -13
- unit_tests/sources/declarative/extractors/__init__.py +0 -3
- unit_tests/sources/declarative/extractors/test_dpath_extractor.py +0 -55
- unit_tests/sources/declarative/extractors/test_record_filter.py +0 -55
- unit_tests/sources/declarative/extractors/test_record_selector.py +0 -179
- unit_tests/sources/declarative/incremental/__init__.py +0 -0
- unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +0 -860
- unit_tests/sources/declarative/incremental/test_per_partition_cursor.py +0 -406
- unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +0 -332
- unit_tests/sources/declarative/interpolation/__init__.py +0 -3
- unit_tests/sources/declarative/interpolation/test_filters.py +0 -80
- unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py +0 -40
- unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py +0 -35
- unit_tests/sources/declarative/interpolation/test_interpolated_nested_mapping.py +0 -45
- unit_tests/sources/declarative/interpolation/test_interpolated_string.py +0 -25
- unit_tests/sources/declarative/interpolation/test_jinja.py +0 -240
- unit_tests/sources/declarative/interpolation/test_macros.py +0 -73
- unit_tests/sources/declarative/parsers/__init__.py +0 -3
- unit_tests/sources/declarative/parsers/test_manifest_component_transformer.py +0 -406
- unit_tests/sources/declarative/parsers/test_manifest_reference_resolver.py +0 -139
- unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +0 -1847
- unit_tests/sources/declarative/parsers/testing_components.py +0 -36
- unit_tests/sources/declarative/partition_routers/__init__.py +0 -3
- unit_tests/sources/declarative/partition_routers/test_list_partition_router.py +0 -155
- unit_tests/sources/declarative/partition_routers/test_single_partition_router.py +0 -14
- unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py +0 -404
- unit_tests/sources/declarative/requesters/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_constant_backoff.py +0 -34
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py +0 -36
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py +0 -38
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py +0 -35
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py +0 -64
- unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py +0 -213
- unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py +0 -178
- unit_tests/sources/declarative/requesters/error_handlers/test_http_response_filter.py +0 -121
- unit_tests/sources/declarative/requesters/error_handlers/test_response_status.py +0 -44
- unit_tests/sources/declarative/requesters/paginators/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py +0 -64
- unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +0 -313
- unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py +0 -12
- unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py +0 -58
- unit_tests/sources/declarative/requesters/paginators/test_page_increment.py +0 -70
- unit_tests/sources/declarative/requesters/paginators/test_request_option.py +0 -43
- unit_tests/sources/declarative/requesters/paginators/test_stop_condition.py +0 -105
- unit_tests/sources/declarative/requesters/request_options/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py +0 -101
- unit_tests/sources/declarative/requesters/test_http_requester.py +0 -974
- unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py +0 -32
- unit_tests/sources/declarative/retrievers/__init__.py +0 -3
- unit_tests/sources/declarative/retrievers/test_simple_retriever.py +0 -542
- unit_tests/sources/declarative/schema/__init__.py +0 -6
- unit_tests/sources/declarative/schema/source_test/SourceTest.py +0 -8
- unit_tests/sources/declarative/schema/source_test/__init__.py +0 -3
- unit_tests/sources/declarative/schema/test_default_schema_loader.py +0 -32
- unit_tests/sources/declarative/schema/test_inline_schema_loader.py +0 -19
- unit_tests/sources/declarative/schema/test_json_file_schema_loader.py +0 -26
- unit_tests/sources/declarative/states/__init__.py +0 -3
- unit_tests/sources/declarative/stream_slicers/__init__.py +0 -3
- unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py +0 -225
- unit_tests/sources/declarative/test_create_partial.py +0 -83
- unit_tests/sources/declarative/test_declarative_stream.py +0 -103
- unit_tests/sources/declarative/test_manifest_declarative_source.py +0 -1260
- unit_tests/sources/declarative/test_types.py +0 -39
- unit_tests/sources/declarative/test_yaml_declarative_source.py +0 -148
- unit_tests/sources/file_based/__init__.py +0 -0
- unit_tests/sources/file_based/availability_strategy/__init__.py +0 -0
- unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py +0 -100
- unit_tests/sources/file_based/config/__init__.py +0 -0
- unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +0 -28
- unit_tests/sources/file_based/config/test_csv_format.py +0 -34
- unit_tests/sources/file_based/config/test_file_based_stream_config.py +0 -84
- unit_tests/sources/file_based/discovery_policy/__init__.py +0 -0
- unit_tests/sources/file_based/discovery_policy/test_default_discovery_policy.py +0 -31
- unit_tests/sources/file_based/file_types/__init__.py +0 -0
- unit_tests/sources/file_based/file_types/test_avro_parser.py +0 -243
- unit_tests/sources/file_based/file_types/test_csv_parser.py +0 -546
- unit_tests/sources/file_based/file_types/test_jsonl_parser.py +0 -158
- unit_tests/sources/file_based/file_types/test_parquet_parser.py +0 -274
- unit_tests/sources/file_based/file_types/test_unstructured_parser.py +0 -593
- unit_tests/sources/file_based/helpers.py +0 -70
- unit_tests/sources/file_based/in_memory_files_source.py +0 -211
- unit_tests/sources/file_based/scenarios/__init__.py +0 -0
- unit_tests/sources/file_based/scenarios/avro_scenarios.py +0 -744
- unit_tests/sources/file_based/scenarios/check_scenarios.py +0 -220
- unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +0 -2844
- unit_tests/sources/file_based/scenarios/csv_scenarios.py +0 -3105
- unit_tests/sources/file_based/scenarios/file_based_source_builder.py +0 -91
- unit_tests/sources/file_based/scenarios/incremental_scenarios.py +0 -1926
- unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +0 -930
- unit_tests/sources/file_based/scenarios/parquet_scenarios.py +0 -754
- unit_tests/sources/file_based/scenarios/scenario_builder.py +0 -234
- unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +0 -608
- unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +0 -746
- unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +0 -726
- unit_tests/sources/file_based/stream/__init__.py +0 -0
- unit_tests/sources/file_based/stream/concurrent/__init__.py +0 -0
- unit_tests/sources/file_based/stream/concurrent/test_adapters.py +0 -362
- unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +0 -458
- unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +0 -310
- unit_tests/sources/file_based/stream/test_default_file_based_stream.py +0 -244
- unit_tests/sources/file_based/test_file_based_scenarios.py +0 -320
- unit_tests/sources/file_based/test_file_based_stream_reader.py +0 -272
- unit_tests/sources/file_based/test_scenarios.py +0 -253
- unit_tests/sources/file_based/test_schema_helpers.py +0 -346
- unit_tests/sources/fixtures/__init__.py +0 -3
- unit_tests/sources/fixtures/source_test_fixture.py +0 -153
- unit_tests/sources/message/__init__.py +0 -0
- unit_tests/sources/message/test_repository.py +0 -153
- unit_tests/sources/streams/__init__.py +0 -0
- unit_tests/sources/streams/concurrent/__init__.py +0 -3
- unit_tests/sources/streams/concurrent/scenarios/__init__.py +0 -3
- unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +0 -250
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +0 -140
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +0 -452
- unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +0 -76
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +0 -418
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +0 -142
- unit_tests/sources/streams/concurrent/scenarios/utils.py +0 -55
- unit_tests/sources/streams/concurrent/test_adapters.py +0 -380
- unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +0 -684
- unit_tests/sources/streams/concurrent/test_cursor.py +0 -139
- unit_tests/sources/streams/concurrent/test_datetime_state_converter.py +0 -369
- unit_tests/sources/streams/concurrent/test_default_stream.py +0 -197
- unit_tests/sources/streams/concurrent/test_partition_enqueuer.py +0 -90
- unit_tests/sources/streams/concurrent/test_partition_reader.py +0 -67
- unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +0 -106
- unit_tests/sources/streams/http/__init__.py +0 -0
- unit_tests/sources/streams/http/auth/__init__.py +0 -0
- unit_tests/sources/streams/http/auth/test_auth.py +0 -173
- unit_tests/sources/streams/http/requests_native_auth/__init__.py +0 -0
- unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +0 -423
- unit_tests/sources/streams/http/test_availability_strategy.py +0 -180
- unit_tests/sources/streams/http/test_http.py +0 -635
- unit_tests/sources/streams/test_availability_strategy.py +0 -70
- unit_tests/sources/streams/test_call_rate.py +0 -300
- unit_tests/sources/streams/test_stream_read.py +0 -405
- unit_tests/sources/streams/test_streams_core.py +0 -184
- unit_tests/sources/test_abstract_source.py +0 -1442
- unit_tests/sources/test_concurrent_source.py +0 -112
- unit_tests/sources/test_config.py +0 -92
- unit_tests/sources/test_connector_state_manager.py +0 -482
- unit_tests/sources/test_http_logger.py +0 -252
- unit_tests/sources/test_integration_source.py +0 -86
- unit_tests/sources/test_source.py +0 -684
- unit_tests/sources/test_source_read.py +0 -460
- unit_tests/test/__init__.py +0 -0
- unit_tests/test/mock_http/__init__.py +0 -0
- unit_tests/test/mock_http/test_matcher.py +0 -53
- unit_tests/test/mock_http/test_mocker.py +0 -214
- unit_tests/test/mock_http/test_request.py +0 -117
- unit_tests/test/mock_http/test_response_builder.py +0 -177
- unit_tests/test/test_entrypoint_wrapper.py +0 -240
- unit_tests/utils/__init__.py +0 -0
- unit_tests/utils/test_datetime_format_inferrer.py +0 -60
- unit_tests/utils/test_mapping_helpers.py +0 -54
- unit_tests/utils/test_message_utils.py +0 -91
- unit_tests/utils/test_rate_limiting.py +0 -26
- unit_tests/utils/test_schema_inferrer.py +0 -202
- unit_tests/utils/test_secret_utils.py +0 -135
- unit_tests/utils/test_stream_status_utils.py +0 -61
- unit_tests/utils/test_traced_exception.py +0 -107
- airbyte_cdk/sources/{deprecated → declarative/async_job}/__init__.py +0 -0
- {source_declarative_manifest → airbyte_cdk/sources/declarative/migrations}/__init__.py +0 -0
- {unit_tests/destinations → airbyte_cdk/sql}/__init__.py +0 -0
- {unit_tests/singer → airbyte_cdk/sql/_util}/__init__.py +0 -0
- {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.17.1.dev0.dist-info}/LICENSE.txt +0 -0
@@ -1,304 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
-
#
|
4
|
-
|
5
|
-
|
6
|
-
import json
|
7
|
-
import os
|
8
|
-
import selectors
|
9
|
-
import subprocess
|
10
|
-
from dataclasses import dataclass
|
11
|
-
from datetime import datetime
|
12
|
-
from io import TextIOWrapper
|
13
|
-
from typing import Any, DefaultDict, Dict, Iterator, List, Mapping, Optional, Tuple
|
14
|
-
|
15
|
-
from airbyte_cdk.logger import log_by_prefix
|
16
|
-
from airbyte_cdk.models import (
|
17
|
-
AirbyteCatalog,
|
18
|
-
AirbyteMessage,
|
19
|
-
AirbyteRecordMessage,
|
20
|
-
AirbyteStateMessage,
|
21
|
-
AirbyteStream,
|
22
|
-
ConfiguredAirbyteCatalog,
|
23
|
-
ConfiguredAirbyteStream,
|
24
|
-
SyncMode,
|
25
|
-
Type,
|
26
|
-
)
|
27
|
-
|
28
|
-
_INCREMENTAL = "INCREMENTAL"
|
29
|
-
_FULL_TABLE = "FULL_TABLE"
|
30
|
-
|
31
|
-
|
32
|
-
def to_json(string):
    """
    Parse ``string`` as JSON.

    :param string: text to decode with ``json.loads``
    :return: the decoded value, or ``False`` when decoding raises ``ValueError``
    """
    try:
        parsed = json.loads(string)
    except ValueError:
        # Callers get False (not None) for undecodable input.
        return False
    return parsed
|
37
|
-
|
38
|
-
|
39
|
-
def is_field_metadata(metadata):
    """
    Tell whether a metadata entry is treated as field-level metadata.

    :param metadata: mapping with a "breadcrumb" sequence
    :return: True when the breadcrumb has exactly two elements and the first one is not "property"
    """
    # NOTE(review): Singer breadcrumbs presumably start with "properties", which this check also accepts -- confirm the literal is intended.
    breadcrumb = metadata.get("breadcrumb")
    return len(breadcrumb) == 2 and breadcrumb[0] != "property"
|
44
|
-
|
45
|
-
|
46
|
-
def configured_for_incremental(configured_stream: ConfiguredAirbyteStream):
    """
    Check whether a configured stream asks for incremental sync.

    :param configured_stream: stream whose ``sync_mode`` is inspected
    :return: a truthy value only when ``sync_mode`` is set and equals ``SyncMode.incremental``
    """
    mode = configured_stream.sync_mode
    return mode and mode == SyncMode.incremental
|
48
|
-
|
49
|
-
|
50
|
-
def get_stream_level_metadata(metadatas: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """
    Find the stream-level metadata among a stream's metadata entries.

    :param metadatas: metadata entries of a single stream
    :return: the "metadata" value of the first entry that is not field metadata and has a "metadata" key, or None if there is none
    """
    candidates = (entry.get("metadata") for entry in metadatas if not is_field_metadata(entry) and "metadata" in entry)
    return next(candidates, None)
|
55
|
-
|
56
|
-
|
57
|
-
@dataclass
class Catalogs:
    """Pairs a Singer catalog with the Airbyte catalog built from it."""

    # Catalog as read from the Singer tap (typed loosely as ``object``).
    singer_catalog: object
    # The same catalog expressed as an Airbyte catalog.
    airbyte_catalog: AirbyteCatalog
|
61
|
-
|
62
|
-
|
63
|
-
@dataclass
class SyncModeInfo:
    """Sync-mode settings that can be applied to an Airbyte stream; every field defaults to None."""

    # Sync modes the stream should advertise.
    supported_sync_modes: Optional[List[SyncMode]] = None
    # Whether the cursor is defined by the source.
    source_defined_cursor: Optional[bool] = None
    # Cursor field to use by default.
    default_cursor_field: Optional[List[str]] = None
|
68
|
-
|
69
|
-
|
70
|
-
def set_sync_modes_from_metadata(airbyte_stream: AirbyteStream, metadatas: List[Dict[str, Any]]):
    """
    Derive the sync-mode attributes of ``airbyte_stream`` from its Singer stream-level metadata.

    A stream is incremental if it declares replication keys or if forced-replication-method is set to incremental.
    Nothing is changed when there is no stream-level metadata or when neither hint is present.

    :param airbyte_stream: stream updated in place
    :param metadatas: Singer metadata entries of the stream
    """
    stream_metadata = get_stream_level_metadata(metadatas)
    if not stream_metadata:
        return

    replication_keys = stream_metadata.get("valid-replication-keys", [])
    if len(replication_keys) > 0:
        airbyte_stream.source_defined_cursor = True
        airbyte_stream.supported_sync_modes = [SyncMode.incremental]
        # TODO if there are multiple replication keys, allow configuring which one is used. For now we deterministically take the first
        airbyte_stream.default_cursor_field = [sorted(replication_keys)[0]]
        return

    if "forced-replication-method" not in stream_metadata:
        return

    method = stream_metadata["forced-replication-method"]
    if isinstance(method, dict):
        # The value may be a mapping that carries the method under "replication-method".
        method = method.get("replication-method", "")
    method = method.upper()

    if method == _INCREMENTAL:
        airbyte_stream.source_defined_cursor = True
        airbyte_stream.supported_sync_modes = [SyncMode.incremental]
    elif method == _FULL_TABLE:
        airbyte_stream.source_defined_cursor = False
        airbyte_stream.supported_sync_modes = [SyncMode.full_refresh]
|
90
|
-
|
91
|
-
|
92
|
-
def override_sync_modes(airbyte_stream: AirbyteStream, overrides: SyncModeInfo):
    """
    Apply developer-supplied sync-mode overrides to ``airbyte_stream`` in place.

    ``source_defined_cursor`` is always written (falling back to False); the supported sync modes are written
    only when truthy, and the default cursor field only when it is not None.

    :param airbyte_stream: stream to update
    :param overrides: values to apply
    """
    sync_modes = overrides.supported_sync_modes
    cursor_field = overrides.default_cursor_field

    airbyte_stream.source_defined_cursor = overrides.source_defined_cursor or False
    if sync_modes:
        airbyte_stream.supported_sync_modes = sync_modes
    if cursor_field is not None:
        airbyte_stream.default_cursor_field = cursor_field
|
98
|
-
|
99
|
-
|
100
|
-
class SingerHelper:
    """Static helpers that run a Singer tap as a subprocess and translate its catalog and messages to Airbyte models."""

    @staticmethod
    def _transform_types(stream_properties: DefaultDict):
        """Replace the "type" of every property in ``stream_properties`` in place using ``SingerHelper._parse_type``."""
        # NOTE(review): ``_parse_type`` is not defined on this class in the visible source (mypy flags it, see
        # "CDK: typing errors #9500"), so this helper presumably raises AttributeError when called -- confirm.
        for field_object in stream_properties.values():
            field_object["type"] = SingerHelper._parse_type(field_object["type"])  # type: ignore

    @staticmethod
    def singer_catalog_to_airbyte_catalog(
        singer_catalog: Dict[str, Any], sync_mode_overrides: Dict[str, SyncModeInfo], primary_key_overrides: Dict[str, List[str]]
    ) -> AirbyteCatalog:
        """
        Convert a Singer catalog into an Airbyte catalog.

        :param singer_catalog: Singer catalog; its "streams" entry is iterated (a missing or null entry yields an empty catalog)
        :param sync_mode_overrides: A dict from stream name to the sync modes it should use. Each stream in this dict must exist in the
          Singer catalog, but not every stream in the catalog should exist in this
        :param primary_key_overrides: A dict of stream name -> list of fields to be used as PKs.
        :return: Airbyte Catalog
        """
        airbyte_streams = []
        # Default to an empty list so a catalog without "streams" does not fail with "NoneType is not iterable".
        for stream in singer_catalog.get("streams") or []:
            name = stream.get("stream")
            schema = stream.get("schema")
            airbyte_stream = AirbyteStream(name=name, json_schema=schema, supported_sync_modes=[SyncMode.full_refresh])

            # Explicit overrides win over whatever the tap's metadata declares.
            if name in sync_mode_overrides:
                override_sync_modes(airbyte_stream, sync_mode_overrides[name])
            else:
                set_sync_modes_from_metadata(airbyte_stream, stream.get("metadata", []))

            if name in primary_key_overrides:
                airbyte_stream.source_defined_primary_key = [[k] for k in primary_key_overrides[name]]
            elif stream.get("key_properties"):
                airbyte_stream.source_defined_primary_key = [[k] for k in stream["key_properties"]]

            airbyte_streams.append(airbyte_stream)
        return AirbyteCatalog(streams=airbyte_streams)

    @staticmethod
    def _read_singer_catalog(logger, shell_command: str) -> Mapping[str, Any]:
        """
        Run ``shell_command`` through the shell and parse its stdout as JSON.

        Every stderr line is logged through ``log_by_prefix(line, "ERROR")``.

        :param logger: logger that receives the stderr lines
        :param shell_command: command line executed with ``shell=True``
        :return: the decoded JSON document printed on stdout
        """
        completed_process = subprocess.run(
            shell_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True
        )
        for line in completed_process.stderr.splitlines():
            logger.log(*log_by_prefix(line, "ERROR"))

        return json.loads(completed_process.stdout)

    @staticmethod
    def get_catalogs(
        logger,
        shell_command: str,
        sync_mode_overrides: Dict[str, SyncModeInfo],
        primary_key_overrides: Dict[str, List[str]],
        excluded_streams: List,
    ) -> Catalogs:
        """
        Discover the Singer catalog with ``shell_command`` and build the matching Airbyte catalog.

        :param logger: logger passed to the discovery run
        :param shell_command: discovery command line
        :param sync_mode_overrides: per-stream sync-mode overrides
        :param primary_key_overrides: per-stream primary-key overrides
        :param excluded_streams: stream names removed from the Singer catalog before conversion
        :return: both catalogs wrapped in a ``Catalogs``
        """
        singer_catalog = SingerHelper._read_singer_catalog(logger, shell_command)
        streams = singer_catalog.get("streams", [])
        if streams and excluded_streams:
            # _read_singer_catalog is annotated as returning Mapping, hence the ignore on the item assignment (issue #9500).
            singer_catalog["streams"] = [stream for stream in streams if stream["stream"] not in excluded_streams]  # type: ignore

        # Mapping vs Dict annotation mismatch (issue #9500); ignored for now.
        airbyte_catalog = SingerHelper.singer_catalog_to_airbyte_catalog(singer_catalog, sync_mode_overrides, primary_key_overrides)  # type: ignore
        return Catalogs(singer_catalog=singer_catalog, airbyte_catalog=airbyte_catalog)

    @staticmethod
    def read(logger, shell_command, is_message=(lambda x: True)) -> Iterator[AirbyteMessage]:
        """
        Run ``shell_command`` and yield the Airbyte messages derived from its stdout.

        stderr lines are logged through ``log_by_prefix(line, "ERROR")``. stdout lines that are not a JSON object,
        or that ``is_message`` rejects, are logged through ``log_by_prefix(line, "INFO")``.

        :param logger: logger that receives non-message output
        :param shell_command: command line executed with ``shell=True``
        :param is_message: predicate applied to each decoded JSON object
        :return: iterator over the converted messages
        """
        with subprocess.Popen(shell_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) as p:
            for line, text_wrapper in SingerHelper._read_lines(p):
                if text_wrapper is not p.stdout:
                    logger.log(*log_by_prefix(line, "ERROR"))
                    continue

                out_json = to_json(line)
                # to_json returns False (not None) for undecodable lines, and _airbyte_message_from_json needs a mapping,
                # so anything that is not a JSON object is logged instead of being converted.
                if isinstance(out_json, dict) and is_message(out_json):
                    message_data = SingerHelper._airbyte_message_from_json(out_json)
                    if message_data is not None:
                        yield message_data
                else:
                    logger.log(*log_by_prefix(line, "INFO"))

    @staticmethod
    def _read_lines(process: subprocess.Popen) -> Iterator[Tuple[str, TextIOWrapper]]:
        """
        Yield ``(line, stream)`` pairs from the stdout and stderr of ``process`` as lines become available.

        After both streams are exhausted the process is awaited for up to 60 seconds.

        :param process: running subprocess whose stdout and stderr are text pipes
        :raises Exception: if the process does not exit within the timeout or exits with a non-zero code
        """
        # The context manager closes the selector even if the consumer abandons the generator early.
        with selectors.DefaultSelector() as sel:
            # mypy: Popen.stdout/stderr are Optional[IO]; they are pipes here (issue #9500).
            sel.register(process.stdout, selectors.EVENT_READ)  # type: ignore
            sel.register(process.stderr, selectors.EVENT_READ)  # type: ignore
            # Keep reading until every stream has reported EOF; a stream at EOF is unregistered so that it is
            # neither polled again nor mistaken for "all streams finished" while the other one is still open.
            while sel.get_map():
                for key, _ in sel.select():
                    line = key.fileobj.readline()  # type: ignore
                    if line:
                        yield line, key.fileobj  # type: ignore
                    else:
                        sel.unregister(key.fileobj)

        try:
            process.wait(timeout=60)
        except subprocess.TimeoutExpired as err:
            raise Exception(f"Underlying command {process.args} is hanging") from err  # type: ignore

        if process.returncode != 0:
            raise Exception(f"Underlying command {process.args} failed with exit code {process.returncode}")  # type: ignore

    @staticmethod
    def _airbyte_message_from_json(transformed_json: Mapping[str, Any]) -> Optional[AirbyteMessage]:
        """
        Convert one decoded Singer message into an Airbyte message.

        :param transformed_json: decoded Singer message
        :return: None for a None input and for "SCHEMA" / "ACTIVATE_VERSION" messages, a STATE message for "STATE",
          and a RECORD message for everything else
        """
        if transformed_json is None:
            return None

        message_type = transformed_json.get("type")
        if message_type == "SCHEMA" or message_type == "ACTIVATE_VERSION":
            return None
        if message_type == "STATE":
            state = AirbyteStateMessage(data=transformed_json["value"])
            return AirbyteMessage(type=Type.STATE, state=state)

        # todo: check that messages match the discovered schema
        record = AirbyteRecordMessage(
            stream=transformed_json["stream"],
            data=transformed_json["record"],
            # NOTE(review): truncated to whole seconds before scaling, so the value has second resolution -- confirm intended.
            emitted_at=int(datetime.now().timestamp()) * 1000,
        )
        return AirbyteMessage(type=Type.RECORD, record=record)

    @staticmethod
    def create_singer_catalog_with_selection(masked_airbyte_catalog: ConfiguredAirbyteCatalog, discovered_singer_catalog: object) -> str:
        """
        Write a Singer catalog in which the streams of ``masked_airbyte_catalog`` are marked as selected.

        Stream and metadata entries of ``discovered_singer_catalog`` are modified in place. Streams that are not
        configured are still written, but left untouched.

        :param masked_airbyte_catalog: configured Airbyte catalog naming the streams to select
        :param discovered_singer_catalog: Singer catalog mapping with a "streams" list
        :return: path of the written file, "singer_rendered_catalog.json" relative to the working directory
        """
        combined_catalog_path = "singer_rendered_catalog.json"
        masked_singer_streams = []

        stream_name_to_configured_stream = {
            configured_stream.stream.name: configured_stream for configured_stream in masked_airbyte_catalog.streams
        }

        # discovered_singer_catalog is annotated as object, hence the ignore (issue #9500). A missing "streams"
        # entry produces an empty catalog instead of a TypeError.
        for singer_stream in discovered_singer_catalog.get("streams") or []:  # type: ignore
            stream_name = singer_stream.get("stream")
            if stream_name in stream_name_to_configured_stream:
                configured_stream = stream_name_to_configured_stream[stream_name]
                # support old style catalog.
                singer_stream["schema"]["selected"] = True
                metadatas = singer_stream.get("metadata")
                if metadatas:
                    new_metadatas = []
                    for metadata in metadatas:
                        properties = metadata["metadata"]
                        properties["selected"] = True
                        if not is_field_metadata(metadata):
                            if configured_for_incremental(configured_stream):
                                replication_method = _INCREMENTAL
                                if configured_stream.cursor_field:
                                    properties["replication-key"] = configured_stream.cursor_field[0]
                            else:
                                replication_method = _FULL_TABLE
                            properties["forced-replication-method"] = replication_method
                            properties["replication-method"] = replication_method
                        elif "fieldExclusions" in properties:
                            # A field with any exclusions is deselected again.
                            properties["selected"] = not properties["fieldExclusions"]
                        new_metadatas.append(metadata)
                    singer_stream["metadata"] = new_metadatas

            masked_singer_streams.append(singer_stream)

        combined_catalog = {"streams": masked_singer_streams}
        with open(combined_catalog_path, "w") as fh:
            fh.write(json.dumps(combined_catalog))

        return combined_catalog_path
|
@@ -1,186 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
-
#
|
4
|
-
|
5
|
-
|
6
|
-
import logging
|
7
|
-
import os
|
8
|
-
from typing import Any, Dict, Iterable, List, Mapping, Type
|
9
|
-
|
10
|
-
from airbyte_cdk.models import AirbyteCatalog, AirbyteConnectionStatus, AirbyteMessage, ConfiguredAirbyteCatalog, Status
|
11
|
-
from airbyte_cdk.sources.source import BaseSource
|
12
|
-
from airbyte_cdk.sources.utils.catalog_helpers import CatalogHelper
|
13
|
-
|
14
|
-
from .singer_helpers import Catalogs, SingerHelper, SyncModeInfo
|
15
|
-
|
16
|
-
|
17
|
-
class ConfigContainer(Dict[str, Any]):
    """Dictionary of config values that additionally carries the path of the config file."""

    # Path associated with this config.
    config_path: str

    def __init__(self, config, config_path):
        """
        :param config: initial content, passed to the ``dict`` constructor
        :param config_path: path stored on the instance as ``config_path``
        """
        self.config_path = config_path
        super().__init__(config)
|
23
|
-
|
24
|
-
|
25
|
-
class SingerSource(BaseSource[ConfigContainer, str, str]):
    """Source that delegates discovery and reading to a Singer tap; catalog and state are handled as file paths."""

    def configure(self, config: Mapping[str, Any], temp_dir: str) -> ConfigContainer:
        """
        Store the (transformed) config as ``config.json`` inside ``temp_dir`` so the tap can be pointed at it.

        Override this when additional temporary files have to be written to the temp dir.

        :param config: raw input config
        :param temp_dir: directory that receives ``config.json``
        :return: the transformed config together with the path it was written to
        """
        path = os.path.join(temp_dir, "config.json")
        container = ConfigContainer(self.transform_config(config), path)
        # write_config is not defined in this class; presumably inherited from BaseSource.
        self.write_config(container, path)
        return container

    # Can be overridden to change an input config
    def transform_config(self, config: Mapping[str, Any]) -> Mapping[str, Any]:
        """
        Hook for adapting the config to what a specific Singer tap expects; returns the config unchanged by default.
        """
        return config

    def read_catalog(self, catalog_path: str) -> str:
        """
        Return the catalog path itself: a Singer source works with the file path, not a parsed catalog object.
        """
        return catalog_path

    def read_state(self, state_path: str) -> str:
        """
        Return the state path itself: a Singer source works with the file path, not a parsed state object.
        """
        return state_path

    def check_config(self, logger: logging.Logger, config_path: str, config: ConfigContainer) -> AirbyteConnectionStatus:
        """
        Verify that the configuration can be used to connect to the integration.

        Implementations may use either the config file path or the config object. Must be overridden.
        """
        raise NotImplementedError

    def discover_cmd(self, logger: logging.Logger, config_path: str) -> str:
        """
        Build the command line that runs discovery in the Singer tap. Must be overridden.

        Example: for a tap invoked as `tap-postgres` with its config at "/path/config.json",
        the result would be "tap-postgres --config /path/config.json".
        """
        raise NotImplementedError

    def read_cmd(self, logger: logging.Logger, config_path: str, catalog_path: str, state_path: str = None) -> str:
        """
        Build the command line that reads data from the Singer tap. Must be overridden.

        Example: for a tap invoked as `tap-postgres` with its config at "/path/config.json" and its catalog at
        "/path/catalog.json", the result would be "tap-postgres --config /path/config.json --catalog /path/catalog.json".
        """
        raise NotImplementedError

    def _discover_internal(self, logger: logging.Logger, config_path: str) -> Catalogs:
        """Run the discovery command and return both the Singer and the Airbyte catalog, applying this source's overrides."""
        return SingerHelper.get_catalogs(
            logger,
            self.discover_cmd(logger, config_path),
            self.get_sync_mode_overrides(),
            self.get_primary_key_overrides(),
            self.get_excluded_streams(),
        )

    def check(self, logger: logging.Logger, config: ConfigContainer) -> AirbyteConnectionStatus:
        """
        Test whether the input configuration can be used to connect to the integration; delegates to ``check_config``.
        """
        return self.check_config(logger, config.config_path, config)

    def discover(self, logger: logging.Logger, config: ConfigContainer) -> AirbyteCatalog:
        """
        Parent-class ``discover`` implementation: returns the Airbyte catalog produced by discovery.
        """
        catalogs = self._discover_internal(logger, config.config_path)
        return catalogs.airbyte_catalog

    def read(self, logger: logging.Logger, config: ConfigContainer, catalog_path: str, state_path: str = None) -> Iterable[AirbyteMessage]:
        """
        Parent-class ``read`` implementation.

        Runs discovery, renders a Singer catalog with the configured streams selected, and streams the messages
        produced by the read command.
        """
        discovered = self._discover_internal(logger, config.config_path)
        # _read_json_file is not defined in this class; presumably inherited from BaseSource.
        configured_catalog = ConfiguredAirbyteCatalog.parse_obj(self._read_json_file(catalog_path))
        rendered_catalog_path = SingerHelper.create_singer_catalog_with_selection(configured_catalog, discovered.singer_catalog)

        command = self.read_cmd(logger, config.config_path, rendered_catalog_path, state_path)
        return SingerHelper.read(logger, command)

    def get_sync_mode_overrides(self) -> Dict[str, SyncModeInfo]:
        """
        Developer-supplied sync-mode information per stream; empty by default.

        The Singer Spec lets taps declare in their catalog that a stream supports incremental sync
        (valid-replication-keys, forced-replication-method, and others). Many incremental taps do not declare this
        and simply use their input state, giving no hint to the user. A connector built on such a tap cannot detect
        automatically which streams are full refresh or incremental, or what their cursors are, so the developer has
        to state it manually.

        The dict returned here is used to override each listed stream's sync-mode information in the Airbyte
        catalog output by discover. Only set fields provided in the SyncModeInfo are used; a field that is not set
        is not overridden in the output catalog.

        :return: A dict from stream name to the sync modes that should be applied to this stream.
        """
        return {}

    def get_primary_key_overrides(self) -> Dict[str, List[str]]:
        """
        Counterpart of ``get_sync_mode_overrides`` for primary keys; empty by default.

        :return: A dict from stream name to the list of primary key fields for the stream.
        """
        return {}

    def get_excluded_streams(self) -> List[str]:
        """
        Streams to leave out of the catalog; empty by default.

        :return: A list of excluded stream names
        """
        return []
|
138
|
-
|
139
|
-
|
140
|
-
class BaseSingerSource(SingerSource):
    """
    SingerSource with ready-made check/discover/read command handling.

    Subclasses provide ``try_connect``, ``api_error``, ``tap_cmd`` and ``tap_name``.
    """

    # When True, the state file is never passed to the tap and discovered catalogs are coerced to full refresh.
    force_full_refresh = False

    def check_config(self, logger: logging.Logger, config_path: str, config: Mapping[str, Any]) -> AirbyteConnectionStatus:
        """
        Try to connect with ``config`` and report the outcome.

        :return: SUCCEEDED, or FAILED with a message when ``try_connect`` raises ``self.api_error``
        """
        try:
            self.try_connect(logger, config)
        except self.api_error as err:
            logger.error(f"Exception while connecting to {self.tap_name}: {err}")
            # this should be in UI
            error_msg = f"Unable to connect to {self.tap_name} with the provided credentials. Error: {err}"
            return AirbyteConnectionStatus(status=Status.FAILED, message=error_msg)
        return AirbyteConnectionStatus(status=Status.SUCCEEDED)

    def discover_cmd(self, logger: logging.Logger, config_path: str) -> str:
        """Return ``<tap_cmd> --config <config_path> --discover`` with the path quoted for the shell."""
        import shlex

        # The command is executed with shell=True; quoting keeps paths with spaces or shell metacharacters intact.
        # Plain paths are returned unchanged by shlex.quote, so existing command lines stay the same.
        return f"{self.tap_cmd} --config {shlex.quote(str(config_path))} --discover"

    def read_cmd(self, logger: logging.Logger, config_path: str, catalog_path: str, state_path: str = None) -> str:
        """
        Return the read command line: ``<tap_cmd> --config ... --catalog ... [--state ...]``.

        ``--state`` is omitted when ``state_path`` is None or ``force_full_refresh`` is set. Paths are quoted for the shell.
        """
        import shlex

        state_path = None if self.force_full_refresh else state_path
        args = {"--config": config_path, "--catalog": catalog_path, "--state": state_path}
        # Same quoting rationale as in discover_cmd; tap_cmd itself is left as-is since it may already carry arguments.
        cmd = " ".join(f"{k} {shlex.quote(str(v))}" for k, v in args.items() if v is not None)

        return f"{self.tap_cmd} {cmd}"

    def discover(self, logger: logging.Logger, config: ConfigContainer) -> AirbyteCatalog:
        """Return the discovered catalog, coerced to full refresh when ``force_full_refresh`` is set."""
        catalog = super().discover(logger, config)
        if self.force_full_refresh:
            return CatalogHelper.coerce_catalog_as_full_refresh(catalog)
        return catalog

    def try_connect(self, logger: logging.Logger, config: Mapping[str, Any]):
        """Test provided credentials, raises self.api_error if something goes wrong"""
        raise NotImplementedError

    @property
    def api_error(self) -> Type[Exception]:
        """Class/Base class of the exception that will be thrown if the tap is misconfigured or service unavailable"""
        raise NotImplementedError

    @property
    def tap_cmd(self) -> str:
        """Tap command"""
        raise NotImplementedError

    @property
    def tap_name(self) -> str:
        """Tap name"""
        raise NotImplementedError
|
@@ -1,23 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
-
#
|
4
|
-
|
5
|
-
from typing import Any, Mapping
|
6
|
-
|
7
|
-
|
8
|
-
class Record:
    """
    A single record read from a stream, together with the name of that stream.
    """

    def __init__(self, data: Mapping[str, Any], stream_name: str):
        """
        :param data: content of the record
        :param stream_name: name of the stream the record belongs to
        """
        self.stream_name = stream_name
        self.data = data

    def __eq__(self, other: Any) -> bool:
        """Records are equal when both their data and their stream name are equal; other types never compare equal."""
        if isinstance(other, Record):
            return self.data == other.data and self.stream_name == other.stream_name
        return False

    def __repr__(self) -> str:
        """Return a debug representation showing data and stream name."""
        return f"Record(data={self.data}, stream_name={self.stream_name})"
|
@@ -1,17 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
-
#
|
4
|
-
|
5
|
-
# Initialize Auth Package
|
6
|
-
from .core import HttpAuthenticator, NoAuth
|
7
|
-
from .oauth import Oauth2Authenticator
|
8
|
-
from .token import BasicHttpAuthenticator, MultipleTokenAuthenticator, TokenAuthenticator
|
9
|
-
|
10
|
-
__all__ = [
|
11
|
-
"BasicHttpAuthenticator",
|
12
|
-
"HttpAuthenticator",
|
13
|
-
"NoAuth",
|
14
|
-
"Oauth2Authenticator",
|
15
|
-
"TokenAuthenticator",
|
16
|
-
"MultipleTokenAuthenticator",
|
17
|
-
]
|
@@ -1,29 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
-
#
|
4
|
-
|
5
|
-
|
6
|
-
from abc import ABC, abstractmethod
|
7
|
-
from typing import Any, Mapping
|
8
|
-
|
9
|
-
from deprecated import deprecated
|
10
|
-
|
11
|
-
|
12
|
-
@deprecated(version="0.1.20", reason="Use requests.auth.AuthBase instead")
class HttpAuthenticator(ABC):
    """
    Abstract base for HTTP authentication strategies.

    A strategy is generally expected to supply its security credentials through HTTP headers.
    """

    @abstractmethod
    def get_auth_header(self) -> Mapping[str, Any]:
        """
        Build the authentication headers.

        :return: A dictionary containing all the necessary headers to authenticate.
        """
|
24
|
-
|
25
|
-
|
26
|
-
@deprecated(version="0.1.20", reason="Set `authenticator=None` instead")
class NoAuth(HttpAuthenticator):
    """Authenticator that contributes no headers."""

    def get_auth_header(self) -> Mapping[str, Any]:
        """Return an empty header mapping."""
        no_headers: Mapping[str, Any] = {}
        return no_headers
|
@@ -1,113 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
-
#
|
4
|
-
|
5
|
-
|
6
|
-
import logging
|
7
|
-
from typing import Any, List, Mapping, MutableMapping, Optional, Tuple
|
8
|
-
|
9
|
-
import backoff
|
10
|
-
import pendulum
|
11
|
-
import requests
|
12
|
-
from deprecated import deprecated
|
13
|
-
|
14
|
-
from ..exceptions import DefaultBackoffException
|
15
|
-
from .core import HttpAuthenticator
|
16
|
-
|
17
|
-
logger = logging.getLogger("airbyte")
|
18
|
-
|
19
|
-
|
20
|
-
@deprecated(version="0.1.20", reason="Use airbyte_cdk.sources.streams.http.requests_native_auth.Oauth2Authenticator instead")
class Oauth2Authenticator(HttpAuthenticator):
    """
    Generates OAuth2.0 access tokens from an OAuth2.0 refresh token and client credentials.
    The generated access token is attached to each request via the Authorization header.
    """

    def __init__(
        self,
        token_refresh_endpoint: str,
        client_id: str,
        client_secret: str,
        refresh_token: str,
        scopes: Optional[List[str]] = None,
        refresh_access_token_headers: Optional[Mapping[str, Any]] = None,
        refresh_access_token_authenticator: Optional[HttpAuthenticator] = None,
    ):
        """
        :param token_refresh_endpoint: URL the refresh request is POSTed to
        :param client_id: OAuth client id sent in the refresh request body
        :param client_secret: OAuth client secret sent in the refresh request body
        :param refresh_token: refresh token sent in the refresh request body
        :param scopes: optional scopes added to the refresh request body when truthy
        :param refresh_access_token_headers: optional extra headers for the refresh request
        :param refresh_access_token_authenticator: optional authenticator whose headers are added to the refresh request
        """
        self.token_refresh_endpoint = token_refresh_endpoint
        self.client_secret = client_secret
        self.client_id = client_id
        self.refresh_token = refresh_token
        self.scopes = scopes
        self.refresh_access_token_headers = refresh_access_token_headers
        self.refresh_access_token_authenticator = refresh_access_token_authenticator

        # Start with an expiry in the past so the first get_access_token() call triggers a refresh.
        self._token_expiry_date = pendulum.now().subtract(days=1)
        self._access_token = None

    def get_auth_header(self) -> Mapping[str, Any]:
        """Return the ``Authorization: Bearer <token>`` header, refreshing the token first if it has expired."""
        return {"Authorization": f"Bearer {self.get_access_token()}"}

    def get_access_token(self):
        """Return the cached access token, refreshing it when ``token_has_expired()`` is true."""
        if self.token_has_expired():
            # Take the timestamp before the request so the computed expiry is not later than the real one.
            t0 = pendulum.now()
            token, expires_in = self.refresh_access_token()
            self._access_token = token
            self._token_expiry_date = t0.add(seconds=expires_in)

        return self._access_token

    def token_has_expired(self) -> bool:
        """Return True when the current time is past the stored expiry date."""
        return pendulum.now() > self._token_expiry_date

    def get_refresh_request_body(self) -> Mapping[str, Any]:
        """Override to define additional parameters"""
        payload: MutableMapping[str, Any] = {
            "grant_type": "refresh_token",
            "client_id": self.client_id,
            "client_secret": self.client_secret,
            "refresh_token": self.refresh_token,
        }

        if self.scopes:
            payload["scopes"] = self.scopes

        return payload

    @backoff.on_exception(
        backoff.expo,
        DefaultBackoffException,
        on_backoff=lambda details: logger.info(
            f"Caught retryable error after {details['tries']} tries. Waiting {details['wait']} seconds then retrying..."
        ),
        max_time=300,
    )
    def refresh_access_token(self) -> Tuple[str, int]:
        """
        Request a new access token from the refresh endpoint.

        Responses with status 429 or >= 500 are turned into ``DefaultBackoffException`` so the decorator retries them.

        :return: a tuple of (access_token, token_lifespan_in_seconds)
        :raises requests.exceptions.RequestException: for request failures that are not retried
        :raises Exception: for any other failure while handling the response
        """
        try:
            response = requests.request(
                method="POST",
                url=self.token_refresh_endpoint,
                data=self.get_refresh_request_body(),
                headers=self.get_refresh_access_token_headers(),
            )
            response.raise_for_status()
            response_json = response.json()
            return response_json["access_token"], int(response_json["expires_in"])
        except requests.exceptions.RequestException as e:
            # Connection errors and timeouts carry no response; only inspect the status code when there is one,
            # otherwise the original error would be masked by an AttributeError.
            if e.response is not None and (e.response.status_code == 429 or e.response.status_code >= 500):
                raise DefaultBackoffException(request=e.response.request, response=e.response)
            raise
        except Exception as e:
            raise Exception(f"Error while refreshing access token: {e}") from e

    def get_refresh_access_token_headers(self):
        """
        Build the headers for the refresh request.

        :return: a new dict combining ``refresh_access_token_headers`` with the headers of
          ``refresh_access_token_authenticator`` (the latter win on conflicts)
        """
        # Copy so that merging the authenticator's headers never mutates the mapping supplied by the caller.
        headers = dict(self.refresh_access_token_headers) if self.refresh_access_token_headers else {}
        if self.refresh_access_token_authenticator:
            refresh_auth_headers = self.refresh_access_token_authenticator.get_auth_header()
            headers.update(refresh_auth_headers)
        return headers
|