airbyte-cdk 0.0.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/__init__.py +358 -0
- airbyte_cdk/cli/__init__.py +1 -0
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
- airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
- airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
- airbyte_cdk/config_observation.py +104 -0
- airbyte_cdk/connector.py +123 -0
- airbyte_cdk/connector_builder/README.md +53 -0
- airbyte_cdk/connector_builder/__init__.py +3 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
- airbyte_cdk/connector_builder/main.py +107 -0
- airbyte_cdk/connector_builder/models.py +73 -0
- airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
- airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
- airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
- airbyte_cdk/connector_builder/test_reader/types.py +83 -0
- airbyte_cdk/destinations/__init__.py +8 -0
- airbyte_cdk/destinations/destination.py +154 -0
- airbyte_cdk/destinations/vector_db_based/README.md +37 -0
- airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
- airbyte_cdk/destinations/vector_db_based/config.py +298 -0
- airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
- airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
- airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
- airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
- airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
- airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
- airbyte_cdk/entrypoint.py +414 -0
- airbyte_cdk/exception_handler.py +56 -0
- airbyte_cdk/logger.py +109 -0
- airbyte_cdk/models/__init__.py +72 -0
- airbyte_cdk/models/airbyte_protocol.py +88 -0
- airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
- airbyte_cdk/models/well_known_types.py +5 -0
- airbyte_cdk/py.typed +0 -0
- airbyte_cdk/sources/__init__.py +26 -0
- airbyte_cdk/sources/abstract_source.py +326 -0
- airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
- airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
- airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
- airbyte_cdk/sources/config.py +27 -0
- airbyte_cdk/sources/connector_state_manager.py +161 -0
- airbyte_cdk/sources/declarative/__init__.py +3 -0
- airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
- airbyte_cdk/sources/declarative/async_job/job.py +52 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
- airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
- airbyte_cdk/sources/declarative/async_job/status.py +24 -0
- airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
- airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
- airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
- airbyte_cdk/sources/declarative/auth/token.py +267 -0
- airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
- airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
- airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
- airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
- airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
- airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
- airbyte_cdk/sources/declarative/declarative_source.py +36 -0
- airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
- airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
- airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
- airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
- airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
- airbyte_cdk/sources/declarative/exceptions.py +9 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
- airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
- airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
- airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
- airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
- airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
- airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
- airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
- airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
- airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
- airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
- airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
- airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
- airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
- airbyte_cdk/sources/declarative/models/__init__.py +2 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
- airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
- airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
- airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
- airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
- airbyte_cdk/sources/declarative/requesters/README.md +56 -0
- airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
- airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
- airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
- airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
- airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
- airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
- airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
- airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
- airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
- airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
- airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
- airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
- airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
- airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
- airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
- airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
- airbyte_cdk/sources/declarative/spec/spec.py +48 -0
- airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
- airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
- airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
- airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
- airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
- airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
- airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
- airbyte_cdk/sources/declarative/types.py +25 -0
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
- airbyte_cdk/sources/file_based/README.md +152 -0
- airbyte_cdk/sources/file_based/__init__.py +24 -0
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
- airbyte_cdk/sources/file_based/config/__init__.py +0 -0
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
- airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
- airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
- airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
- airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
- airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
- airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
- airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
- airbyte_cdk/sources/file_based/exceptions.py +159 -0
- airbyte_cdk/sources/file_based/file_based_source.py +466 -0
- airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
- airbyte_cdk/sources/file_based/file_record_data.py +22 -0
- airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
- airbyte_cdk/sources/file_based/remote_file.py +18 -0
- airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
- airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
- airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
- airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
- airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
- airbyte_cdk/sources/file_based/types.py +10 -0
- airbyte_cdk/sources/http_config.py +10 -0
- airbyte_cdk/sources/http_logger.py +55 -0
- airbyte_cdk/sources/message/__init__.py +19 -0
- airbyte_cdk/sources/message/repository.py +137 -0
- airbyte_cdk/sources/source.py +95 -0
- airbyte_cdk/sources/specs/transfer_modes.py +26 -0
- airbyte_cdk/sources/streams/__init__.py +8 -0
- airbyte_cdk/sources/streams/availability_strategy.py +84 -0
- airbyte_cdk/sources/streams/call_rate.py +704 -0
- airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
- airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
- airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
- airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
- airbyte_cdk/sources/streams/concurrent/README.md +7 -0
- airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
- airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
- airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
- airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
- airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
- airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
- airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
- airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
- airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
- airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
- airbyte_cdk/sources/streams/core.py +703 -0
- airbyte_cdk/sources/streams/http/__init__.py +10 -0
- airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
- airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
- airbyte_cdk/sources/streams/http/exceptions.py +61 -0
- airbyte_cdk/sources/streams/http/http.py +673 -0
- airbyte_cdk/sources/streams/http/http_client.py +531 -0
- airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
- airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
- airbyte_cdk/sources/streams/utils/__init__.py +3 -0
- airbyte_cdk/sources/types.py +169 -0
- airbyte_cdk/sources/utils/__init__.py +7 -0
- airbyte_cdk/sources/utils/casing.py +12 -0
- airbyte_cdk/sources/utils/files_directory.py +15 -0
- airbyte_cdk/sources/utils/record_helper.py +53 -0
- airbyte_cdk/sources/utils/schema_helpers.py +230 -0
- airbyte_cdk/sources/utils/slice_logger.py +57 -0
- airbyte_cdk/sources/utils/transform.py +277 -0
- airbyte_cdk/sources/utils/types.py +7 -0
- airbyte_cdk/sql/__init__.py +0 -0
- airbyte_cdk/sql/_util/__init__.py +0 -0
- airbyte_cdk/sql/_util/hashing.py +34 -0
- airbyte_cdk/sql/_util/name_normalizers.py +92 -0
- airbyte_cdk/sql/constants.py +32 -0
- airbyte_cdk/sql/exceptions.py +235 -0
- airbyte_cdk/sql/secrets.py +123 -0
- airbyte_cdk/sql/shared/__init__.py +15 -0
- airbyte_cdk/sql/shared/catalog_providers.py +145 -0
- airbyte_cdk/sql/shared/sql_processor.py +786 -0
- airbyte_cdk/sql/types.py +160 -0
- airbyte_cdk/test/__init__.py +7 -0
- airbyte_cdk/test/catalog_builder.py +81 -0
- airbyte_cdk/test/entrypoint_wrapper.py +250 -0
- airbyte_cdk/test/mock_http/__init__.py +6 -0
- airbyte_cdk/test/mock_http/matcher.py +41 -0
- airbyte_cdk/test/mock_http/mocker.py +185 -0
- airbyte_cdk/test/mock_http/request.py +103 -0
- airbyte_cdk/test/mock_http/response.py +28 -0
- airbyte_cdk/test/mock_http/response_builder.py +237 -0
- airbyte_cdk/test/state_builder.py +33 -0
- airbyte_cdk/test/utils/__init__.py +1 -0
- airbyte_cdk/test/utils/data.py +24 -0
- airbyte_cdk/test/utils/http_mocking.py +16 -0
- airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
- airbyte_cdk/test/utils/reading.py +26 -0
- airbyte_cdk/utils/__init__.py +10 -0
- airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
- airbyte_cdk/utils/analytics_message.py +25 -0
- airbyte_cdk/utils/constants.py +5 -0
- airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
- airbyte_cdk/utils/datetime_helpers.py +499 -0
- airbyte_cdk/utils/event_timing.py +85 -0
- airbyte_cdk/utils/is_cloud_environment.py +18 -0
- airbyte_cdk/utils/mapping_helpers.py +162 -0
- airbyte_cdk/utils/message_utils.py +26 -0
- airbyte_cdk/utils/oneof_option_config.py +33 -0
- airbyte_cdk/utils/print_buffer.py +75 -0
- airbyte_cdk/utils/schema_inferrer.py +270 -0
- airbyte_cdk/utils/slice_hasher.py +37 -0
- airbyte_cdk/utils/spec_schema_transformations.py +26 -0
- airbyte_cdk/utils/stream_status_utils.py +43 -0
- airbyte_cdk/utils/traced_exception.py +145 -0
- airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
- airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
- airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
- airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
- airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
- airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,673 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
from datetime import timedelta
|
|
8
|
+
from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union
|
|
9
|
+
from urllib.parse import urljoin
|
|
10
|
+
|
|
11
|
+
import requests
|
|
12
|
+
from requests.auth import AuthBase
|
|
13
|
+
from typing_extensions import deprecated
|
|
14
|
+
|
|
15
|
+
from airbyte_cdk.models import AirbyteMessage, FailureType, SyncMode
|
|
16
|
+
from airbyte_cdk.models import Type as MessageType
|
|
17
|
+
from airbyte_cdk.sources.message.repository import InMemoryMessageRepository
|
|
18
|
+
from airbyte_cdk.sources.streams.call_rate import APIBudget
|
|
19
|
+
from airbyte_cdk.sources.streams.checkpoint.cursor import Cursor
|
|
20
|
+
from airbyte_cdk.sources.streams.checkpoint.resumable_full_refresh_cursor import (
|
|
21
|
+
ResumableFullRefreshCursor,
|
|
22
|
+
)
|
|
23
|
+
from airbyte_cdk.sources.streams.checkpoint.substream_resumable_full_refresh_cursor import (
|
|
24
|
+
SubstreamResumableFullRefreshCursor,
|
|
25
|
+
)
|
|
26
|
+
from airbyte_cdk.sources.streams.core import CheckpointMixin, Stream, StreamData
|
|
27
|
+
from airbyte_cdk.sources.streams.http.error_handlers import (
|
|
28
|
+
BackoffStrategy,
|
|
29
|
+
ErrorHandler,
|
|
30
|
+
HttpStatusErrorHandler,
|
|
31
|
+
)
|
|
32
|
+
from airbyte_cdk.sources.streams.http.error_handlers.response_models import (
|
|
33
|
+
ErrorResolution,
|
|
34
|
+
ResponseAction,
|
|
35
|
+
)
|
|
36
|
+
from airbyte_cdk.sources.streams.http.http_client import HttpClient
|
|
37
|
+
from airbyte_cdk.sources.types import Record, StreamSlice
|
|
38
|
+
from airbyte_cdk.sources.utils.types import JsonType
|
|
39
|
+
|
|
40
|
+
# All HTTP methods that may be used to send a request body
|
|
41
|
+
BODY_REQUEST_METHODS = ("GET", "POST", "PUT", "PATCH")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class HttpStream(Stream, CheckpointMixin, ABC):
|
|
45
|
+
"""
|
|
46
|
+
Base abstract class for an Airbyte Stream using the HTTP protocol. Basic building block for users building an Airbyte source for a HTTP API.
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
source_defined_cursor = True # Most HTTP streams use a source defined cursor (i.e: the user can't configure it like on a SQL table)
|
|
50
|
+
page_size: Optional[int] = (
|
|
51
|
+
None # Use this variable to define page size for API http requests with pagination support
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
def __init__(
    self, authenticator: Optional[AuthBase] = None, api_budget: Optional[APIBudget] = None
):
    """
    Build the HTTP client for this stream and, where applicable, attach a default cursor.

    :param authenticator: optional authenticator handed to the underlying HttpClient
    :param api_budget: optional call budget; when it is missing (or falsy) an APIBudget with an empty policy list is used
    """
    self._exit_on_rate_limit: bool = False
    self._http_client = HttpClient(
        name=self.name,
        logger=self.logger,
        error_handler=self.get_error_handler(),
        api_budget=api_budget if api_budget else APIBudget(policies=[]),
        authenticator=authenticator,
        use_cache=self.use_cache,
        backoff_strategy=self.get_backoff_strategy(),
        message_repository=InMemoryMessageRepository(),
    )

    # Resumable full refresh (RFR) is applied automatically unless one of the three checks below rules it out.

    # 1. A stream that explicitly initialized its own cursor defers to it, so RFR is not applied.
    if self.cursor:
        return

    # 2. A stream with at least one cursor_field is incremental, which is a superior sync to RFR.
    if len(self.cursor_field) != 0:
        return

    # 3. A stream overriding read_records() does not guarantee that it calls the parent implementation,
    #    which can perform per-page checkpointing, so RFR is only supported if the stream uses the
    #    default `HttpStream.read_records()` method.
    if type(self).read_records is not HttpStream.read_records:
        return

    self.cursor = ResumableFullRefreshCursor()
|
|
80
|
+
|
|
81
|
+
@property
|
|
82
|
+
def exit_on_rate_limit(self) -> bool:
    """
    Flag describing how the stream behaves when it is rate limited.

    :return: the stored flag; False means the stream will retry endlessly when rate limited
    """
    current_flag = self._exit_on_rate_limit
    return current_flag
|
|
87
|
+
|
|
88
|
+
@exit_on_rate_limit.setter
|
|
89
|
+
def exit_on_rate_limit(self, value: bool) -> None:
    """
    Store a new value for the rate-limit flag.

    :param value: flag value to keep on the instance
    """
    self._exit_on_rate_limit = value
|
|
91
|
+
|
|
92
|
+
@property
|
|
93
|
+
def cache_filename(self) -> str:
    """
    Name of the cache file, built from the stream name with a ".sqlite" suffix. Override if needed.

    Note that the cache is in-memory only when the environment variable REQUEST_CACHE_PATH is not set.
    """
    stream_name = self.name
    return f"{stream_name}.sqlite"
|
|
99
|
+
|
|
100
|
+
@property
|
|
101
|
+
def use_cache(self) -> bool:
    """
    Whether all records should be cached; disabled by default. Override if needed.

    Note that the cache is in-memory only when the environment variable REQUEST_CACHE_PATH is not set.
    """
    return False
|
|
107
|
+
|
|
108
|
+
@property
|
|
109
|
+
@abstractmethod
|
|
110
|
+
def url_base(self) -> str:
    """
    Base URL of the API endpoint; must be provided by subclasses.

    :return: the URL base, e.g. "https://myapi.com/v1/" when the request should hit https://myapi.com/v1/some_entity
    """
|
|
114
|
+
|
|
115
|
+
@property
|
|
116
|
+
def http_method(self) -> str:
    """
    HTTP method used by the stream; "GET" unless overridden.

    Override if needed. When using POST/PUT/PATCH, see get_request_data/get_request_json.
    """
    return "GET"
|
|
121
|
+
|
|
122
|
+
@property
|
|
123
|
+
@deprecated(
|
|
124
|
+
"Deprecated as of CDK version 3.0.0. "
|
|
125
|
+
"You should set error_handler explicitly in HttpStream.get_error_handler() instead."
|
|
126
|
+
)
|
|
127
|
+
def raise_on_http_errors(self) -> bool:
    """
    Whether an exception is raised on HTTP error codes; enabled by default.

    Override if needed: returning False opts out of raising the HTTP code exception.
    """
    return True
|
|
132
|
+
|
|
133
|
+
@property
|
|
134
|
+
@deprecated(
|
|
135
|
+
"Deprecated as of CDK version 3.0.0. "
|
|
136
|
+
"You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead."
|
|
137
|
+
)
|
|
138
|
+
def max_retries(self) -> Union[int, None]:
    """
    Maximum number of retries for the backoff policy. Override if needed.

    :return: the retry limit (5 by default); None means no limit
    """
    default_retry_limit = 5
    return default_retry_limit
|
|
143
|
+
|
|
144
|
+
@property
@deprecated(
    "Deprecated as of CDK version 3.0.0. "
    "You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead."
)
def max_time(self) -> Union[int, None]:
    """
    Deprecated maximum total waiting time, in seconds, for the backoff policy. Override if needed.

    :return: the time limit in seconds (ten minutes by default); None means no limit
    """
    return 60 * 10
|
|
154
|
+
|
|
155
|
+
@property
@deprecated(
    "Deprecated as of CDK version 3.0.0. "
    "You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead."
)
def retry_factor(self) -> float:
    """
    Deprecated factor for the backoff policy. Override if needed.

    :return: the backoff factor (5 by default)
    """
    return 5
|
|
165
|
+
|
|
166
|
+
@abstractmethod
def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
    """
    Define the pagination strategy of the stream. Must be implemented by subclasses.

    The returned value is handed to most other methods of this class so that it can be used
    to shape the next request, e.g. to set headers or query params.

    :param response: the response of the page that was just read
    :return: The token for the next page. Returning None means there are no more pages to read.
    """
|
|
175
|
+
|
|
176
|
+
@abstractmethod
def path(
    self,
    *,
    stream_state: Optional[Mapping[str, Any]] = None,
    stream_slice: Optional[Mapping[str, Any]] = None,
    next_page_token: Optional[Mapping[str, Any]] = None,
) -> str:
    """
    URL path of the API endpoint, relative to url_base. Must be implemented by subclasses.

    :return: the endpoint path, e.g. "some_entity" when the target is https://myapi.com/v1/some_entity
    """
|
|
187
|
+
|
|
188
|
+
def request_params(
    self,
    stream_state: Optional[Mapping[str, Any]],
    stream_slice: Optional[Mapping[str, Any]] = None,
    next_page_token: Optional[Mapping[str, Any]] = None,
) -> MutableMapping[str, Any]:
    """
    Query parameters to set on an outgoing HTTP request for the given inputs. Override if needed.

    E.g: paging parameters can be derived from next_page_token when it is not None.

    :return: a new, empty mapping by default
    """
    query_params: MutableMapping[str, Any] = {}
    return query_params
|
|
200
|
+
|
|
201
|
+
def request_headers(
    self,
    stream_state: Optional[Mapping[str, Any]],
    stream_slice: Optional[Mapping[str, Any]] = None,
    next_page_token: Optional[Mapping[str, Any]] = None,
) -> Mapping[str, Any]:
    """
    Non-auth headers to send with the request. Override if needed.

    Authentication headers will overwrite any overlapping headers returned from this method.

    :return: a new, empty mapping by default
    """
    headers: Mapping[str, Any] = {}
    return headers
|
|
211
|
+
|
|
212
|
+
def request_body_data(
    self,
    stream_state: Optional[Mapping[str, Any]],
    stream_slice: Optional[Mapping[str, Any]] = None,
    next_page_token: Optional[Mapping[str, Any]] = None,
) -> Optional[Union[Mapping[str, Any], str]]:
    """
    Non-JSON payload for the body of POST/PUT/PATCH requests. Override if needed.

    A returned text is sent as is.
    A returned dict is converted to a urlencoded form,
    e.g. {"key1": "value1", "key2": "value2"} => "key1=value1&key2=value2"

    Only one of 'request_body_data' and 'request_body_json' can be overridden at the same time.

    :return: None by default, i.e. no body data
    """
    return None
|
|
228
|
+
|
|
229
|
+
def request_body_json(
    self,
    stream_state: Optional[Mapping[str, Any]],
    stream_slice: Optional[Mapping[str, Any]] = None,
    next_page_token: Optional[Mapping[str, Any]] = None,
) -> Optional[Mapping[str, Any]]:
    """
    JSON payload for the body of POST/PUT/PATCH requests. Override if needed.

    Only one of 'request_body_data' and 'request_body_json' can be overridden at the same time.

    :return: None by default, i.e. no JSON body
    """
    return None
|
|
241
|
+
|
|
242
|
+
def request_kwargs(
    self,
    stream_state: Optional[Mapping[str, Any]],
    stream_slice: Optional[Mapping[str, Any]] = None,
    next_page_token: Optional[Mapping[str, Any]] = None,
) -> Mapping[str, Any]:
    """
    Keyword arguments to use when creating the HTTP request. Override if needed.

    Any option listed in https://docs.python-requests.org/en/latest/api/#requests.adapters.BaseAdapter.send
    can be returned from this method. Note that these options do not conflict with request-level
    options such as headers, request params, etc.

    :return: a new, empty mapping by default
    """
    extra_kwargs: Mapping[str, Any] = {}
    return extra_kwargs
|
|
254
|
+
|
|
255
|
+
@abstractmethod
def parse_response(
    self,
    response: requests.Response,
    *,
    stream_state: Mapping[str, Any],
    stream_slice: Optional[Mapping[str, Any]] = None,
    next_page_token: Optional[Mapping[str, Any]] = None,
) -> Iterable[Mapping[str, Any]]:
    """
    Parse the raw response object into records. Must be implemented by subclasses.

    :param response: the response to parse
    :param stream_state: the stream state the request was made with
    :param stream_slice: the stream slice the request was made for
    :param next_page_token: the page token the request was made with
    :return: An iterable containing the parsed response
    """
|
|
273
|
+
|
|
274
|
+
def get_backoff_strategy(self) -> Optional[Union[BackoffStrategy, List[BackoffStrategy]]]:
    """
    Provide the backoff strategy of the stream. Override to provide a custom BackoffStrategy.

    To avoid breaking changes, a stream that has a `backoff_time` attribute is considered to use
    the old (pre-HTTPClient) backoff handlers and is wrapped in an adapter.

    :return Optional[BackoffStrategy]: the adapter when `backoff_time` is present, otherwise None
    """
    uses_legacy_backoff = hasattr(self, "backoff_time")
    return HttpStreamAdapterBackoffStrategy(self) if uses_legacy_backoff else None
|
|
286
|
+
|
|
287
|
+
def get_error_handler(self) -> Optional[ErrorHandler]:
    """
    Provide the error handler of the stream. Override to provide a custom ErrorHandler.

    To avoid breaking changes, a stream that has a `should_retry` attribute is considered to use
    the old (pre-HTTPClient) error handlers and is wrapped in an adapter configured from the
    stream's max_retries and max_time.

    :return Optional[ErrorHandler]: the adapter when `should_retry` is present, otherwise None
    """
    if not hasattr(self, "should_retry"):
        return None

    return HttpStreamAdapterHttpStatusErrorHandler(
        stream=self,
        logger=logging.getLogger(),
        max_retries=self.max_retries,
        # a max_time of None (or 0) becomes a zero-length timedelta
        max_time=timedelta(seconds=self.max_time or 0),
    )
|
|
305
|
+
|
|
306
|
+
@classmethod
def _join_url(cls, url_base: str, path: str) -> str:
    """Combine `url_base` and `path` into the request URL using urljoin."""
    full_url = urljoin(url_base, path)
    return full_url
|
|
309
|
+
|
|
310
|
+
@classmethod
def parse_response_error_message(cls, response: requests.Response) -> Optional[str]:
    """
    Turn the raw response of a failed request into a user-friendly error message.

    By default, the body is decoded as JSON and searched for a message by following common API
    patterns. Override to parse differently.

    :param response: the response of the failed request
    :return: A user-friendly message that indicates the cause of the error. None when the body
        cannot be decoded as JSON or holds no recognizable message (a list without any message
        yields an empty string).
    """
    # keys probed on a JSON object, in priority order
    error_keys = ("message", "messages", "error", "errors", "failures", "failure", "detail")

    def _try_get_error(value: Optional[JsonType]) -> Optional[str]:
        if isinstance(value, str):
            return value
        if isinstance(value, list):
            # resolve every element and keep only the ones that produced a message
            found = (_try_get_error(item) for item in value)
            return ", ".join(message for message in found if message is not None)
        if isinstance(value, dict):
            # first truthy entry wins; when none is truthy the last probed value is used
            candidate = None
            for key in error_keys:
                candidate = value.get(key)
                if candidate:
                    break
            return _try_get_error(candidate)
        return None

    try:
        return _try_get_error(response.json())
    except requests.exceptions.JSONDecodeError:
        return None
|
|
345
|
+
|
|
346
|
+
def get_error_display_message(self, exception: BaseException) -> Optional[str]:
    """
    Look up the user-friendly display message that corresponds to an exception.

    This is called when an exception is encountered while reading records from the stream, and
    the result is used to build the AirbyteTraceMessage.

    The default implementation only handles HTTPErrors that carry a response, by passing that
    response to self.parse_response_error_message(). Override as needed to handle any
    additional exception types.

    :param exception: The exception that was raised
    :return: A user-friendly message that indicates the cause of the error
    """
    is_http_error = isinstance(exception, requests.HTTPError)
    if not is_http_error or exception.response is None:
        return None
    return self.parse_response_error_message(exception.response)
|
|
360
|
+
|
|
361
|
+
def read_records(
    self,
    sync_mode: SyncMode,
    cursor_field: Optional[List[str]] = None,
    stream_slice: Optional[Mapping[str, Any]] = None,
    stream_state: Optional[Mapping[str, Any]] = None,
) -> Iterable[StreamData]:
    """
    Yield the records of one stream slice, parsed with self.parse_response().

    All pages are read unless the stream has no cursor_field and its cursor is a
    ResumableFullRefreshCursor, in which case a single page is read per call.
    """

    def parse_page(req, res, state, _slice):  # type: ignore # noqa
        # the prepared request is part of the callback signature but is not needed for parsing
        return self.parse_response(res, stream_slice=_slice, stream_state=state)

    # A cursor_field indicates this is an incremental stream which offers better checkpointing than RFR enabled via the cursor
    reads_all_pages = self.cursor_field or not isinstance(
        self.get_cursor(), ResumableFullRefreshCursor
    )
    page_reader = self._read_pages if reads_all_pages else self._read_single_page
    yield from page_reader(parse_page, stream_slice, stream_state)
|
|
385
|
+
|
|
386
|
+
@property
def state(self) -> MutableMapping[str, Any]:
    """State of the stream: taken from the cursor when there is one, otherwise from self._state."""
    active_cursor = self.get_cursor()
    return active_cursor.get_stream_state() if active_cursor else self._state  # type: ignore

@state.setter
def state(self, value: MutableMapping[str, Any]) -> None:
    """Store `value` as the stream state and, when there is a cursor, use it as its initial state."""
    active_cursor = self.get_cursor()
    if active_cursor:
        active_cursor.set_initial_state(value)
    self._state = value
|
|
399
|
+
|
|
400
|
+
def get_cursor(self) -> Optional[Cursor]:
    """
    Return the cursor of the stream, swapping it for a substream cursor when needed.

    This is semi-stateful: the exact type of cursor is not known when the class is created.
    A cursor_field implies an incremental stream, but whether the stream is a substream is only
    known at runtime. Because RFR is applied automatically, the cursor is replaced here once
    self.has_multiple_slices has been set and the current cursor is a ResumableFullRefreshCursor.
    """
    needs_substream_cursor = self.has_multiple_slices and isinstance(
        self.cursor, ResumableFullRefreshCursor
    )
    if needs_substream_cursor:
        self.cursor = SubstreamResumableFullRefreshCursor()
    return self.cursor
|
|
411
|
+
|
|
412
|
+
def _read_pages(
    self,
    records_generator_fn: Callable[
        [
            requests.PreparedRequest,
            requests.Response,
            Mapping[str, Any],
            Optional[Mapping[str, Any]],
        ],
        Iterable[StreamData],
    ],
    stream_slice: Optional[Mapping[str, Any]] = None,
    stream_state: Optional[Mapping[str, Any]] = None,
) -> Iterable[StreamData]:
    """
    Read every page of a slice and yield the records produced by `records_generator_fn`.

    Pages are fetched with self._fetch_next_page() until self.next_page_token() returns a falsy
    value. Once pagination is complete and the stream's cursor is a
    SubstreamResumableFullRefreshCursor, the slice's partition is closed on it.
    """
    stream_state = stream_state or {}
    page_token = None
    while True:
        request, response = self._fetch_next_page(stream_slice, stream_state, page_token)
        yield from records_generator_fn(request, response, stream_state, stream_slice)

        page_token = self.next_page_token(response)
        if not page_token:
            break

    active_cursor = self.get_cursor()
    if active_cursor and isinstance(active_cursor, SubstreamResumableFullRefreshCursor):
        partition, _, _ = self._extract_slice_fields(stream_slice=stream_slice)
        # Substreams checkpoint state by marking an entire parent partition as completed so that on the subsequent attempt
        # after a failure, completed parents are skipped and the sync can make progress
        active_cursor.close_slice(StreamSlice(cursor_slice={}, partition=partition))

    # Always return an empty generator just in case no records were ever yielded
    yield from []
|
|
446
|
+
|
|
447
|
+
def _read_single_page(
    self,
    records_generator_fn: Callable[
        [
            requests.PreparedRequest,
            requests.Response,
            Mapping[str, Any],
            Optional[Mapping[str, Any]],
        ],
        Iterable[StreamData],
    ],
    stream_slice: Optional[Mapping[str, Any]] = None,
    stream_state: Optional[Mapping[str, Any]] = None,
) -> Iterable[StreamData]:
    """
    Read exactly one page and yield the records produced by `records_generator_fn`.

    The slice's cursor_slice (if not empty) is used as the page token of the request and the
    rest of the slice is passed on as the stream slice. Afterwards, when the stream has a
    cursor, the slice is closed on it with the token of the following page, or with a marker
    mapping when self.next_page_token() returns nothing.
    """
    partition, cursor_slice, remaining_slice = self._extract_slice_fields(
        stream_slice=stream_slice
    )
    stream_state = stream_state or {}
    current_token = cursor_slice or None

    request, response = self._fetch_next_page(remaining_slice, stream_state, current_token)
    yield from records_generator_fn(request, response, stream_state, remaining_slice)

    following_token = self.next_page_token(response)
    if not following_token:
        following_token = {"__ab_full_refresh_sync_complete": True}

    active_cursor = self.get_cursor()
    if active_cursor:
        active_cursor.close_slice(
            StreamSlice(cursor_slice=following_token, partition=partition)
        )

    # Always return an empty generator just in case no records were ever yielded
    yield from []
|
|
480
|
+
|
|
481
|
+
@staticmethod
def _extract_slice_fields(
    stream_slice: Optional[Mapping[str, Any]],
) -> tuple[Mapping[str, Any], Mapping[str, Any], Mapping[str, Any]]:
    """
    Split a stream slice into its partition, its cursor slice and the remaining fields.

    :param stream_slice: a StreamSlice, a legacy mapping, or an empty/None slice
    :return: a (partition, cursor_slice, remaining) tuple; three empty mappings for an empty slice
    """
    if not stream_slice:
        return {}, {}, {}

    if isinstance(stream_slice, StreamSlice):
        return stream_slice.partition, stream_slice.cursor_slice, dict(stream_slice.items())

    # RFR streams that implement stream_slices() to generate stream slices in the legacy mapping format are converted into a
    # structured stream slice mapping by the LegacyCursorBasedCheckpointReader. The structured mapping object has separate
    # fields for the partition and cursor_slice value
    structured_keys = ("partition", "cursor_slice")
    partition = stream_slice.get("partition", {})
    cursor_slice = stream_slice.get("cursor_slice", {})
    remaining = {
        key: val for key, val in stream_slice.items() if key not in structured_keys
    }
    return partition, cursor_slice, remaining
|
|
504
|
+
|
|
505
|
+
def _fetch_next_page(
    self,
    stream_slice: Optional[Mapping[str, Any]] = None,
    stream_state: Optional[Mapping[str, Any]] = None,
    next_page_token: Optional[Mapping[str, Any]] = None,
) -> Tuple[requests.PreparedRequest, requests.Response]:
    """
    Send the request for one page through self._http_client.

    URL, request kwargs, headers, query params and body are all obtained from the stream's own
    hooks, each called with the same stream_state / stream_slice / next_page_token.

    :return: the (request, response) pair returned by the HTTP client
    """
    # every hook receives the same three keyword arguments
    request_context = {
        "stream_state": stream_state,
        "stream_slice": stream_slice,
        "next_page_token": next_page_token,
    }
    request, response = self._http_client.send_request(
        http_method=self.http_method,
        url=self._join_url(self.url_base, self.path(**request_context)),
        request_kwargs=self.request_kwargs(**request_context),
        headers=self.request_headers(**request_context),
        params=self.request_params(**request_context),
        json=self.request_body_json(**request_context),
        data=self.request_body_data(**request_context),
        dedupe_query_params=True,
        log_formatter=self.get_log_formatter(),
        exit_on_rate_limit=self.exit_on_rate_limit,
    )

    return request, response
|
|
552
|
+
|
|
553
|
+
def get_log_formatter(self) -> Optional[Callable[[requests.Response], Any]]:
    """
    Provide the function used for logging inside HttpClient.

    :return Optional[Callable[[requests.Response], Any]]: None by default, i.e. no formatter
    """
    return None
|
|
559
|
+
|
|
560
|
+
|
|
561
|
+
class HttpSubStream(HttpStream, ABC):
    """An HttpStream whose slices are built from the records of a parent HttpStream."""

    def __init__(self, parent: HttpStream, **kwargs: Any):
        """
        :param parent: should be the instance of HttpStream class
        """
        super().__init__(**kwargs)
        self.parent = parent
        # Substreams are based on parent records which implies there are multiple slices
        self.has_multiple_slices = True

        # There are three conditions that dictate if RFR should automatically be applied to a stream
        # 1. Streams that explicitly initialize their own cursor should defer to it and not automatically apply RFR
        # 2. Streams with at least one cursor_field are incremental and thus a superior sync to RFR.
        # 3. Streams overriding read_records() do not guarantee that they will call the parent implementation which can perform
        #    per-page checkpointing so RFR is only supported if a stream use the default `HttpStream.read_records()` method
        applies_rfr = (
            not self.cursor
            and len(self.cursor_field) == 0
            and type(self).read_records is HttpStream.read_records
        )
        if applies_rfr:
            self.cursor = SubstreamResumableFullRefreshCursor()

    def stream_slices(
        self,
        sync_mode: SyncMode,
        cursor_field: Optional[List[str]] = None,
        stream_state: Optional[Mapping[str, Any]] = None,
    ) -> Iterable[Optional[Mapping[str, Any]]]:
        """
        Yield one slice of the form {"parent": <record data>} per record read from the parent stream.

        AirbyteMessage items that are not records are skipped; AirbyteMessage records and Record
        objects are unwrapped to their data, anything else is passed through unchanged.
        """
        # read_only_records() assumes the parent is not concurrent. This is currently okay since the concurrent CDK does
        # not support either substreams or RFR, but something that needs to be considered once we do
        for parent_record in self.parent.read_only_records(stream_state):
            if isinstance(parent_record, AirbyteMessage):
                # Skip non-records (eg AirbyteLogMessage)
                if parent_record.type != MessageType.RECORD:
                    continue
                parent_record = parent_record.record.data  # type: ignore [assignment, union-attr]  # Incorrect type for assignment
            elif isinstance(parent_record, Record):
                parent_record = parent_record.data
            yield {"parent": parent_record}
|
|
602
|
+
|
|
603
|
+
|
|
604
|
+
@deprecated(
    "Deprecated as of CDK version 3.0.0. "
    "You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead."
)
class HttpStreamAdapterBackoffStrategy(BackoffStrategy):
    """Adapter exposing a stream's legacy `backoff_time` method through the BackoffStrategy interface."""

    def __init__(self, stream: HttpStream):
        """
        :param stream: the stream whose `backoff_time` method is delegated to
        """
        self.stream = stream

    def backoff_time(
        self,
        response_or_exception: Optional[Union[requests.Response, requests.RequestException]],
        attempt_count: int,
    ) -> Optional[float]:
        """
        Delegate to the wrapped stream's `backoff_time`.

        :param response_or_exception: forwarded as is to the stream
        :param attempt_count: not forwarded; the stream method is called with a single argument
        :return: whatever the stream's `backoff_time` returns
        """
        return self.stream.backoff_time(response_or_exception)  # type: ignore # noqa  # HttpStream.backoff_time has been deprecated
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
@deprecated(
    "Deprecated as of CDK version 3.0.0. "
    "You should set error_handler explicitly in HttpStream.get_error_handler() instead."
)
class HttpStreamAdapterHttpStatusErrorHandler(HttpStatusErrorHandler):
    """Adapter resolving responses with a stream's legacy `should_retry` and `raise_on_http_errors`."""

    def __init__(self, stream: HttpStream, **kwargs):  # type: ignore # noqa
        """
        :param stream: the stream consulted to interpret responses
        :param kwargs: forwarded to HttpStatusErrorHandler
        """
        self.stream = stream
        super().__init__(**kwargs)

    def interpret_response(
        self, response_or_exception: Optional[Union[requests.Response, Exception]] = None
    ) -> ErrorResolution:
        """
        Decide how a response or exception should be handled.

        Exceptions are delegated to the parent handler. Responses are resolved as:
        RATE_LIMITED (status 429) or RETRY when the stream's should_retry() is truthy, SUCCESS
        when the response is ok, FAIL when the stream raises on HTTP errors, IGNORE otherwise.
        Any other input is logged and resolved as a FAIL with a system error.
        """
        if isinstance(response_or_exception, Exception):
            return super().interpret_response(response_or_exception)

        if not isinstance(response_or_exception, requests.Response):
            self._logger.error(f"Received unexpected response type: {type(response_or_exception)}")
            return ErrorResolution(
                response_action=ResponseAction.FAIL,
                failure_type=FailureType.system_error,
                error_message=f"Received unexpected response type: {type(response_or_exception)}",
            )

        if self.stream.should_retry(response_or_exception):  # type: ignore # noqa
            # a 429 is reported as rate limiting, every other retryable response as a plain retry
            retry_action = (
                ResponseAction.RATE_LIMITED
                if response_or_exception.status_code == 429
                else ResponseAction.RETRY
            )
            return ErrorResolution(
                response_action=retry_action,
                failure_type=FailureType.transient_error,
                error_message=f"Response status code: {response_or_exception.status_code}. Retrying...",
            )

        if response_or_exception.ok:
            return ErrorResolution(
                response_action=ResponseAction.SUCCESS,
                failure_type=None,
                error_message=None,
            )

        if self.stream.raise_on_http_errors:
            return ErrorResolution(
                response_action=ResponseAction.FAIL,
                failure_type=FailureType.transient_error,
                error_message=f"Response status code: {response_or_exception.status_code}. Unexpected error. Failed.",
            )

        return ErrorResolution(
            response_action=ResponseAction.IGNORE,
            failure_type=FailureType.transient_error,
            error_message=f"Response status code: {response_or_exception.status_code}. Ignoring...",
        )
|