airbyte-cdk 0.0.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/__init__.py +358 -0
- airbyte_cdk/cli/__init__.py +1 -0
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
- airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
- airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
- airbyte_cdk/config_observation.py +104 -0
- airbyte_cdk/connector.py +123 -0
- airbyte_cdk/connector_builder/README.md +53 -0
- airbyte_cdk/connector_builder/__init__.py +3 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
- airbyte_cdk/connector_builder/main.py +107 -0
- airbyte_cdk/connector_builder/models.py +73 -0
- airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
- airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
- airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
- airbyte_cdk/connector_builder/test_reader/types.py +83 -0
- airbyte_cdk/destinations/__init__.py +8 -0
- airbyte_cdk/destinations/destination.py +154 -0
- airbyte_cdk/destinations/vector_db_based/README.md +37 -0
- airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
- airbyte_cdk/destinations/vector_db_based/config.py +298 -0
- airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
- airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
- airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
- airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
- airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
- airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
- airbyte_cdk/entrypoint.py +414 -0
- airbyte_cdk/exception_handler.py +56 -0
- airbyte_cdk/logger.py +109 -0
- airbyte_cdk/models/__init__.py +72 -0
- airbyte_cdk/models/airbyte_protocol.py +88 -0
- airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
- airbyte_cdk/models/well_known_types.py +5 -0
- airbyte_cdk/py.typed +0 -0
- airbyte_cdk/sources/__init__.py +26 -0
- airbyte_cdk/sources/abstract_source.py +326 -0
- airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
- airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
- airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
- airbyte_cdk/sources/config.py +27 -0
- airbyte_cdk/sources/connector_state_manager.py +161 -0
- airbyte_cdk/sources/declarative/__init__.py +3 -0
- airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
- airbyte_cdk/sources/declarative/async_job/job.py +52 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
- airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
- airbyte_cdk/sources/declarative/async_job/status.py +24 -0
- airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
- airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
- airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
- airbyte_cdk/sources/declarative/auth/token.py +267 -0
- airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
- airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
- airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
- airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
- airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
- airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
- airbyte_cdk/sources/declarative/declarative_source.py +36 -0
- airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
- airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
- airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
- airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
- airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
- airbyte_cdk/sources/declarative/exceptions.py +9 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
- airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
- airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
- airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
- airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
- airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
- airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
- airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
- airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
- airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
- airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
- airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
- airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
- airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
- airbyte_cdk/sources/declarative/models/__init__.py +2 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
- airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
- airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
- airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
- airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
- airbyte_cdk/sources/declarative/requesters/README.md +56 -0
- airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
- airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
- airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
- airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
- airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
- airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
- airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
- airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
- airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
- airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
- airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
- airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
- airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
- airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
- airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
- airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
- airbyte_cdk/sources/declarative/spec/spec.py +48 -0
- airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
- airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
- airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
- airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
- airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
- airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
- airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
- airbyte_cdk/sources/declarative/types.py +25 -0
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
- airbyte_cdk/sources/file_based/README.md +152 -0
- airbyte_cdk/sources/file_based/__init__.py +24 -0
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
- airbyte_cdk/sources/file_based/config/__init__.py +0 -0
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
- airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
- airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
- airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
- airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
- airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
- airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
- airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
- airbyte_cdk/sources/file_based/exceptions.py +159 -0
- airbyte_cdk/sources/file_based/file_based_source.py +466 -0
- airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
- airbyte_cdk/sources/file_based/file_record_data.py +22 -0
- airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
- airbyte_cdk/sources/file_based/remote_file.py +18 -0
- airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
- airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
- airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
- airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
- airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
- airbyte_cdk/sources/file_based/types.py +10 -0
- airbyte_cdk/sources/http_config.py +10 -0
- airbyte_cdk/sources/http_logger.py +55 -0
- airbyte_cdk/sources/message/__init__.py +19 -0
- airbyte_cdk/sources/message/repository.py +137 -0
- airbyte_cdk/sources/source.py +95 -0
- airbyte_cdk/sources/specs/transfer_modes.py +26 -0
- airbyte_cdk/sources/streams/__init__.py +8 -0
- airbyte_cdk/sources/streams/availability_strategy.py +84 -0
- airbyte_cdk/sources/streams/call_rate.py +704 -0
- airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
- airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
- airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
- airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
- airbyte_cdk/sources/streams/concurrent/README.md +7 -0
- airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
- airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
- airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
- airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
- airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
- airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
- airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
- airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
- airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
- airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
- airbyte_cdk/sources/streams/core.py +703 -0
- airbyte_cdk/sources/streams/http/__init__.py +10 -0
- airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
- airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
- airbyte_cdk/sources/streams/http/exceptions.py +61 -0
- airbyte_cdk/sources/streams/http/http.py +673 -0
- airbyte_cdk/sources/streams/http/http_client.py +531 -0
- airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
- airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
- airbyte_cdk/sources/streams/utils/__init__.py +3 -0
- airbyte_cdk/sources/types.py +169 -0
- airbyte_cdk/sources/utils/__init__.py +7 -0
- airbyte_cdk/sources/utils/casing.py +12 -0
- airbyte_cdk/sources/utils/files_directory.py +15 -0
- airbyte_cdk/sources/utils/record_helper.py +53 -0
- airbyte_cdk/sources/utils/schema_helpers.py +230 -0
- airbyte_cdk/sources/utils/slice_logger.py +57 -0
- airbyte_cdk/sources/utils/transform.py +277 -0
- airbyte_cdk/sources/utils/types.py +7 -0
- airbyte_cdk/sql/__init__.py +0 -0
- airbyte_cdk/sql/_util/__init__.py +0 -0
- airbyte_cdk/sql/_util/hashing.py +34 -0
- airbyte_cdk/sql/_util/name_normalizers.py +92 -0
- airbyte_cdk/sql/constants.py +32 -0
- airbyte_cdk/sql/exceptions.py +235 -0
- airbyte_cdk/sql/secrets.py +123 -0
- airbyte_cdk/sql/shared/__init__.py +15 -0
- airbyte_cdk/sql/shared/catalog_providers.py +145 -0
- airbyte_cdk/sql/shared/sql_processor.py +786 -0
- airbyte_cdk/sql/types.py +160 -0
- airbyte_cdk/test/__init__.py +7 -0
- airbyte_cdk/test/catalog_builder.py +81 -0
- airbyte_cdk/test/entrypoint_wrapper.py +250 -0
- airbyte_cdk/test/mock_http/__init__.py +6 -0
- airbyte_cdk/test/mock_http/matcher.py +41 -0
- airbyte_cdk/test/mock_http/mocker.py +185 -0
- airbyte_cdk/test/mock_http/request.py +103 -0
- airbyte_cdk/test/mock_http/response.py +28 -0
- airbyte_cdk/test/mock_http/response_builder.py +237 -0
- airbyte_cdk/test/state_builder.py +33 -0
- airbyte_cdk/test/utils/__init__.py +1 -0
- airbyte_cdk/test/utils/data.py +24 -0
- airbyte_cdk/test/utils/http_mocking.py +16 -0
- airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
- airbyte_cdk/test/utils/reading.py +26 -0
- airbyte_cdk/utils/__init__.py +10 -0
- airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
- airbyte_cdk/utils/analytics_message.py +25 -0
- airbyte_cdk/utils/constants.py +5 -0
- airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
- airbyte_cdk/utils/datetime_helpers.py +499 -0
- airbyte_cdk/utils/event_timing.py +85 -0
- airbyte_cdk/utils/is_cloud_environment.py +18 -0
- airbyte_cdk/utils/mapping_helpers.py +162 -0
- airbyte_cdk/utils/message_utils.py +26 -0
- airbyte_cdk/utils/oneof_option_config.py +33 -0
- airbyte_cdk/utils/print_buffer.py +75 -0
- airbyte_cdk/utils/schema_inferrer.py +270 -0
- airbyte_cdk/utils/slice_hasher.py +37 -0
- airbyte_cdk/utils/spec_schema_transformations.py +26 -0
- airbyte_cdk/utils/stream_status_utils.py +43 -0
- airbyte_cdk/utils/traced_exception.py +145 -0
- airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
- airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
- airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
- airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
- airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
- airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,704 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
import abc
|
|
6
|
+
import dataclasses
|
|
7
|
+
import datetime
|
|
8
|
+
import logging
|
|
9
|
+
import re
|
|
10
|
+
import time
|
|
11
|
+
from datetime import timedelta
|
|
12
|
+
from threading import RLock
|
|
13
|
+
from typing import TYPE_CHECKING, Any, Mapping, Optional
|
|
14
|
+
from urllib import parse
|
|
15
|
+
|
|
16
|
+
import requests
|
|
17
|
+
import requests_cache
|
|
18
|
+
from pyrate_limiter import InMemoryBucket, Limiter, RateItem, TimeClock
|
|
19
|
+
from pyrate_limiter import Rate as PyRateRate
|
|
20
|
+
from pyrate_limiter.exceptions import BucketFullException
|
|
21
|
+
|
|
22
|
+
# LimiterMixin reads attributes that only exist on a requests session. Giving the
# mixin a requests.Session base during type checking keeps mypy from reporting those
# attributes as missing; outside type checking the base is plain ``object``.
if TYPE_CHECKING:
    MIXIN_BASE = requests.Session
else:
    MIXIN_BASE = object

# Only warnings and above are let through from the pyrate_limiter logger.
logging.getLogger("pyrate_limiter").setLevel(logging.WARNING)

# Module-wide logger, registered under the "airbyte" name.
logger = logging.getLogger("airbyte")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclasses.dataclass
class Rate:
    """Call rate limit expressed as a call count paired with a time window.

    NOTE(review): presumably read as "``limit`` calls per ``interval``"; the code that
    enforces it is not part of this class - confirm against the policies using it.
    """

    # Number of calls associated with one interval.
    limit: int
    # Length of the time window the limit refers to.
    interval: timedelta
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class CallRateLimitHit(Exception):
    """Exception that carries the details of a call rejected by a rate limit."""

    def __init__(self, error: str, item: Any, weight: int, rate: str, time_to_wait: timedelta):
        """Constructor

        :param error: error message, used as the exception message
        :param item: object that was passed into acquire_call
        :param weight: number of credits that were requested
        :param rate: string representation of the rate that was violated
        :param time_to_wait: how long to wait until more calls become available
        """
        super().__init__(error)
        # Keep the full context on the exception so handlers can inspect it.
        self.item, self.weight = item, weight
        self.rate, self.time_to_wait = rate, time_to_wait
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class AbstractCallRatePolicy(abc.ABC):
    """Interface every call rate policy has to implement.

    A policy is meant to be configurable with different rules, like N per M for
    endpoint X. Endpoint X is matched with APIBudget.
    """

    @abc.abstractmethod
    def matches(self, request: Any) -> bool:
        """Decide whether this policy is the one to apply to the given request.

        :param request: request object to test against the policy
        :return: True if policy should apply to this request, False - otherwise
        """

    @abc.abstractmethod
    def try_acquire(self, request: Any, weight: int) -> None:
        """Attempt to acquire the given request against this policy.

        NOTE(review): implementations presumably signal an exhausted budget by raising
        (e.g. CallRateLimitHit) - confirm against the concrete policies.

        :param request: a request object representing a single call to API
        :param weight: number of requests to deduct from credit
        :return: nothing
        """

    @abc.abstractmethod
    def update(
        self,
        available_calls: Optional[int],
        call_reset_ts: Optional[datetime.datetime],
    ) -> None:
        """Bring the policy's call rate counting in line with current values.

        :param available_calls: current number of available calls, or None
        :param call_reset_ts: timestamp of the call reset, or None
        """
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class RequestMatcher(abc.ABC):
    """Callable used to pair a request object with call rate policies."""

    @abc.abstractmethod
    def __call__(self, request: Any) -> bool:
        """Test the given request against this matcher.

        :param request: request object to test
        :return: True if matches the provided request object, False - otherwise
        """
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class HttpRequestMatcher(RequestMatcher):
    """RequestMatcher for HTTP requests described by a concrete URL.

    The URL is split into a base and a regex-escaped path, and all matching is
    delegated to an internal HttpRequestRegexMatcher.
    """

    def __init__(
        self,
        method: Optional[str] = None,
        url: Optional[str] = None,
        params: Optional[Mapping[str, Any]] = None,
        headers: Optional[Mapping[str, Any]] = None,
    ):
        """Constructor

        :param method: HTTP method (e.g., "GET", "POST").
        :param url: Full URL to match.
        :param params: Dictionary of query parameters to match.
        :param headers: Dictionary of headers to match.
        """
        base: Optional[str] = None
        escaped_path: Optional[str] = None
        if url:
            parts = parse.urlsplit(url)
            base = f"{parts.scheme}://{parts.netloc}"
            # An empty path or a bare "/" is passed on as "no path pattern".
            if parts.path and parts.path != "/":
                # Escape the path so the regex matcher treats it as literal text.
                escaped_path = re.escape(parts.path)

        self._regex_matcher = HttpRequestRegexMatcher(
            method=method,
            url_base=base,
            url_path_pattern=escaped_path,
            params=params,
            headers=headers,
        )

    def __call__(self, request: Any) -> bool:
        """Delegate the decision to the wrapped regex matcher.

        :param request: A requests.Request or requests.PreparedRequest instance.
        :return: True if the request matches all provided criteria; False otherwise.
        """
        delegate = self._regex_matcher
        return delegate(request)

    def __str__(self) -> str:
        """Render the criteria held by the wrapped regex matcher."""
        inner = self._regex_matcher
        compiled_path = inner._url_path_pattern
        # The compiled pattern is absent when no path was given; show nothing then.
        path_text = compiled_path.pattern if compiled_path else ""
        return (
            f"HttpRequestMatcher(method={inner._method}, "
            f"url={inner._url_base}{path_text}, "
            f"params={inner._params}, headers={inner._headers})"
        )
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class HttpRequestRegexMatcher(RequestMatcher):
    """
    Extended RequestMatcher for HTTP requests that supports matching on:
      - HTTP method (the expected method is upper-cased before comparison)
      - URL base (scheme + netloc) optionally
      - URL path pattern (a regex searched within the path portion of the URL)
      - Query parameters (every expected key/value pair must be present)
      - Headers (header names compared case-insensitively)

    Criteria that are not provided are not checked.
    """

    def __init__(
        self,
        method: Optional[str] = None,
        url_base: Optional[str] = None,
        url_path_pattern: Optional[str] = None,
        params: Optional[Mapping[str, Any]] = None,
        headers: Optional[Mapping[str, Any]] = None,
    ):
        """
        :param method: HTTP method (e.g. "GET", "POST"); stored upper-cased.
        :param url_base: Base URL (scheme://host) that must match; trailing slashes are removed.
        :param url_path_pattern: A regex pattern that will be applied to the path portion of the URL.
        :param params: Dictionary of query parameters that must be present in the request.
        :param headers: Dictionary of headers that must be present (header keys are compared case-insensitively).
        """
        self._method = method.upper() if method else None

        # Normalize the url_base if provided: remove trailing slash.
        self._url_base = url_base.rstrip("/") if url_base else None

        # Compile the URL path pattern if provided.
        self._url_path_pattern = re.compile(url_path_pattern) if url_path_pattern else None

        # Normalize query parameters to strings.
        self._params = {str(k): str(v) for k, v in (params or {}).items()}

        # Normalize header keys to lowercase.
        self._headers = {str(k).lower(): str(v) for k, v in (headers or {}).items()}

    @staticmethod
    def _match_dict(obj: Mapping[str, Any], pattern: Mapping[str, Any]) -> bool:
        """Check that every key/value in the pattern exists in the object."""
        return pattern.items() <= obj.items()

    def __call__(self, request: Any) -> bool:
        """
        :param request: A requests.Request or requests.PreparedRequest instance.
        :return: True if the request matches all provided criteria; False otherwise
            (including when the request is of any other type).
        """
        # Prepare the request (if needed) and extract the URL details.
        if isinstance(request, requests.Request):
            prepared_request = request.prepare()
        elif isinstance(request, requests.PreparedRequest):
            prepared_request = request
        else:
            return False

        # Check HTTP method.
        if self._method is not None:
            if prepared_request.method != self._method:
                return False

        # Parse the URL.
        parsed_url = parse.urlsplit(prepared_request.url)
        # Reconstruct the base: scheme://netloc
        request_url_base = f"{str(parsed_url.scheme)}://{str(parsed_url.netloc)}"
        # The path (without query parameters); trailing slashes are ignored.
        request_path = str(parsed_url.path).rstrip("/")

        # If a base URL is provided, check that it matches.
        if self._url_base is not None:
            if request_url_base != self._url_base:
                return False

        # If a URL path pattern is provided, ensure the path matches the regex.
        if self._url_path_pattern is not None:
            if not self._url_path_pattern.search(request_path):
                return False

        # Check query parameters.
        if self._params:
            # Compare against the set of ALL (key, value) pairs rather than a dict:
            # - keep_blank_values=True keeps parameters such as `?flag=` visible, so an
            #   expected empty value can match;
            # - a set keeps every occurrence of a repeated key (`?a=1&a=2`), whereas
            #   dict() would silently keep only the last one.
            query_pairs = set(parse.parse_qsl(str(parsed_url.query), keep_blank_values=True))
            if not all(expected in query_pairs for expected in self._params.items()):
                return False

        # Check headers (normalize keys to lower-case).
        if self._headers:
            req_headers = {k.lower(): v for k, v in prepared_request.headers.items()}
            if not self._match_dict(req_headers, self._headers):
                return False

        return True

    def __str__(self) -> str:
        """Return a description of the configured criteria."""
        regex = self._url_path_pattern.pattern if self._url_path_pattern else None
        return (
            f"HttpRequestRegexMatcher(method={self._method}, url_base={self._url_base}, "
            f"url_path_pattern={regex}, params={self._params}, headers={self._headers})"
        )
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
class BaseCallRatePolicy(AbstractCallRatePolicy, abc.ABC):
    """Base for call rate policies that pick their requests through a list of matchers."""

    def __init__(self, matchers: list[RequestMatcher]):
        self._matchers = matchers

    def matches(self, request: Any) -> bool:
        """Report whether this policy applies to the given request.

        :param request: the request to test
        :return: True when no matchers are configured or at least one matcher accepts
            the request; False otherwise
        """
        # An empty matcher list means the policy applies to every request.
        return not self._matchers or any(accepts(request) for accepts in self._matchers)
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
class UnlimitedCallRatePolicy(BaseCallRatePolicy):
    """
    Policy that explicitly places no limit on the requests it matches.

    Use it to carve a group of requests out of a budget so that no limit applies to them.

    Example:

    APIBudget(
        [
            UnlimitedCallRatePolicy(
                matchers=[HttpRequestMatcher(url="/some/method", headers={"sandbox": True})],
            ),
            FixedWindowCallRatePolicy(
                matchers=[HttpRequestMatcher(url="/some/method")],
                next_reset_ts=datetime.datetime.now(),
                period=timedelta(hours=1),
                call_limit=1000,
            ),
        ]
    )

    The code above will limit all calls to /some/method except calls that have header sandbox=True
    """

    def try_acquire(self, request: Any, weight: int) -> None:
        """Always succeed: nothing is counted and nothing is raised."""
        return None

    def update(
        self, available_calls: Optional[int], call_reset_ts: Optional[datetime.datetime]
    ) -> None:
        """Ignore updates: there is no state to adjust."""
        return None
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
class FixedWindowCallRatePolicy(BaseCallRatePolicy):
    """Policy allowing a fixed number of calls per window, with the counter reset at each window boundary."""

    def __init__(
        self,
        next_reset_ts: datetime.datetime,
        period: timedelta,
        call_limit: int,
        matchers: list[RequestMatcher],
    ):
        """A policy that allows {call_limit} calls within a {period} time interval

        :param next_reset_ts: next call rate reset time point
        :param period: call rate reset period
        :param call_limit: maximum total weight of calls allowed within one period
        :param matchers: matchers selecting the requests this policy applies to
        """

        self._next_reset_ts = next_reset_ts
        self._offset = period
        self._call_limit = call_limit
        self._calls_num = 0
        self._lock = RLock()
        super().__init__(matchers=matchers)

    def try_acquire(self, request: Any, weight: int) -> None:
        """Consume ``weight`` calls from the current window.

        :param request: the request being accounted for; must match this policy
        :param weight: number of calls this request counts for
        :raises ValueError: if weight exceeds the call limit or the request does not match the policy
        :raises CallRateLimitHit: if the current window does not have ``weight`` calls left
        """
        if weight > self._call_limit:
            raise ValueError("Weight can not exceed the call limit")
        if not self.matches(request):
            raise ValueError("Request does not match the policy")

        with self._lock:
            self._update_current_window()

            if self._calls_num + weight > self._call_limit:
                reset_in = self._next_reset_ts - datetime.datetime.now()
                error_message = (
                    f"reached maximum number of allowed calls {self._call_limit} "
                    f"per {self._offset} interval, next reset in {reset_in}."
                )
                raise CallRateLimitHit(
                    error=error_message,
                    item=request,
                    weight=weight,
                    rate=f"{self._call_limit} per {self._offset}",
                    time_to_wait=reset_in,
                )

            self._calls_num += weight

    def __str__(self) -> str:
        """Return a description of the limit, current usage, next reset and matchers."""
        matcher_str = ", ".join(f"{matcher}" for matcher in self._matchers)
        return (
            f"FixedWindowCallRatePolicy(call_limit={self._call_limit}, period={self._offset}, "
            f"calls_used={self._calls_num}, next_reset={self._next_reset_ts}, "
            f"matchers=[{matcher_str}])"
        )

    def update(
        self, available_calls: Optional[int], call_reset_ts: Optional[datetime.datetime]
    ) -> None:
        """Update call rate counters, by default, only reacts to decreasing updates of available_calls and changes to call_reset_ts.
        We ignore updates with available_calls > current_available_calls to support call rate limits that are lower than API limits.

        :param available_calls: number of calls the API reports as still available, if known
        :param call_reset_ts: reset time reported by the API, if known
        """
        with self._lock:
            self._update_current_window()
            current_available_calls = self._call_limit - self._calls_num

            if available_calls is not None and current_available_calls > available_calls:
                logger.debug(
                    "got rate limit update from api, adjusting available calls from %s to %s",
                    current_available_calls,
                    available_calls,
                )
                self._calls_num = self._call_limit - available_calls

            if call_reset_ts is not None and call_reset_ts != self._next_reset_ts:
                logger.debug(
                    "got rate limit update from api, adjusting reset time from %s to %s",
                    self._next_reset_ts,
                    call_reset_ts,
                )
                self._next_reset_ts = call_reset_ts

    def _update_current_window(self) -> None:
        """Start a new window (reset the counter) when the reset time has passed."""
        now = datetime.datetime.now()
        if now > self._next_reset_ts:
            logger.debug("started new window, %s calls available now", self._call_limit)
            if self._offset > timedelta(0):
                # Jump over every window that elapsed while idle so the reset time lands
                # strictly after `now`. Advancing a single period would leave the reset time
                # in the past, and each following call would wipe the counter again.
                elapsed_windows = (now - self._next_reset_ts) // self._offset + 1
                self._next_reset_ts = self._next_reset_ts + elapsed_windows * self._offset
            else:
                # Non-positive period: keep the single-step behaviour.
                self._next_reset_ts = self._next_reset_ts + self._offset
            self._calls_num = 0
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
class MovingWindowCallRatePolicy(BaseCallRatePolicy):
    """
    Policy to control requests rate implemented on top of PyRateLimiter lib.
    The main difference between this policy and FixedWindowCallRatePolicy is that the rate-limiting window
    is moving along requests that we made, and there is no moment when we reset an available number of calls.
    This strategy requires saving of timestamps of all requests within a window.
    """

    def __init__(self, rates: list[Rate], matchers: list[RequestMatcher]):
        """Constructor

        :param rates: list of rates, the order is important and must be ascending
        :param matchers: matchers selecting the requests this policy applies to
        :raises ValueError: if the list of rates is empty
        """
        if not rates:
            raise ValueError("The list of rates can not be empty")
        # Intervals are handed to the underlying library in milliseconds.
        pyrate_rates = [
            PyRateRate(limit=rate.limit, interval=int(rate.interval.total_seconds() * 1000))
            for rate in rates
        ]
        self._bucket = InMemoryBucket(pyrate_rates)
        # Limiter will create the background task that clears old requests in the bucket
        self._limiter = Limiter(self._bucket)
        super().__init__(matchers=matchers)

    def try_acquire(self, request: Any, weight: int) -> None:
        """Register ``weight`` calls for the request in the bucket.

        :param request: the request being accounted for; must match this policy
        :param weight: number of calls this request counts for
        :raises ValueError: if the request does not match the policy
        :raises CallRateLimitHit: if the bucket is full; carries the time to wait reported by the bucket
        """
        if not self.matches(request):
            raise ValueError("Request does not match the policy")

        try:
            self._limiter.try_acquire(request, weight=weight)
        except BucketFullException as exc:
            item = self._limiter.bucket_factory.wrap_item(request, weight)
            assert isinstance(item, RateItem)

            with self._limiter.lock:
                time_to_wait = self._bucket.waiting(item)
                assert isinstance(time_to_wait, int)

                raise CallRateLimitHit(
                    error=str(exc.meta_info["error"]),
                    item=request,
                    weight=int(exc.meta_info["weight"]),
                    rate=str(exc.meta_info["rate"]),
                    time_to_wait=timedelta(milliseconds=time_to_wait),
                )

    def update(
        self, available_calls: Optional[int], call_reset_ts: Optional[datetime.datetime]
    ) -> None:
        """Adjust call bucket to reflect the state of the API server

        Only the case "no calls left and no reset time known" is handled: the bucket is
        filled up to the limit of the first rate so that further calls are held back.

        :param available_calls: number of calls the API reports as still available, if known
        :param call_reset_ts: reset time reported by the API, if known
        :return:
        """
        if (
            available_calls is not None and call_reset_ts is None
        ):  # we do our best to sync buckets with API
            if available_calls == 0:
                with self._limiter.lock:
                    # Number of free slots left under the first rate. This must be a
                    # difference: a `<` comparison would yield a bool and add one item only.
                    items_to_add = self._bucket.rates[0].limit - self._bucket.count()
                    if items_to_add > 0:
                        now: int = TimeClock().now()  # type: ignore[no-untyped-call]
                        self._bucket.put(RateItem(name="dummy", timestamp=now, weight=items_to_add))
        # TODO: add support if needed, it might be that it is not possible to make a good solution for this case
        # if available_calls is not None and call_reset_ts is not None:
        #     ts = call_reset_ts.timestamp()

    def __str__(self) -> str:
        """Return a human-friendly description of the moving window rate policy for logging purposes."""
        rates_info = ", ".join(
            f"{rate.limit} per {timedelta(milliseconds=rate.interval)}"
            for rate in self._bucket.rates
        )
        current_bucket_count = self._bucket.count()
        matcher_str = ", ".join(f"{matcher}" for matcher in self._matchers)
        return (
            f"MovingWindowCallRatePolicy(rates=[{rates_info}], current_bucket_count={current_bucket_count}, "
            f"matchers=[{matcher_str}])"
        )
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
class AbstractAPIBudget(abc.ABC):
    """Contract for a budget of API calls, where a client may make N calls per T interval.

    Important: a budget never performs API calls itself. The calling code is responsible
    for going through this interface so that the API's call rate limits are respected.

    Several policies may be applied to different groups of requests; RequestMatchers are
    used to tell these groups apart.
    """

    @abc.abstractmethod
    def acquire_call(
        self, request: Any, block: bool = True, timeout: Optional[float] = None
    ) -> None:
        """Take one call out of the budget, blocking by default.

        :param request: the request a call credit is needed for
        :param block: when true (default) will block the current thread until call credit is available
        :param timeout: if set will limit maximum time in block, otherwise will wait until credit is available
        :raises: CallRateLimitHit - when no credits left and if timeout was set the waiting time exceed the timeout
        """

    @abc.abstractmethod
    def get_matching_policy(self, request: Any) -> Optional[AbstractCallRatePolicy]:
        """Return the call rate policy that applies to the request, if any."""

    @abc.abstractmethod
    def update_from_response(self, request: Any, response: Any) -> None:
        """Refresh budget information from an API response.

        :param request: the initial request that triggered this response
        :param response: response from the API
        """
|
|
511
|
+
|
|
512
|
+
|
|
513
|
+
class APIBudget(AbstractAPIBudget):
    """Default APIBudget implementation"""

    def __init__(
        self, policies: list[AbstractCallRatePolicy], maximum_attempts_to_acquire: int = 100000
    ) -> None:
        """Constructor

        :param policies: list of policies in this budget
        :param maximum_attempts_to_acquire: number of attempts before throwing hit ratelimit exception, we put some big number here
         to avoid situations when many threads compete with each other for a few lots over a significant amount of time
        """

        self._policies = policies
        self._maximum_attempts_to_acquire = maximum_attempts_to_acquire

    def _extract_endpoint(self, request: Any) -> str:
        """Extract the endpoint URL from the request if available.

        Used for log messages only, so any failure is logged and "unknown endpoint" is returned.
        """
        endpoint = None
        try:
            # If the request is already a PreparedRequest, it should have a URL.
            if isinstance(request, requests.PreparedRequest):
                endpoint = request.url
            # If it's a requests.Request, we call prepare() to extract the URL.
            elif isinstance(request, requests.Request):
                prepared = request.prepare()
                endpoint = prepared.url
        except Exception as e:
            # Best effort: the endpoint is informational, never fail the call because of it.
            logger.debug(f"Error extracting endpoint: {e}")
        if endpoint:
            return endpoint
        return "unknown endpoint"

    def get_matching_policy(self, request: Any) -> Optional[AbstractCallRatePolicy]:
        """Return the first policy (in the order given to the constructor) that matches the request, or None."""
        for policy in self._policies:
            if policy.matches(request):
                return policy
        return None

    def acquire_call(
        self, request: Any, block: bool = True, timeout: Optional[float] = None
    ) -> None:
        """Try to get a call from budget, will block by default.
        Policies are checked sequentially in the same order they were added.
        The first policy that matches the request is the one the call is acquired from;
        when no policy matches, the call is allowed without any accounting.

        :param request: the API request
        :param block: when True (default) will block until a call credit is available
        :param timeout: if provided, limits maximum waiting time; otherwise, waits indefinitely
        :raises: CallRateLimitHit if the call credit cannot be acquired within the timeout
        """

        policy = self.get_matching_policy(request)
        endpoint = self._extract_endpoint(request)
        if policy:
            logger.debug(f"Acquiring call for endpoint {endpoint} using policy: {policy}")
            self._do_acquire(request=request, policy=policy, block=block, timeout=timeout)
        elif self._policies:
            logger.debug(
                f"No policies matched for endpoint {endpoint} (request: {request}). Allowing call by default."
            )

    def update_from_response(self, request: Any, response: Any) -> None:
        """Update budget information based on the API response.

        This default implementation does nothing.

        :param request: the initial request that triggered this response
        :param response: response from the API
        """
        pass

    def _do_acquire(
        self, request: Any, policy: AbstractCallRatePolicy, block: bool, timeout: Optional[float]
    ) -> None:
        """Internal method to try to acquire a call credit.

        :param request: the API request
        :param policy: the matching rate-limiting policy
        :param block: indicates whether to block until a call credit is available
        :param timeout: when blocking, upper bound (in seconds) on each individual sleep between attempts
        :raises: CallRateLimitHit if unable to acquire a call credit
        """
        last_exception = None
        endpoint = self._extract_endpoint(request)
        # sometimes we spend all budget before a second attempt, so we have a few more attempts
        # The range is inclusive of the configured maximum: stopping one short would make
        # maximum_attempts_to_acquire=1 return without ever trying to acquire.
        for attempt in range(1, self._maximum_attempts_to_acquire + 1):
            try:
                policy.try_acquire(request, weight=1)
                return
            except CallRateLimitHit as exc:
                last_exception = exc
                if block:
                    if timeout is not None:
                        time_to_wait = min(timedelta(seconds=timeout), exc.time_to_wait)
                    else:
                        time_to_wait = exc.time_to_wait
                    # Ensure we never sleep for a negative duration.
                    time_to_wait = max(timedelta(0), time_to_wait)
                    logger.debug(
                        f"Policy {policy} reached call limit for endpoint {endpoint} ({exc.rate}). "
                        f"Sleeping for {time_to_wait} on attempt {attempt}."
                    )
                    time.sleep(time_to_wait.total_seconds())
                else:
                    logger.debug(
                        f"Policy {policy} reached call limit for endpoint {endpoint} ({exc.rate}) "
                        f"and blocking is disabled."
                    )
                    raise

        if last_exception:
            logger.debug(
                f"Exhausted all {self._maximum_attempts_to_acquire} attempts to acquire a call for endpoint {endpoint} "
                f"using policy: {policy}"
            )
            raise last_exception
|
|
628
|
+
|
|
629
|
+
|
|
630
|
+
class HttpAPIBudget(APIBudget):
    """Implementation of AbstractAPIBudget for HTTP"""

    def __init__(
        self,
        ratelimit_reset_header: str = "ratelimit-reset",
        ratelimit_remaining_header: str = "ratelimit-remaining",
        status_codes_for_ratelimit_hit: Optional[list[int]] = None,
        **kwargs: Any,
    ):
        """Constructor

        :param ratelimit_reset_header: name of the header that has a timestamp of the next reset of call budget
        :param ratelimit_remaining_header: name of the header that has the number of calls left
        :param status_codes_for_ratelimit_hit: list of HTTP status codes that signal about rate limit being hit;
            defaults to [429] when not provided
        :param kwargs: forwarded to the APIBudget constructor
        """
        self._ratelimit_reset_header = ratelimit_reset_header
        self._ratelimit_remaining_header = ratelimit_remaining_header
        # None stands in for the default so that no list object is shared between instances.
        self._status_codes_for_ratelimit_hit = (
            [429] if status_codes_for_ratelimit_hit is None else status_codes_for_ratelimit_hit
        )
        super().__init__(**kwargs)

    def update_from_response(self, request: Any, response: Any) -> None:
        """Pass rate limit information from the response to the policy matching the request.

        Nothing happens when no policy matches or the response is not a requests.Response.

        :param request: the initial request that triggered this response
        :param response: response from the API
        """
        policy = self.get_matching_policy(request)
        if not policy:
            return

        if isinstance(response, requests.Response):
            available_calls = self.get_calls_left_from_response(response)
            reset_ts = self.get_reset_ts_from_response(response)
            policy.update(available_calls=available_calls, call_reset_ts=reset_ts)

    def get_reset_ts_from_response(
        self, response: requests.Response
    ) -> Optional[datetime.datetime]:
        """Read the reset header as an integer timestamp; return None when the header is absent or empty."""
        if response.headers.get(self._ratelimit_reset_header):
            return datetime.datetime.fromtimestamp(
                int(response.headers[self._ratelimit_reset_header])
            )
        return None

    def get_calls_left_from_response(self, response: requests.Response) -> Optional[int]:
        """Return the number of calls left according to the response.

        The remaining-calls header wins when present; otherwise a rate-limit status code
        means 0 calls left; otherwise the number is unknown (None).
        """
        if response.headers.get(self._ratelimit_remaining_header):
            return int(response.headers[self._ratelimit_remaining_header])

        if response.status_code in self._status_codes_for_ratelimit_hit:
            return 0

        return None
|
|
678
|
+
|
|
679
|
+
|
|
680
|
+
class LimiterMixin(MIXIN_BASE):
    """Mixin that makes a session go through an API budget for every request it sends."""

    def __init__(
        self,
        api_budget: AbstractAPIBudget,
        **kwargs: Any,
    ):
        # The budget is stored before the remaining keyword arguments are passed up the MRO.
        self._api_budget = api_budget
        super().__init__(**kwargs)  # type: ignore # Base Session doesn't take any kwargs

    def send(self, request: requests.PreparedRequest, **kwargs: Any) -> requests.Response:
        """Acquire a call from the budget, send the request, then report the response to the budget."""
        self._api_budget.acquire_call(request)
        api_response = super().send(request, **kwargs)
        self._api_budget.update_from_response(request, api_response)
        return api_response
|
|
697
|
+
|
|
698
|
+
|
|
699
|
+
class LimiterSession(LimiterMixin, requests.Session):
    """A requests.Session combined with LimiterMixin, so requests it sends are rate-limited."""
|
|
701
|
+
|
|
702
|
+
|
|
703
|
+
class CachedLimiterSession(requests_cache.CacheMixin, LimiterMixin, requests.Session):
|
|
704
|
+
"""Session class with caching and rate-limiting behavior."""
|