airbyte-cdk 0.0.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/__init__.py +358 -0
- airbyte_cdk/cli/__init__.py +1 -0
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
- airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
- airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
- airbyte_cdk/config_observation.py +104 -0
- airbyte_cdk/connector.py +123 -0
- airbyte_cdk/connector_builder/README.md +53 -0
- airbyte_cdk/connector_builder/__init__.py +3 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
- airbyte_cdk/connector_builder/main.py +107 -0
- airbyte_cdk/connector_builder/models.py +73 -0
- airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
- airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
- airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
- airbyte_cdk/connector_builder/test_reader/types.py +83 -0
- airbyte_cdk/destinations/__init__.py +8 -0
- airbyte_cdk/destinations/destination.py +154 -0
- airbyte_cdk/destinations/vector_db_based/README.md +37 -0
- airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
- airbyte_cdk/destinations/vector_db_based/config.py +298 -0
- airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
- airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
- airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
- airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
- airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
- airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
- airbyte_cdk/entrypoint.py +414 -0
- airbyte_cdk/exception_handler.py +56 -0
- airbyte_cdk/logger.py +109 -0
- airbyte_cdk/models/__init__.py +72 -0
- airbyte_cdk/models/airbyte_protocol.py +88 -0
- airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
- airbyte_cdk/models/well_known_types.py +5 -0
- airbyte_cdk/py.typed +0 -0
- airbyte_cdk/sources/__init__.py +26 -0
- airbyte_cdk/sources/abstract_source.py +326 -0
- airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
- airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
- airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
- airbyte_cdk/sources/config.py +27 -0
- airbyte_cdk/sources/connector_state_manager.py +161 -0
- airbyte_cdk/sources/declarative/__init__.py +3 -0
- airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
- airbyte_cdk/sources/declarative/async_job/job.py +52 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
- airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
- airbyte_cdk/sources/declarative/async_job/status.py +24 -0
- airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
- airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
- airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
- airbyte_cdk/sources/declarative/auth/token.py +267 -0
- airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
- airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
- airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
- airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
- airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
- airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
- airbyte_cdk/sources/declarative/declarative_source.py +36 -0
- airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
- airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
- airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
- airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
- airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
- airbyte_cdk/sources/declarative/exceptions.py +9 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
- airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
- airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
- airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
- airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
- airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
- airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
- airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
- airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
- airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
- airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
- airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
- airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
- airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
- airbyte_cdk/sources/declarative/models/__init__.py +2 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
- airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
- airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
- airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
- airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
- airbyte_cdk/sources/declarative/requesters/README.md +56 -0
- airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
- airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
- airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
- airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
- airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
- airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
- airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
- airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
- airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
- airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
- airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
- airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
- airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
- airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
- airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
- airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
- airbyte_cdk/sources/declarative/spec/spec.py +48 -0
- airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
- airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
- airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
- airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
- airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
- airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
- airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
- airbyte_cdk/sources/declarative/types.py +25 -0
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
- airbyte_cdk/sources/file_based/README.md +152 -0
- airbyte_cdk/sources/file_based/__init__.py +24 -0
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
- airbyte_cdk/sources/file_based/config/__init__.py +0 -0
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
- airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
- airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
- airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
- airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
- airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
- airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
- airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
- airbyte_cdk/sources/file_based/exceptions.py +159 -0
- airbyte_cdk/sources/file_based/file_based_source.py +466 -0
- airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
- airbyte_cdk/sources/file_based/file_record_data.py +22 -0
- airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
- airbyte_cdk/sources/file_based/remote_file.py +18 -0
- airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
- airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
- airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
- airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
- airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
- airbyte_cdk/sources/file_based/types.py +10 -0
- airbyte_cdk/sources/http_config.py +10 -0
- airbyte_cdk/sources/http_logger.py +55 -0
- airbyte_cdk/sources/message/__init__.py +19 -0
- airbyte_cdk/sources/message/repository.py +137 -0
- airbyte_cdk/sources/source.py +95 -0
- airbyte_cdk/sources/specs/transfer_modes.py +26 -0
- airbyte_cdk/sources/streams/__init__.py +8 -0
- airbyte_cdk/sources/streams/availability_strategy.py +84 -0
- airbyte_cdk/sources/streams/call_rate.py +704 -0
- airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
- airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
- airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
- airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
- airbyte_cdk/sources/streams/concurrent/README.md +7 -0
- airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
- airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
- airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
- airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
- airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
- airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
- airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
- airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
- airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
- airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
- airbyte_cdk/sources/streams/core.py +703 -0
- airbyte_cdk/sources/streams/http/__init__.py +10 -0
- airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
- airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
- airbyte_cdk/sources/streams/http/exceptions.py +61 -0
- airbyte_cdk/sources/streams/http/http.py +673 -0
- airbyte_cdk/sources/streams/http/http_client.py +531 -0
- airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
- airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
- airbyte_cdk/sources/streams/utils/__init__.py +3 -0
- airbyte_cdk/sources/types.py +169 -0
- airbyte_cdk/sources/utils/__init__.py +7 -0
- airbyte_cdk/sources/utils/casing.py +12 -0
- airbyte_cdk/sources/utils/files_directory.py +15 -0
- airbyte_cdk/sources/utils/record_helper.py +53 -0
- airbyte_cdk/sources/utils/schema_helpers.py +230 -0
- airbyte_cdk/sources/utils/slice_logger.py +57 -0
- airbyte_cdk/sources/utils/transform.py +277 -0
- airbyte_cdk/sources/utils/types.py +7 -0
- airbyte_cdk/sql/__init__.py +0 -0
- airbyte_cdk/sql/_util/__init__.py +0 -0
- airbyte_cdk/sql/_util/hashing.py +34 -0
- airbyte_cdk/sql/_util/name_normalizers.py +92 -0
- airbyte_cdk/sql/constants.py +32 -0
- airbyte_cdk/sql/exceptions.py +235 -0
- airbyte_cdk/sql/secrets.py +123 -0
- airbyte_cdk/sql/shared/__init__.py +15 -0
- airbyte_cdk/sql/shared/catalog_providers.py +145 -0
- airbyte_cdk/sql/shared/sql_processor.py +786 -0
- airbyte_cdk/sql/types.py +160 -0
- airbyte_cdk/test/__init__.py +7 -0
- airbyte_cdk/test/catalog_builder.py +81 -0
- airbyte_cdk/test/entrypoint_wrapper.py +250 -0
- airbyte_cdk/test/mock_http/__init__.py +6 -0
- airbyte_cdk/test/mock_http/matcher.py +41 -0
- airbyte_cdk/test/mock_http/mocker.py +185 -0
- airbyte_cdk/test/mock_http/request.py +103 -0
- airbyte_cdk/test/mock_http/response.py +28 -0
- airbyte_cdk/test/mock_http/response_builder.py +237 -0
- airbyte_cdk/test/state_builder.py +33 -0
- airbyte_cdk/test/utils/__init__.py +1 -0
- airbyte_cdk/test/utils/data.py +24 -0
- airbyte_cdk/test/utils/http_mocking.py +16 -0
- airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
- airbyte_cdk/test/utils/reading.py +26 -0
- airbyte_cdk/utils/__init__.py +10 -0
- airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
- airbyte_cdk/utils/analytics_message.py +25 -0
- airbyte_cdk/utils/constants.py +5 -0
- airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
- airbyte_cdk/utils/datetime_helpers.py +499 -0
- airbyte_cdk/utils/event_timing.py +85 -0
- airbyte_cdk/utils/is_cloud_environment.py +18 -0
- airbyte_cdk/utils/mapping_helpers.py +162 -0
- airbyte_cdk/utils/message_utils.py +26 -0
- airbyte_cdk/utils/oneof_option_config.py +33 -0
- airbyte_cdk/utils/print_buffer.py +75 -0
- airbyte_cdk/utils/schema_inferrer.py +270 -0
- airbyte_cdk/utils/slice_hasher.py +37 -0
- airbyte_cdk/utils/spec_schema_transformations.py +26 -0
- airbyte_cdk/utils/stream_status_utils.py +43 -0
- airbyte_cdk/utils/traced_exception.py +145 -0
- airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
- airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
- airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
- airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
- airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
- airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
from abc import ABC
|
|
2
|
+
from datetime import datetime, timedelta
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import Callable
|
|
5
|
+
|
|
6
|
+
from airbyte_cdk.sources.streams.concurrent.cursor_types import CursorValueType
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ClampingStrategy(ABC):
    """Base type for strategies that map a cursor value onto another cursor value.

    The base `clamp` always raises; concrete strategies are expected to override it.
    Note that `clamp` is not marked abstract, so this class itself can be instantiated.
    """

    def clamp(self, value: CursorValueType) -> CursorValueType:
        """Return the clamped counterpart of `value`.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class NoClamping(ClampingStrategy):
    """Clamping strategy that performs no adjustment at all."""

    def clamp(self, value: CursorValueType) -> CursorValueType:
        """Hand `value` back exactly as it was received."""
        return value
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ClampingEndProvider:
    """Callable that produces an end value: clamp the provided end, then subtract the granularity."""

    def __init__(
        self,
        clamping_strategy: ClampingStrategy,
        end_provider: Callable[[], CursorValueType],
        granularity: timedelta,
    ) -> None:
        """Store the collaborators used on every call.

        Args:
            clamping_strategy: strategy whose `clamp` is applied to the raw end value.
            end_provider: zero-argument callable returning the raw end value.
            granularity: amount subtracted from the clamped end value.
        """
        self._clamping_strategy = clamping_strategy
        self._end_provider = end_provider
        self._granularity = granularity

    def __call__(self) -> CursorValueType:
        """Return the clamped end value minus the configured granularity."""
        raw_end = self._end_provider()
        clamped_end = self._clamping_strategy.clamp(raw_end)
        return clamped_end - self._granularity
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class DayClampingStrategy(ClampingStrategy):
    """Clamp a datetime to a day boundary (midnight)."""

    def __init__(self, is_ceiling: bool = True) -> None:
        """
        Args:
            is_ceiling: when True, clamp to midnight of the following day; otherwise clamp to
                midnight of the same day.
        """
        self._is_ceiling = is_ceiling

    def clamp(self, value: datetime) -> datetime:  # type: ignore  # datetime implements method from CursorValueType
        """Return midnight of `value`'s day, moved one day forward when in ceiling mode.

        In ceiling mode the extra day is added unconditionally, even if `value` is already at midnight.
        """
        start_of_day = value.replace(hour=0, minute=0, second=0, microsecond=0)
        if not self._is_ceiling:
            return start_of_day
        return start_of_day + timedelta(days=1)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class MonthClampingStrategy(ClampingStrategy):
    """Clamp a datetime to a month boundary (midnight on the first day of a month)."""

    def __init__(self, is_ceiling: bool = True) -> None:
        """
        Args:
            is_ceiling: when True, values that are not on the first of a month move to the first of
                the next month; otherwise they move to the first of their own month.
        """
        self._is_ceiling = is_ceiling

    def clamp(self, value: datetime) -> datetime:  # type: ignore  # datetime implements method from CursorValueType
        """Return the month boundary for `value`.

        A value whose day is already 1 is only truncated to midnight, regardless of the
        ceiling/floor mode.
        """
        midnight = value.replace(hour=0, minute=0, second=0, microsecond=0)

        # Already on the first of the month: no rounding across days is needed.
        if value.day == 1:
            return midnight

        if self._is_ceiling:
            return self._ceil(midnight)
        return midnight.replace(day=1)

    def _ceil(self, value: datetime) -> datetime:
        """Return midnight on the first day of the month following `value`'s month."""
        if value.month == 12:
            # December rolls over into January of the next year.
            target_year, target_month = value.year + 1, 1
        else:
            target_year, target_month = value.year, value.month + 1

        return value.replace(
            year=target_year,
            month=target_month,
            day=1,
            hour=0,
            minute=0,
            second=0,
            microsecond=0,
        )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class Weekday(Enum):
    """
    Days of the week, numbered the same way as `datetime.date.weekday()`: Monday is 0 and Sunday is 6.
    """

    MONDAY = 0
    TUESDAY = 1
    WEDNESDAY = 2
    THURSDAY = 3
    FRIDAY = 4
    SATURDAY = 5
    SUNDAY = 6
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class WeekClampingStrategy(ClampingStrategy):
    """Clamp a datetime to midnight on a configured day of the week."""

    def __init__(self, day_of_week: Weekday, is_ceiling: bool = True) -> None:
        """
        Args:
            day_of_week: the weekday that boundaries fall on; its integer value is stored.
            is_ceiling: when True, clamp forward to the target weekday; otherwise clamp to the
                boundary seven days before that one.
        """
        self._day_of_week = day_of_week.value
        self._is_ceiling = is_ceiling

    def clamp(self, value: datetime) -> datetime:  # type: ignore  # datetime implements method from CursorValueType
        """Return midnight on the target weekday relative to `value`.

        When `value` already falls on the target weekday, ceiling mode yields midnight of that same
        day and floor mode yields midnight seven days earlier.
        """
        # Number of days (0-6) from value's weekday forward to the target weekday.
        day_offset = (self._day_of_week - value.weekday()) % 7
        if not self._is_ceiling:
            # The floor boundary is exactly one week before the ceiling boundary.
            day_offset -= 7
        start_of_day = value.replace(hour=0, minute=0, second=0, microsecond=0)
        return start_of_day + timedelta(day_offset)
|
|
@@ -0,0 +1,481 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
import functools
|
|
6
|
+
import logging
|
|
7
|
+
from abc import ABC, abstractmethod
|
|
8
|
+
from typing import (
|
|
9
|
+
Any,
|
|
10
|
+
Callable,
|
|
11
|
+
Iterable,
|
|
12
|
+
List,
|
|
13
|
+
Mapping,
|
|
14
|
+
MutableMapping,
|
|
15
|
+
Optional,
|
|
16
|
+
Tuple,
|
|
17
|
+
Union,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
|
21
|
+
from airbyte_cdk.sources.message import MessageRepository
|
|
22
|
+
from airbyte_cdk.sources.streams import NO_CURSOR_STATE_KEY
|
|
23
|
+
from airbyte_cdk.sources.streams.concurrent.clamping import ClampingStrategy, NoClamping
|
|
24
|
+
from airbyte_cdk.sources.streams.concurrent.cursor_types import CursorValueType, GapType
|
|
25
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
|
26
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
|
|
27
|
+
from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import (
|
|
28
|
+
AbstractStreamStateConverter,
|
|
29
|
+
)
|
|
30
|
+
from airbyte_cdk.sources.types import Record, StreamSlice
|
|
31
|
+
|
|
32
|
+
LOGGER = logging.getLogger("airbyte")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _extract_value(mapping: Mapping[str, Any], path: List[str]) -> Any:
    """Follow `path` through `mapping`, indexing one key at a time, and return what is found.

    An empty `path` returns `mapping` itself. Any lookup error raised by an intermediate
    `[...]` access propagates to the caller.
    """
    current: Any = mapping
    for key in path:
        current = current[key]
    return current
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class CursorField:
    """Names the record field that holds the cursor value and knows how to read it."""

    def __init__(self, cursor_field_key: str) -> None:
        """
        Args:
            cursor_field_key: key looked up in `record.data` to obtain the cursor value.
        """
        self.cursor_field_key = cursor_field_key

    def extract_value(self, record: Record) -> CursorValueType:
        """Return the cursor value stored in `record` under `cursor_field_key`.

        Raises:
            ValueError: if the key is absent from `record.data` or its value is None.
        """
        found = record.data.get(self.cursor_field_key)
        if found is not None:
            return found  # type: ignore  # we assume that the value the path points at is a comparable
        raise ValueError(f"Could not find cursor field {self.cursor_field_key} in record")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class Cursor(StreamSlicer, ABC):
    """Abstract cursor: exposes its state, observes records, and is told when partitions close."""

    @property
    @abstractmethod
    def state(self) -> MutableMapping[str, Any]: ...

    @abstractmethod
    def observe(self, record: Record) -> None:
        """
        Notify the cursor that `record` has been emitted.
        """
        raise NotImplementedError()

    @abstractmethod
    def close_partition(self, partition: Partition) -> None:
        """
        Notify the cursor that `partition` has been processed successfully.
        """
        raise NotImplementedError()

    @abstractmethod
    def ensure_at_least_one_state_emitted(self) -> None:
        """
        State messages are emitted when a partition is closed, but the platform expects at least one
        state message per stream per sync. This method must therefore be called when no partitions
        were generated.
        """
        raise NotImplementedError()

    def stream_slices(self) -> Iterable[StreamSlice]:
        """
        Default placeholder implementation: yields a single slice with an empty partition and an
        empty cursor slice. Subclasses can override this method to provide actual slicing.
        """
        yield StreamSlice(partition={}, cursor_slice={})
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class FinalStateCursor(Cursor):
    """Cursor that is used to guarantee at least one state message is emitted for a concurrent stream."""

    def __init__(
        self,
        stream_name: str,
        stream_namespace: Optional[str],
        message_repository: MessageRepository,
    ) -> None:
        """
        :param stream_name: name of the stream the state message is emitted for
        :param stream_namespace: namespace of that stream, if any
        :param message_repository: repository the state message is emitted through
        """
        self._stream_name = stream_name
        self._stream_namespace = stream_namespace
        self._message_repository = message_repository
        # A connector state manager normally works at the source level. Here a private instance is enough
        # because it is only used to build the sentinel state message, not to manage the state of the whole
        # source. This is meant to be temporary, until resumable full refresh has every stream use a
        # FileBasedConcurrentCursor with incremental state.
        self._connector_state_manager = ConnectorStateManager()
        self._has_closed_at_least_one_slice = False

    @property
    def state(self) -> MutableMapping[str, Any]:
        """Sentinel state: a fresh mapping that only flags NO_CURSOR_STATE_KEY as True."""
        return {NO_CURSOR_STATE_KEY: True}

    def observe(self, record: Record) -> None:
        """No-op: records are not tracked by this cursor."""

    def close_partition(self, partition: Partition) -> None:
        """No-op: closing a partition does not emit or change any state."""

    def ensure_at_least_one_state_emitted(self) -> None:
        """
        Used primarily for full refresh syncs that do not have a valid cursor value to emit at the end of a sync
        """
        manager = self._connector_state_manager
        name, namespace = self._stream_name, self._stream_namespace

        # Register the sentinel state first so that the message created next carries it.
        manager.update_state_for_stream(name, namespace, self.state)
        self._message_repository.emit_message(manager.create_state_message(name, namespace))
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class ConcurrentCursor(Cursor):
    """
    Cursor for concurrent streams that tracks progress as a list of slices.

    The state is kept in `self._concurrent_state["slices"]`, where each entry holds a start value, an end
    value and the most recent record cursor value (keys come from the state converter). Closing a partition
    adds a slice, merges the slices through the converter and emits a state message. `stream_slices`
    generates the ranges that still have to be synced from that state.
    """

    # Positions of the start/end field names inside the `slice_boundary_fields` tuple.
    _START_BOUNDARY = 0
    _END_BOUNDARY = 1

    def __init__(
        self,
        stream_name: str,
        stream_namespace: Optional[str],
        stream_state: Any,
        message_repository: MessageRepository,
        connector_state_manager: ConnectorStateManager,
        connector_state_converter: AbstractStreamStateConverter,
        cursor_field: CursorField,
        slice_boundary_fields: Optional[Tuple[str, str]],
        start: Optional[CursorValueType],
        end_provider: Callable[[], CursorValueType],
        lookback_window: Optional[GapType] = None,
        slice_range: Optional[GapType] = None,
        cursor_granularity: Optional[GapType] = None,
        # NOTE: this default instance is created once, when the class body is evaluated, and is shared by
        # every cursor that does not receive its own strategy.
        clamping_strategy: ClampingStrategy = NoClamping(),
    ) -> None:
        """
        :param stream_name: name of the stream this cursor manages state for
        :param stream_namespace: namespace of that stream, if any
        :param stream_state: incoming state; used as is when the converter deems it compatible, converted
            from the sequential format otherwise
        :param message_repository: repository state messages are emitted through
        :param connector_state_manager: manager the state is registered with before a message is created
        :param connector_state_converter: converter used to parse, merge and serialize cursor values/state
        :param cursor_field: field used to read the cursor value of records
        :param slice_boundary_fields: (start key, end key) to read from/write to slices; when not set, the
            converter's START_KEY/END_KEY are used for generated slices
        :param start: lower bound of the sync, if any
        :param end_provider: callable returning the upper bound of the sync
        :param lookback_window: gap subtracted from the end of the last slice when generating slices
        :param slice_range: maximum size of a generated slice; bigger ranges are split
        :param cursor_granularity: gap used to keep consecutive generated slices from overlapping
        :param clamping_strategy: strategy applied to the boundaries of generated slices
        """
        self._stream_name = stream_name
        self._stream_namespace = stream_namespace
        self._message_repository = message_repository
        self._connector_state_converter = connector_state_converter
        self._connector_state_manager = connector_state_manager
        self._cursor_field = cursor_field
        # To see some example where the slice boundaries might not be defined, check https://github.com/airbytehq/airbyte/blob/1ce84d6396e446e1ac2377362446e3fb94509461/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py#L363-L379
        self._slice_boundary_fields = slice_boundary_fields
        self._start = start
        self._end_provider = end_provider
        # Must run after `_start`, `_cursor_field` and `_connector_state_converter` are set:
        # `_get_concurrent_state` reads all three.
        self.start, self._concurrent_state = self._get_concurrent_state(stream_state)
        self._lookback_window = lookback_window
        self._slice_range = slice_range
        self._most_recent_cursor_value_per_partition: MutableMapping[
            Union[StreamSlice, Mapping[str, Any], None], Any
        ] = {}
        self._has_closed_at_least_one_slice = False
        self._cursor_granularity = cursor_granularity
        # Flag to track if the logger has been triggered (per stream)
        self._should_be_synced_logger_triggered = False
        self._clamping_strategy = clamping_strategy

    @property
    def state(self) -> MutableMapping[str, Any]:
        """The concurrent state converted to a state message by the state converter."""
        return self._connector_state_converter.convert_to_state_message(
            self.cursor_field, self._concurrent_state
        )

    @property
    def cursor_field(self) -> CursorField:
        """The cursor field this cursor was configured with."""
        return self._cursor_field

    @property
    def _slice_boundary_fields_wrapper(self) -> Tuple[str, str]:
        """The configured slice boundary fields, or the converter's (START_KEY, END_KEY) when none are set."""
        return (
            self._slice_boundary_fields
            if self._slice_boundary_fields
            else (
                self._connector_state_converter.START_KEY,
                self._connector_state_converter.END_KEY,
            )
        )

    def _get_concurrent_state(
        self, state: MutableMapping[str, Any]
    ) -> Tuple[CursorValueType, MutableMapping[str, Any]]:
        """
        Build the (start, concurrent state) pair from the incoming state.

        A state the converter recognizes as compatible is only deserialized; anything else is converted
        from the sequential format.
        """
        if self._connector_state_converter.is_state_message_compatible(state):
            return (
                # NOTE: truthiness check, so a falsy start (not only None) falls back to the zero value.
                self._start or self._connector_state_converter.zero_value,
                self._connector_state_converter.deserialize(state),
            )
        return self._connector_state_converter.convert_from_sequential_state(
            self._cursor_field, state, self._start
        )

    def observe(self, record: Record) -> None:
        """
        Remember the greatest cursor value seen so far for the slice `record` is associated with.

        Records without a cursor value are not tracked; a warning is logged the first time it happens.
        """
        most_recent_cursor_value = self._most_recent_cursor_value_per_partition.get(
            record.associated_slice
        )
        try:
            cursor_value = self._extract_cursor_value(record)

            if most_recent_cursor_value is None or most_recent_cursor_value < cursor_value:
                self._most_recent_cursor_value_per_partition[record.associated_slice] = cursor_value
        except ValueError:
            self._log_for_record_without_cursor_value()

    def _extract_cursor_value(self, record: Record) -> Any:
        """Read the cursor value of `record` and parse it with the state converter."""
        return self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))

    def close_partition(self, partition: Partition) -> None:
        """
        Record `partition` as processed and emit a state message if that added a slice to the state.
        """
        slice_count_before = len(self._concurrent_state.get("slices", []))
        self._add_slice_to_state(partition)
        if slice_count_before < len(
            self._concurrent_state["slices"]
        ):  # only emit if at least one slice has been processed
            self._merge_partitions()
            self._emit_state_message()
        self._has_closed_at_least_one_slice = True

    def _add_slice_to_state(self, partition: Partition) -> None:
        """
        Append the slice covered by `partition` to the concurrent state.

        With slice boundary fields, the boundaries are read from the partition's slice. Without them, the
        slice spans from `self.start` to the most recent cursor value observed for the partition, which is
        only supported for a single partition.

        :raises RuntimeError: if boundary fields are set but the state has no "slices" entry
        :raises ValueError: if no boundary fields are set and a partition has already been closed
        :raises KeyError: if a boundary field cannot be read from the partition's slice
        """
        most_recent_cursor_value = self._most_recent_cursor_value_per_partition.get(
            partition.to_slice()
        )

        if self._slice_boundary_fields:
            if "slices" not in self._concurrent_state:
                raise RuntimeError(
                    f"The state for stream {self._stream_name} should have at least one slice to delineate the sync start time, but no slices are present. This is unexpected. Please contact Support."
                )
            self._concurrent_state["slices"].append(
                {
                    self._connector_state_converter.START_KEY: self._extract_from_slice(
                        partition, self._slice_boundary_fields[self._START_BOUNDARY]
                    ),
                    self._connector_state_converter.END_KEY: self._extract_from_slice(
                        partition, self._slice_boundary_fields[self._END_BOUNDARY]
                    ),
                    self._connector_state_converter.MOST_RECENT_RECORD_KEY: most_recent_cursor_value,
                }
            )
        # NOTE: truthiness check, so nothing is added when no value was observed or the value is falsy.
        elif most_recent_cursor_value:
            if self._has_closed_at_least_one_slice:
                # If we track state value using records cursor field, we can only do that if there is one partition. This is because we save
                # the state every time we close a partition. We assume that if there are multiple slices, they need to be providing
                # boundaries. There are cases where partitions could not have boundaries:
                # * The cursor should be per-partition
                # * The stream state is actually the parent stream state
                # There might be other cases not listed above. Those are not supported today hence the stream should not use this cursor for
                # state management. For the specific user that was affected with this issue, we need to:
                # * Fix state tracking (which is currently broken)
                # * Make the new version available
                # * (Probably) ask the user to reset the stream to avoid data loss
                raise ValueError(
                    "Given that slice_boundary_fields is not defined and that per-partition state is not supported, only one slice is "
                    "expected. Please contact the Airbyte team."
                )

            self._concurrent_state["slices"].append(
                {
                    self._connector_state_converter.START_KEY: self.start,
                    self._connector_state_converter.END_KEY: most_recent_cursor_value,
                    self._connector_state_converter.MOST_RECENT_RECORD_KEY: most_recent_cursor_value,
                }
            )

    def _emit_state_message(self) -> None:
        """Register the current state with the state manager, then emit the resulting state message."""
        self._connector_state_manager.update_state_for_stream(
            self._stream_name,
            self._stream_namespace,
            self.state,
        )
        state_message = self._connector_state_manager.create_state_message(
            self._stream_name, self._stream_namespace
        )
        self._message_repository.emit_message(state_message)

    def _merge_partitions(self) -> None:
        """Replace the state's slices by the result of the converter's `merge_intervals`."""
        self._concurrent_state["slices"] = self._connector_state_converter.merge_intervals(
            self._concurrent_state["slices"]
        )

    def _extract_from_slice(self, partition: Partition, key: str) -> CursorValueType:
        """
        Read `key` from the slice of `partition` and parse it with the state converter.

        :raises KeyError: if the slice is empty or does not contain `key`
        """
        try:
            _slice = partition.to_slice()
            if not _slice:
                # Caught by the handler below and re-raised with the generic message, chained to this one.
                raise KeyError(f"Could not find key `{key}` in empty slice")
            return self._connector_state_converter.parse_value(_slice[key])  # type: ignore  # we expect the devs to specify a key that would return a CursorValueType
        except KeyError as exception:
            raise KeyError(
                f"Partition is expected to have key `{key}` but could not be found"
            ) from exception

    def ensure_at_least_one_state_emitted(self) -> None:
        """
        The platform expects to have at least one state message on successful syncs. Hence, whatever happens, we expect this method to be
        called.
        """
        self._emit_state_message()

    def stream_slices(self) -> Iterable[StreamSlice]:
        """
        Generating slices based on a few parameters:
        * lookback_window: Buffer to remove from END_KEY of the highest slice
        * slice_range: Max difference between two slices. If the difference between two slices is greater, multiple slices will be created
        * start: `_split_per_slice_range` will clip any value to `self._start` which means that:
          * if upper is less than self._start, no slices will be generated
          * if lower is less than self._start, self._start will be used as the lower boundary (lookback_window will not be considered in that case)

        Note that the slices will overlap at their boundaries. We therefore expect to have at least the lower or the upper boundary to be
        inclusive in the API that is queried.

        :raises ValueError: if the state does not contain any slice
        """
        self._merge_partitions()

        # Fail with the intended error before anything indexes into the slices: without this guard, an
        # empty state combined with a configured start raised IndexError in `_is_start_before_first_slice`
        # and never reached the ValueError at the end of this method.
        if not self._concurrent_state["slices"]:
            raise ValueError("Expected at least one slice")

        # The state is deliberately re-read from `self._concurrent_state` on every access below rather than
        # cached in a local: `close_partition` rebinds the list, possibly while this generator is consumed.
        if self._start is not None and self._is_start_before_first_slice():
            # Gap between the configured start and the first slice already in the state.
            yield from self._split_per_slice_range(
                self._start,
                self._concurrent_state["slices"][0][self._connector_state_converter.START_KEY],
                False,
            )

        if len(self._concurrent_state["slices"]) == 1:
            yield from self._split_per_slice_range(
                self._calculate_lower_boundary_of_last_slice(
                    self._concurrent_state["slices"][0][self._connector_state_converter.END_KEY]
                ),
                self._end_provider(),
                True,
            )
        elif len(self._concurrent_state["slices"]) > 1:
            # Gaps between consecutive slices of the state.
            for i in range(len(self._concurrent_state["slices"]) - 1):
                if self._cursor_granularity:
                    yield from self._split_per_slice_range(
                        self._concurrent_state["slices"][i][self._connector_state_converter.END_KEY]
                        + self._cursor_granularity,
                        self._concurrent_state["slices"][i + 1][
                            self._connector_state_converter.START_KEY
                        ],
                        False,
                    )
                else:
                    yield from self._split_per_slice_range(
                        self._concurrent_state["slices"][i][
                            self._connector_state_converter.END_KEY
                        ],
                        self._concurrent_state["slices"][i + 1][
                            self._connector_state_converter.START_KEY
                        ],
                        False,
                    )
            # From the end of the last slice (minus the lookback window) up to the end of the sync.
            yield from self._split_per_slice_range(
                self._calculate_lower_boundary_of_last_slice(
                    self._concurrent_state["slices"][-1][self._connector_state_converter.END_KEY]
                ),
                self._end_provider(),
                True,
            )
        else:
            raise ValueError("Expected at least one slice")

    def _is_start_before_first_slice(self) -> bool:
        """Whether a start is configured and lies strictly before the start of the first slice of the state."""
        return (
            self._start is not None
            and self._start
            < self._concurrent_state["slices"][0][self._connector_state_converter.START_KEY]
        )

    def _calculate_lower_boundary_of_last_slice(
        self, lower_boundary: CursorValueType
    ) -> CursorValueType:
        """Return `lower_boundary` moved back by the lookback window, or unchanged when there is none."""
        if self._lookback_window:
            return lower_boundary - self._lookback_window
        return lower_boundary

    def _split_per_slice_range(
        self, lower: CursorValueType, upper: CursorValueType, upper_is_end: bool
    ) -> Iterable[StreamSlice]:
        """
        Yield the slices covering `lower` to `upper`, split so that none exceeds `self._slice_range`.

        Nothing is yielded when `lower >= upper` or when `upper` is before the configured start; `lower` is
        raised to the configured start when it is before it. Boundaries go through the clamping strategy
        and are formatted by the state converter. When a cursor granularity is set, it is subtracted from
        the end of every slice except, if `upper_is_end` is True, the one that reaches `upper`.

        :param lower: lower boundary of the range to cover
        :param upper: upper boundary of the range to cover
        :param upper_is_end: whether `upper` is the end of the sync, as opposed to the start of a known slice
        """
        if lower >= upper:
            return

        # NOTE: `self._start` is checked by truthiness here and below, so a falsy start disables clipping.
        if self._start and upper < self._start:
            return

        lower = max(lower, self._start) if self._start else lower
        if not self._slice_range or self._evaluate_upper_safely(lower, self._slice_range) >= upper:
            # The whole range fits in a single slice.
            clamped_lower = self._clamping_strategy.clamp(lower)
            clamped_upper = self._clamping_strategy.clamp(upper)
            start_value, end_value = (
                (clamped_lower, clamped_upper - self._cursor_granularity)
                if self._cursor_granularity and not upper_is_end
                else (clamped_lower, clamped_upper)
            )
            yield StreamSlice(
                partition={},
                cursor_slice={
                    self._slice_boundary_fields_wrapper[
                        self._START_BOUNDARY
                    ]: self._connector_state_converter.output_format(start_value),
                    self._slice_boundary_fields_wrapper[
                        self._END_BOUNDARY
                    ]: self._connector_state_converter.output_format(end_value),
                },
            )
        else:
            stop_processing = False
            current_lower_boundary = lower
            while not stop_processing:
                current_upper_boundary = min(
                    self._evaluate_upper_safely(current_lower_boundary, self._slice_range), upper
                )
                has_reached_upper_boundary = current_upper_boundary >= upper

                # `upper` itself is never clamped; only intermediate boundaries are.
                clamped_upper = (
                    self._clamping_strategy.clamp(current_upper_boundary)
                    if current_upper_boundary != upper
                    else current_upper_boundary
                )
                clamped_lower = self._clamping_strategy.clamp(current_lower_boundary)
                if clamped_lower >= clamped_upper:
                    # clamping collapsed both values which means that it is time to stop processing
                    # FIXME should this be replace by proper end_provider
                    break
                start_value, end_value = (
                    (clamped_lower, clamped_upper - self._cursor_granularity)
                    if self._cursor_granularity
                    and (not upper_is_end or not has_reached_upper_boundary)
                    else (clamped_lower, clamped_upper)
                )
                yield StreamSlice(
                    partition={},
                    cursor_slice={
                        self._slice_boundary_fields_wrapper[
                            self._START_BOUNDARY
                        ]: self._connector_state_converter.output_format(start_value),
                        self._slice_boundary_fields_wrapper[
                            self._END_BOUNDARY
                        ]: self._connector_state_converter.output_format(end_value),
                    },
                )
                # The next slice starts where this one (clamped) ended.
                current_lower_boundary = clamped_upper
                if current_upper_boundary >= upper:
                    stop_processing = True

    def _evaluate_upper_safely(self, lower: CursorValueType, step: GapType) -> CursorValueType:
        """
        Given that we set the default step at datetime.timedelta.max, we will generate an OverflowError when evaluating the next start_date
        This method assumes that users would never enter a step that would generate an overflow. Given that would be the case, the code
        would have broken anyway.

        :return: `lower + step`, or the value of the end provider if the addition overflows
        """
        try:
            return lower + step
        except OverflowError:
            return self._end_provider()

    def should_be_synced(self, record: Record) -> bool:
        """
        Determines if a record should be synced based on its cursor value.
        :param record: The record to evaluate

        :return: True if the record's cursor value falls within the sync boundaries (`self.start` and the
            end provider's value, both inclusive). Also True, after logging a warning once, if the cursor
            value cannot be found in the record.
        """
        try:
            record_cursor_value: CursorValueType = self._extract_cursor_value(record)
        except ValueError:
            self._log_for_record_without_cursor_value()
            return True
        return self.start <= record_cursor_value <= self._end_provider()

    def _log_for_record_without_cursor_value(self) -> None:
        """Warn that a record lacks the cursor field; only the first call on this cursor logs."""
        if not self._should_be_synced_logger_triggered:
            LOGGER.warning(
                f"Could not find cursor field `{self.cursor_field.cursor_field_key}` in record for stream {self._stream_name}. The incremental sync will assume it needs to be synced"
            )
            self._should_be_synced_logger_triggered = True
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from abc import abstractmethod
|
|
2
|
+
from typing import Protocol
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class GapType(Protocol):
    """
    Represents the gap between two cursor values. For instance:
    * when cursor values are datetimes, the gap is a timedelta
    * when cursor values are integers, the gap is an integer as well
    """
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class CursorValueType(Protocol):
    """Protocol for annotating cursor values: they can be compared, and shifted by a gap."""

    @abstractmethod
    def __lt__(self: "CursorValueType", other: "CursorValueType") -> bool:
        """Whether this value is strictly lower than `other`."""

    @abstractmethod
    def __ge__(self: "CursorValueType", other: "CursorValueType") -> bool:
        """Whether this value is greater than or equal to `other`."""

    @abstractmethod
    def __add__(self: "CursorValueType", other: GapType) -> "CursorValueType":
        """The cursor value obtained by adding the gap `other` to this value."""

    @abstractmethod
    def __sub__(self: "CursorValueType", other: GapType) -> "CursorValueType":
        """The cursor value obtained by subtracting the gap `other` from this value."""
|