airbyte-cdk 0.0.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/__init__.py +358 -0
- airbyte_cdk/cli/__init__.py +1 -0
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
- airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
- airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
- airbyte_cdk/config_observation.py +104 -0
- airbyte_cdk/connector.py +123 -0
- airbyte_cdk/connector_builder/README.md +53 -0
- airbyte_cdk/connector_builder/__init__.py +3 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
- airbyte_cdk/connector_builder/main.py +107 -0
- airbyte_cdk/connector_builder/models.py +73 -0
- airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
- airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
- airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
- airbyte_cdk/connector_builder/test_reader/types.py +83 -0
- airbyte_cdk/destinations/__init__.py +8 -0
- airbyte_cdk/destinations/destination.py +154 -0
- airbyte_cdk/destinations/vector_db_based/README.md +37 -0
- airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
- airbyte_cdk/destinations/vector_db_based/config.py +298 -0
- airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
- airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
- airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
- airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
- airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
- airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
- airbyte_cdk/entrypoint.py +414 -0
- airbyte_cdk/exception_handler.py +56 -0
- airbyte_cdk/logger.py +109 -0
- airbyte_cdk/models/__init__.py +72 -0
- airbyte_cdk/models/airbyte_protocol.py +88 -0
- airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
- airbyte_cdk/models/well_known_types.py +5 -0
- airbyte_cdk/py.typed +0 -0
- airbyte_cdk/sources/__init__.py +26 -0
- airbyte_cdk/sources/abstract_source.py +326 -0
- airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
- airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
- airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
- airbyte_cdk/sources/config.py +27 -0
- airbyte_cdk/sources/connector_state_manager.py +161 -0
- airbyte_cdk/sources/declarative/__init__.py +3 -0
- airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
- airbyte_cdk/sources/declarative/async_job/job.py +52 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
- airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
- airbyte_cdk/sources/declarative/async_job/status.py +24 -0
- airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
- airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
- airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
- airbyte_cdk/sources/declarative/auth/token.py +267 -0
- airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
- airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
- airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
- airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
- airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
- airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
- airbyte_cdk/sources/declarative/declarative_source.py +36 -0
- airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
- airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
- airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
- airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
- airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
- airbyte_cdk/sources/declarative/exceptions.py +9 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
- airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
- airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
- airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
- airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
- airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
- airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
- airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
- airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
- airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
- airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
- airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
- airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
- airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
- airbyte_cdk/sources/declarative/models/__init__.py +2 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
- airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
- airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
- airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
- airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
- airbyte_cdk/sources/declarative/requesters/README.md +56 -0
- airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
- airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
- airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
- airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
- airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
- airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
- airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
- airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
- airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
- airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
- airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
- airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
- airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
- airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
- airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
- airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
- airbyte_cdk/sources/declarative/spec/spec.py +48 -0
- airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
- airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
- airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
- airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
- airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
- airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
- airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
- airbyte_cdk/sources/declarative/types.py +25 -0
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
- airbyte_cdk/sources/file_based/README.md +152 -0
- airbyte_cdk/sources/file_based/__init__.py +24 -0
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
- airbyte_cdk/sources/file_based/config/__init__.py +0 -0
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
- airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
- airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
- airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
- airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
- airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
- airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
- airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
- airbyte_cdk/sources/file_based/exceptions.py +159 -0
- airbyte_cdk/sources/file_based/file_based_source.py +466 -0
- airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
- airbyte_cdk/sources/file_based/file_record_data.py +22 -0
- airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
- airbyte_cdk/sources/file_based/remote_file.py +18 -0
- airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
- airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
- airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
- airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
- airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
- airbyte_cdk/sources/file_based/types.py +10 -0
- airbyte_cdk/sources/http_config.py +10 -0
- airbyte_cdk/sources/http_logger.py +55 -0
- airbyte_cdk/sources/message/__init__.py +19 -0
- airbyte_cdk/sources/message/repository.py +137 -0
- airbyte_cdk/sources/source.py +95 -0
- airbyte_cdk/sources/specs/transfer_modes.py +26 -0
- airbyte_cdk/sources/streams/__init__.py +8 -0
- airbyte_cdk/sources/streams/availability_strategy.py +84 -0
- airbyte_cdk/sources/streams/call_rate.py +704 -0
- airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
- airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
- airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
- airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
- airbyte_cdk/sources/streams/concurrent/README.md +7 -0
- airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
- airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
- airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
- airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
- airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
- airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
- airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
- airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
- airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
- airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
- airbyte_cdk/sources/streams/core.py +703 -0
- airbyte_cdk/sources/streams/http/__init__.py +10 -0
- airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
- airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
- airbyte_cdk/sources/streams/http/exceptions.py +61 -0
- airbyte_cdk/sources/streams/http/http.py +673 -0
- airbyte_cdk/sources/streams/http/http_client.py +531 -0
- airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
- airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
- airbyte_cdk/sources/streams/utils/__init__.py +3 -0
- airbyte_cdk/sources/types.py +169 -0
- airbyte_cdk/sources/utils/__init__.py +7 -0
- airbyte_cdk/sources/utils/casing.py +12 -0
- airbyte_cdk/sources/utils/files_directory.py +15 -0
- airbyte_cdk/sources/utils/record_helper.py +53 -0
- airbyte_cdk/sources/utils/schema_helpers.py +230 -0
- airbyte_cdk/sources/utils/slice_logger.py +57 -0
- airbyte_cdk/sources/utils/transform.py +277 -0
- airbyte_cdk/sources/utils/types.py +7 -0
- airbyte_cdk/sql/__init__.py +0 -0
- airbyte_cdk/sql/_util/__init__.py +0 -0
- airbyte_cdk/sql/_util/hashing.py +34 -0
- airbyte_cdk/sql/_util/name_normalizers.py +92 -0
- airbyte_cdk/sql/constants.py +32 -0
- airbyte_cdk/sql/exceptions.py +235 -0
- airbyte_cdk/sql/secrets.py +123 -0
- airbyte_cdk/sql/shared/__init__.py +15 -0
- airbyte_cdk/sql/shared/catalog_providers.py +145 -0
- airbyte_cdk/sql/shared/sql_processor.py +786 -0
- airbyte_cdk/sql/types.py +160 -0
- airbyte_cdk/test/__init__.py +7 -0
- airbyte_cdk/test/catalog_builder.py +81 -0
- airbyte_cdk/test/entrypoint_wrapper.py +250 -0
- airbyte_cdk/test/mock_http/__init__.py +6 -0
- airbyte_cdk/test/mock_http/matcher.py +41 -0
- airbyte_cdk/test/mock_http/mocker.py +185 -0
- airbyte_cdk/test/mock_http/request.py +103 -0
- airbyte_cdk/test/mock_http/response.py +28 -0
- airbyte_cdk/test/mock_http/response_builder.py +237 -0
- airbyte_cdk/test/state_builder.py +33 -0
- airbyte_cdk/test/utils/__init__.py +1 -0
- airbyte_cdk/test/utils/data.py +24 -0
- airbyte_cdk/test/utils/http_mocking.py +16 -0
- airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
- airbyte_cdk/test/utils/reading.py +26 -0
- airbyte_cdk/utils/__init__.py +10 -0
- airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
- airbyte_cdk/utils/analytics_message.py +25 -0
- airbyte_cdk/utils/constants.py +5 -0
- airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
- airbyte_cdk/utils/datetime_helpers.py +499 -0
- airbyte_cdk/utils/event_timing.py +85 -0
- airbyte_cdk/utils/is_cloud_environment.py +18 -0
- airbyte_cdk/utils/mapping_helpers.py +162 -0
- airbyte_cdk/utils/message_utils.py +26 -0
- airbyte_cdk/utils/oneof_option_config.py +33 -0
- airbyte_cdk/utils/print_buffer.py +75 -0
- airbyte_cdk/utils/schema_inferrer.py +270 -0
- airbyte_cdk/utils/slice_hasher.py +37 -0
- airbyte_cdk/utils/spec_schema_transformations.py +26 -0
- airbyte_cdk/utils/stream_status_utils.py +43 -0
- airbyte_cdk/utils/traced_exception.py +145 -0
- airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
- airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
- airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
- airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
- airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
- airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
from functools import lru_cache
|
|
6
|
+
from logging import Logger
|
|
7
|
+
from typing import Any, Iterable, List, Mapping, Optional
|
|
8
|
+
|
|
9
|
+
from airbyte_cdk.models import AirbyteStream, SyncMode
|
|
10
|
+
from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
|
|
11
|
+
from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
|
|
12
|
+
AbstractAvailabilityStrategy,
|
|
13
|
+
StreamAvailability,
|
|
14
|
+
)
|
|
15
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
|
|
16
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
|
17
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class DefaultStream(AbstractStream):
    """
    Stream implementation that is fully configured through its constructor arguments.

    Partition generation is delegated to the provided partition generator and availability checks are delegated to the
    provided availability strategy.
    """

    def __init__(
        self,
        partition_generator: PartitionGenerator,
        name: str,
        json_schema: Mapping[str, Any],
        availability_strategy: AbstractAvailabilityStrategy,
        primary_key: List[str],
        cursor_field: Optional[str],
        logger: Logger,
        cursor: Cursor,
        namespace: Optional[str] = None,
        supports_file_transfer: bool = False,
    ) -> None:
        """
        :param partition_generator: The generator `generate_partitions` yields partitions from.
        :param name: The name of the stream.
        :param json_schema: The JSON schema returned by `get_json_schema` and exposed in the AirbyteStream.
        :param availability_strategy: The strategy `check_availability` delegates to.
        :param primary_key: The primary key fields. Each one is exposed as a single-element path in the AirbyteStream.
        :param cursor_field: The cursor field, if any. When set, the AirbyteStream is declared as incremental and resumable.
        :param logger: The logger passed to the availability strategy and used to log the sync configuration.
        :param cursor: The cursor exposed through the `cursor` property.
        :param namespace: The namespace of the stream, if any.
        :param supports_file_transfer: The value exposed as `is_file_based` in the AirbyteStream.
        """
        self._stream_partition_generator = partition_generator
        self._name = name
        self._json_schema = json_schema
        self._availability_strategy = availability_strategy
        self._primary_key = primary_key
        self._cursor_field = cursor_field
        self._logger = logger
        self._cursor = cursor
        self._namespace = namespace
        self._supports_file_transfer = supports_file_transfer

    def generate_partitions(self) -> Iterable[Partition]:
        """
        Yield the partitions produced by the partition generator.
        """
        yield from self._stream_partition_generator.generate()

    @property
    def name(self) -> str:
        """
        The name of the stream.
        """
        return self._name

    @property
    def namespace(self) -> Optional[str]:
        """
        The namespace of the stream, or None if it has none.
        """
        return self._namespace

    def check_availability(self) -> StreamAvailability:
        """
        Return the availability reported by the availability strategy.
        """
        return self._availability_strategy.check_availability(self._logger)

    @property
    def cursor_field(self) -> Optional[str]:
        """
        The cursor field of the stream, or None if it has none.
        """
        return self._cursor_field

    def get_json_schema(self) -> Mapping[str, Any]:
        """
        Return the JSON schema the stream was created with.
        """
        # This is a plain attribute read, so it is deliberately not wrapped in `lru_cache`: a cache on an instance method is
        # keyed on `self` and would keep every stream instance alive for the lifetime of the process.
        return self._json_schema

    def as_airbyte_stream(self) -> AirbyteStream:
        """
        Build the AirbyteStream describing this stream.

        The stream always supports full refresh. When a cursor field is set, it is also declared as incremental and resumable
        with a source defined cursor. When primary keys are set, they are exposed as the source defined primary key.
        """
        stream = AirbyteStream(
            name=self.name,
            json_schema=dict(self._json_schema),
            supported_sync_modes=[SyncMode.full_refresh],
            is_resumable=False,
            is_file_based=self._supports_file_transfer,
        )

        if self._namespace:
            stream.namespace = self._namespace

        if self._cursor_field:
            stream.source_defined_cursor = True
            stream.is_resumable = True
            stream.supported_sync_modes.append(SyncMode.incremental)
            stream.default_cursor_field = [self._cursor_field]

        # Each primary key field is wrapped in its own list because the protocol expects a list of paths
        if self._primary_key:
            stream.source_defined_primary_key = [[key] for key in self._primary_key]

        return stream

    def log_stream_sync_configuration(self) -> None:
        """
        Log the primary key and cursor field of the stream at debug level.
        """
        self._logger.debug(
            f"Syncing stream instance: {self.name}",
            extra={
                "primary_key": self._primary_key,
                "cursor_field": self.cursor_field,
            },
        )

    @property
    def cursor(self) -> Cursor:
        """
        The cursor the stream was created with.
        """
        return self._cursor
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ExceptionWithDisplayMessage(Exception):
    """
    Exception that can be used to display a custom message to the user.
    """

    def __init__(self, display_message: str, **kwargs: Any):
        """
        :param display_message: The message to display to the user.
        :param kwargs: Keyword arguments forwarded to the next `__init__` in the method resolution order.
        """
        super().__init__(**kwargs)
        # The parent initializer was called without positional arguments, which leaves `args` empty. Copying and pickling rebuild
        # an exception by calling its class with `args`, so the message is recorded there to keep those operations working.
        self.args = (display_message,)
        self.display_message = display_message

    def __str__(self) -> str:
        return f'ExceptionWithDisplayMessage: "{self.display_message}"'
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
2
|
+
|
|
3
|
+
from typing import List, Optional, Union
|
|
4
|
+
|
|
5
|
+
from airbyte_cdk.sources.streams import Stream
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def get_primary_key_from_stream(
    stream_primary_key: Optional[Union[str, List[str], List[List[str]]]],
) -> List[str]:
    """
    Normalize a stream primary key definition into a flat list of field names.

    :param stream_primary_key: None, a single field name, a list of field names or a list of single-element paths.
    :return: The list of primary key field names. The list is empty when no primary key is defined.
    :raises ValueError: If a path has more or less than one element, if the list mixes strings and paths, or if the value is
        neither None, a string nor a list.
    """
    if stream_primary_key is None:
        return []
    if isinstance(stream_primary_key, str):
        return [stream_primary_key]
    if not isinstance(stream_primary_key, list):
        raise ValueError(f"Invalid type for primary key: {stream_primary_key}")

    # An empty list satisfies this check and is therefore returned as is
    if all(isinstance(component, str) for component in stream_primary_key):
        return stream_primary_key  # type: ignore # We verified all items in the list are strings

    only_single_element_paths = all(
        isinstance(component, list) and len(component) == 1 for component in stream_primary_key
    )
    if not only_single_element_paths:
        raise ValueError(f"Nested primary keys are not supported. Found {stream_primary_key}")
    return [component[0] for component in stream_primary_key]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def get_cursor_field_from_stream(stream: Stream) -> Optional[str]:
    """
    Extract the cursor field of a stream as a single field name.

    :param stream: The stream to read `cursor_field` from.
    :return: The cursor field as is when it is not a list, the only element when it is a single-element list, or None when it
        is an empty list.
    :raises ValueError: If the cursor field is a list with more than one element.
    """
    if not isinstance(stream.cursor_field, list):
        return stream.cursor_field

    field_count = len(stream.cursor_field)
    if field_count > 1:
        raise ValueError(
            f"Nested cursor fields are not supported. Got {stream.cursor_field} for {stream.name}"
        )
    if field_count == 0:
        return None
    return stream.cursor_field[0]
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
import time
|
|
5
|
+
from queue import Queue
|
|
6
|
+
|
|
7
|
+
from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import (
|
|
8
|
+
PartitionGenerationCompletedSentinel,
|
|
9
|
+
)
|
|
10
|
+
from airbyte_cdk.sources.concurrent_source.stream_thread_exception import StreamThreadException
|
|
11
|
+
from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPoolManager
|
|
12
|
+
from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
|
|
13
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.types import QueueItem
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class PartitionEnqueuer:
    """
    Generates partitions from a stream and puts them in a queue.
    """

    def __init__(
        self,
        queue: Queue[QueueItem],
        thread_pool_manager: ThreadPoolManager,
        sleep_time_in_seconds: float = 0.1,
    ) -> None:
        """
        :param queue: The queue to put the partitions in.
        :param thread_pool_manager: Asked, before each partition is queued, whether the futures limit has been reached.
        :param sleep_time_in_seconds: How long to sleep between two checks while the futures limit is reached.
        """
        self._queue = queue
        self._thread_pool_manager = thread_pool_manager
        self._sleep_time_in_seconds = sleep_time_in_seconds

    def _wait_while_futures_limit_is_reached(self) -> None:
        """
        Block the calling thread, sleeping between checks, for as long as the thread pool manager reports that the
        futures limit has been reached.
        """
        # Note that prune_to_validate_has_reached_futures_limit has a lock while pruning which might create a
        # bottleneck in terms of performance.
        while self._thread_pool_manager.prune_to_validate_has_reached_futures_limit():
            time.sleep(self._sleep_time_in_seconds)

    def generate_partitions(self, stream: AbstractStream) -> None:
        """
        Generate the partitions of a stream and put them in the queue.
        Once every partition has been queued, a PartitionGenerationCompletedSentinel is queued to signal that the
        generation is over.

        If an exception is encountered, it is caught, wrapped in a StreamThreadException and queued, followed by the
        sentinel. This is very important because otherwise the main thread would have no way to know that something
        went wrong and would wait until the timeout is reached.

        This method is meant to be called in a separate thread.

        :param stream: The stream to generate partitions for.
        """
        try:
            for partition in stream.generate_partitions():
                # Adding partitions to the queue generates futures. To avoid having too many futures, we throttle
                # here. Throttling in the threads while the main thread is the one actually adding the futures means
                # we might exceed the limit, but the delta between the max futures length and the actual one is
                # expected to be small enough not to be an issue. We do this in the threads because we want the main
                # thread to always be processing QueueItems: if it does not, the queue size could grow and generate
                # OOM issues.
                #
                # This is not expected to create deadlocks where all worker threads wait because there are fewer
                # PartitionEnqueuer threads than worker threads.
                self._wait_while_futures_limit_is_reached()
                self._queue.put(partition)
            self._queue.put(PartitionGenerationCompletedSentinel(stream))
        except Exception as error:
            self._queue.put(StreamThreadException(error, stream.name))
            self._queue.put(PartitionGenerationCompletedSentinel(stream))
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
from queue import Queue
|
|
5
|
+
|
|
6
|
+
from airbyte_cdk.sources.concurrent_source.stream_thread_exception import StreamThreadException
|
|
7
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
|
8
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.types import (
|
|
9
|
+
PartitionCompleteSentinel,
|
|
10
|
+
QueueItem,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class PartitionReader:
    """
    Reads the records of a partition and puts them in a queue.
    """

    # Flag passed to PartitionCompleteSentinel when the partition was read without raising.
    _IS_SUCCESSFUL = True

    def __init__(self, queue: Queue[QueueItem]) -> None:
        """
        :param queue: The queue to put the records in.
        """
        self._queue = queue

    def process_partition(self, partition: Partition) -> None:
        """
        Read a partition and put each of its records in the output queue.
        Once every record has been queued, a PartitionCompleteSentinel flagged as successful is queued.

        If an exception is encountered, it is caught, wrapped in a StreamThreadException and queued, followed by a
        PartitionCompleteSentinel flagged as unsuccessful. This is very important because otherwise the main thread
        would have no way to know that something went wrong and would wait until the timeout is reached.

        This method is meant to be called from a thread.
        :param partition: The partition to read data from
        :return: None
        """
        output_queue = self._queue
        try:
            for record in partition.read():
                output_queue.put(record)
            output_queue.put(PartitionCompleteSentinel(partition, self._IS_SUCCESSFUL))
        except Exception as error:
            output_queue.put(StreamThreadException(error, partition.stream_name()))
            output_queue.put(PartitionCompleteSentinel(partition, not self._IS_SUCCESSFUL))
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from typing import Any, Iterable, Mapping, Optional
|
|
7
|
+
|
|
8
|
+
from airbyte_cdk.sources.types import Record
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Partition(ABC):
    """
    Interface of a partition: an object responsible for reading one specific set of data from a source.
    """

    @abstractmethod
    def read(self) -> Iterable[Record]:
        """
        Read the data covered by this partition.

        :return: An iterable of records.
        """
        ...

    @abstractmethod
    def to_slice(self) -> Optional[Mapping[str, Any]]:
        """
        Convert the partition to a slice that can be serialized and deserialized.

        Note: a return type of `Mapping[str, Comparable]` would have simplified typing, but some slices can have
        nested values
        ([example](https://github.com/airbytehq/airbyte/blob/1ce84d6396e446e1ac2377362446e3fb94509461/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py#L584-L596))

        :return: A mapping representing a slice
        """
        ...

    @abstractmethod
    def stream_name(self) -> str:
        """
        Give the name of the stream that this partition is reading from.

        :return: The name of the stream.
        """
        ...

    @abstractmethod
    def __hash__(self) -> int:
        """
        Compute a hash of the partition.

        Partitions must be hashable so that they can be used as keys in a dictionary.
        """
        ...
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from typing import Iterable
|
|
7
|
+
|
|
8
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class PartitionGenerator(ABC):
    """
    Interface of the objects that produce the partitions to read.
    """

    @abstractmethod
    def generate(self) -> Iterable[Partition]:
        """
        Produce the partitions.

        :return: An iterable of partitions
        """
        ...
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from typing import Iterable
|
|
5
|
+
|
|
6
|
+
from airbyte_cdk.sources.types import StreamSlice
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class StreamSlicer(ABC):
    """
    Splits a stream into chunks (slices) that can be fetched independently of one another.
    Slices enable state checkpointing and data retrieval parallelization.
    """

    @abstractmethod
    def stream_slices(self) -> Iterable[StreamSlice]:
        """
        Define the slices of the stream.

        :return: An iterable of stream slices
        """
        ...
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
from typing import Any, Union
|
|
6
|
+
|
|
7
|
+
from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import (
|
|
8
|
+
PartitionGenerationCompletedSentinel,
|
|
9
|
+
)
|
|
10
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
|
11
|
+
from airbyte_cdk.sources.types import Record
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class PartitionCompleteSentinel:
    """
    A sentinel object indicating all records for a partition were produced.
    Includes a pointer to the partition that was processed.
    """

    def __init__(self, partition: Partition, is_successful: bool = True):
        """
        :param partition: The partition that was processed
        :param is_successful: Flag stored on the sentinel telling whether the partition was processed
            successfully. Defaults to True.
        """
        self.partition = partition
        self.is_successful = is_successful

    def __eq__(self, other: Any) -> bool:
        """
        Two sentinels are equal when they point to equal partitions. `is_successful` is not compared.
        """
        if isinstance(other, PartitionCompleteSentinel):
            return self.partition == other.partition
        return False

    def __hash__(self) -> int:
        """
        Hash consistent with `__eq__`: only the partition is taken into account.

        Defining `__eq__` alone sets `__hash__` to None, which makes instances unhashable; defining it explicitly
        keeps sentinels usable in sets and as dictionary keys.
        """
        return hash(self.partition)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Typedef representing the items that can be added to the ThreadBasedConcurrentStream
QueueItem = Union[
    Record,
    Partition,
    PartitionCompleteSentinel,
    PartitionGenerationCompletedSentinel,
    Exception,
]
|
|
File without changes
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from enum import Enum
|
|
7
|
+
from typing import TYPE_CHECKING, Any, Callable, List, MutableMapping, Optional, Tuple
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ConcurrencyCompatibleStateType(Enum):
    """
    Allowed values of the `state_type` entry of a concurrency-compatible state.
    """

    # Each member's value is the string stored under `state_type`.
    date_range = "date-range"
    integer = "integer"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class AbstractStreamStateConverter(ABC):
    """
    Base class converting a stream state between the concurrency-compatible format (a `state_type` and a list of
    `slices`, each with a start, an end and optionally the most recent cursor value seen) and the stream's
    original (sequential) format.

    Subclasses define how single cursor values are converted, incremented and parsed.
    """

    # Keys of a slice in the concurrency-compatible state.
    START_KEY = "start"
    END_KEY = "end"
    MOST_RECENT_RECORD_KEY = "most_recent_cursor_value"

    @abstractmethod
    def _from_state_message(self, value: Any) -> Any:
        """
        Convert a single cursor value from its state-message representation. Applied by `deserialize` to the
        values of each slice.
        """
        pass

    @abstractmethod
    def _to_state_message(self, value: Any) -> Any:
        """
        Convert a single cursor value to its state-message representation. Applied by `serialize` and
        `convert_to_state_message`.
        """
        pass

    def __init__(self, is_sequential_state: bool = True):
        """
        :param is_sequential_state: When True, `convert_to_state_message` emits the stream's original (sequential)
            format for concurrency-compatible states; when False it emits the serialized slices.
        """
        self._is_sequential_state = is_sequential_state

    def convert_to_state_message(
        self, cursor_field: "CursorField", stream_state: MutableMapping[str, Any]
    ) -> MutableMapping[str, Any]:
        """
        Convert the state message from the concurrency-compatible format to the stream's original format.

        e.g.
        { "created": "2021-01-18T21:18:20.000Z" }

        When the state is not concurrency-compatible, or the converter is not configured for sequential state, the
        serialized slices are returned instead (with the `date-range` state type).

        Note: the dictionary stored under `legacy` in `stream_state`, if any, is updated in place and returned.

        :param cursor_field: Its `cursor_field_key` is the key under which the cursor value is written.
        :param stream_state: The state to convert.
        :return: The state to emit.
        :raises RuntimeError: If the sequential format is requested and the state has no slice.
        """
        if self.is_state_message_compatible(stream_state) and self._is_sequential_state:
            legacy_state = stream_state.get("legacy", {})
            latest_complete_time = self._get_latest_complete_time(stream_state.get("slices", []))
            if latest_complete_time is not None:
                legacy_state.update(
                    {cursor_field.cursor_field_key: self._to_state_message(latest_complete_time)}
                )
            return legacy_state or {}
        else:
            return self.serialize(stream_state, ConcurrencyCompatibleStateType.date_range)

    def _get_latest_complete_time(self, slices: List[MutableMapping[str, Any]]) -> Any:
        """
        Get the latest time before which all records have been processed.

        This is the most recent cursor value of the first merged interval, or the start of that interval when no
        cursor value was recorded.

        :raises RuntimeError: If `slices` is empty.
        """
        if not slices:
            raise RuntimeError(
                "Expected at least one slice but there were none. This is unexpected; please contact Support."
            )
        merged_intervals = self.merge_intervals(slices)
        first_interval = merged_intervals[0]

        # Compare against None rather than relying on truthiness: 0 is a valid cursor value for integer states.
        most_recent_cursor_value = first_interval.get(self.MOST_RECENT_RECORD_KEY)
        if most_recent_cursor_value is not None:
            return most_recent_cursor_value
        return first_interval[self.START_KEY]

    def deserialize(self, state: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
        """
        Convert, in place, the values of every slice of `state` from their state-message representation.

        The most recent cursor value is converted as well when present, so that a state produced by `serialize`
        is fully converted back and stays comparable with the slice boundaries.

        :param state: The state to convert; its slices are modified in place.
        :return: The same `state` object.
        """
        for stream_slice in state.get("slices", []):
            stream_slice[self.START_KEY] = self._from_state_message(stream_slice[self.START_KEY])
            stream_slice[self.END_KEY] = self._from_state_message(stream_slice[self.END_KEY])
            if stream_slice.get(self.MOST_RECENT_RECORD_KEY) is not None:
                stream_slice[self.MOST_RECENT_RECORD_KEY] = self._from_state_message(
                    stream_slice[self.MOST_RECENT_RECORD_KEY]
                )
        return state

    def serialize(
        self, state: MutableMapping[str, Any], state_type: ConcurrencyCompatibleStateType
    ) -> MutableMapping[str, Any]:
        """
        Build a new state whose slice values are converted to their state-message representation.

        :param state: The state to serialize; it is not modified.
        :param state_type: Its `value` is written under `state_type` in the result.
        :return: A mapping with the serialized `slices` and the `state_type`.
        """
        serialized_slices = []
        for stream_slice in state.get("slices", []):
            serialized_slice = {
                self.START_KEY: self._to_state_message(stream_slice[self.START_KEY]),
                self.END_KEY: self._to_state_message(stream_slice[self.END_KEY]),
            }
            # `is not None` keeps falsy but valid cursor values such as 0.
            if stream_slice.get(self.MOST_RECENT_RECORD_KEY) is not None:
                serialized_slice[self.MOST_RECENT_RECORD_KEY] = self._to_state_message(
                    stream_slice[self.MOST_RECENT_RECORD_KEY]
                )
            serialized_slices.append(serialized_slice)
        return {"slices": serialized_slices, "state_type": state_type.value}

    @staticmethod
    def is_state_message_compatible(state: MutableMapping[str, Any]) -> bool:
        """
        Tell whether `state` is non-empty and its `state_type` is one of the ConcurrencyCompatibleStateType values.
        """
        return bool(state) and state.get("state_type") in [
            t.value for t in ConcurrencyCompatibleStateType
        ]

    @abstractmethod
    def convert_from_sequential_state(
        self,
        cursor_field: "CursorField",  # to deprecate as it is only needed for sequential state
        stream_state: MutableMapping[str, Any],
        start: Optional[Any],
    ) -> Tuple[Any, MutableMapping[str, Any]]:
        """
        Convert the state message to the format required by the ConcurrentCursor.

        e.g.
        {
            "state_type": ConcurrencyCompatibleStateType.date_range.value,
            "metadata": { … },
            "slices": [
                {start: 0, end: 1617030403, finished_processing: true}]
        }
        """
        ...

    @abstractmethod
    def increment(self, value: Any) -> Any:
        """
        Increment a timestamp by a single unit.
        """
        ...

    @abstractmethod
    def output_format(self, value: Any) -> Any:
        """
        Convert the cursor value type to a JSON valid type.
        """
        ...

    def merge_intervals(
        self, intervals: List[MutableMapping[str, Any]]
    ) -> List[MutableMapping[str, Any]]:
        """
        Compute and return a list of merged intervals, sorted by start then end.

        Two consecutive intervals are merged when the end of the first one, incremented by one unit (as defined by
        the `increment` method), is greater than or equal to the start of the second one, i.e. when they overlap
        or are adjacent. The merged interval keeps the greatest end and the greatest most recent cursor value.

        Note: merging updates the interval dictionaries received as input in place; the returned list contains
        those same dictionaries.

        :param intervals: The intervals to merge.
        :return: The merged intervals (an empty list when there is no interval).
        """
        if not intervals:
            return []

        sorted_intervals = sorted(
            intervals, key=lambda interval: (interval[self.START_KEY], interval[self.END_KEY])
        )
        merged_intervals = [sorted_intervals[0]]

        for current_interval in sorted_intervals[1:]:
            last_interval = merged_intervals[-1]
            last_interval_end = last_interval[self.END_KEY]
            current_interval_start = current_interval[self.START_KEY]

            if self.increment(last_interval_end) >= current_interval_start:
                last_interval[self.END_KEY] = max(last_interval_end, current_interval[self.END_KEY])
                last_interval_cursor_value = last_interval.get(self.MOST_RECENT_RECORD_KEY)
                current_interval_cursor_value = current_interval.get(self.MOST_RECENT_RECORD_KEY)

                # Compare against None rather than relying on truthiness so that a cursor value of 0 is not
                # mistaken for a missing one.
                if current_interval_cursor_value is not None and last_interval_cursor_value is not None:
                    merged_cursor_value = max(current_interval_cursor_value, last_interval_cursor_value)
                elif current_interval_cursor_value is not None:
                    merged_cursor_value = current_interval_cursor_value
                else:
                    merged_cursor_value = last_interval_cursor_value
                last_interval[self.MOST_RECENT_RECORD_KEY] = merged_cursor_value
            else:
                # Add a new interval if no overlap
                merged_intervals.append(current_interval)

        return merged_intervals

    @abstractmethod
    def parse_value(self, value: Any) -> Any:
        """
        Parse the value of the cursor field into a comparable value.
        """
        ...

    @property
    @abstractmethod
    def zero_value(self) -> Any:
        """
        The "zero" cursor value of this converter, defined by subclasses.
        """
        ...
|