airbyte-cdk 0.72.1__py3-none-any.whl → 6.17.1.dev0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/__init__.py +355 -6
- airbyte_cdk/cli/__init__.py +1 -0
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
- airbyte_cdk/cli/source_declarative_manifest/_run.py +230 -0
- airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
- airbyte_cdk/config_observation.py +29 -10
- airbyte_cdk/connector.py +24 -24
- airbyte_cdk/connector_builder/README.md +53 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +37 -11
- airbyte_cdk/connector_builder/main.py +45 -13
- airbyte_cdk/connector_builder/message_grouper.py +189 -50
- airbyte_cdk/connector_builder/models.py +3 -2
- airbyte_cdk/destinations/__init__.py +4 -3
- airbyte_cdk/destinations/destination.py +54 -20
- airbyte_cdk/destinations/vector_db_based/README.md +37 -0
- airbyte_cdk/destinations/vector_db_based/config.py +40 -17
- airbyte_cdk/destinations/vector_db_based/document_processor.py +56 -17
- airbyte_cdk/destinations/vector_db_based/embedder.py +57 -15
- airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
- airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
- airbyte_cdk/destinations/vector_db_based/writer.py +24 -5
- airbyte_cdk/entrypoint.py +153 -44
- airbyte_cdk/exception_handler.py +21 -3
- airbyte_cdk/logger.py +30 -44
- airbyte_cdk/models/__init__.py +13 -2
- airbyte_cdk/models/airbyte_protocol.py +86 -1
- airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
- airbyte_cdk/models/file_transfer_record_message.py +13 -0
- airbyte_cdk/models/well_known_types.py +1 -1
- airbyte_cdk/sources/__init__.py +5 -1
- airbyte_cdk/sources/abstract_source.py +125 -79
- airbyte_cdk/sources/concurrent_source/__init__.py +7 -2
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +102 -36
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +29 -36
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +94 -10
- airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +20 -14
- airbyte_cdk/sources/config.py +3 -2
- airbyte_cdk/sources/connector_state_manager.py +49 -83
- airbyte_cdk/sources/declarative/async_job/job.py +52 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +497 -0
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +75 -0
- airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
- airbyte_cdk/sources/declarative/async_job/status.py +24 -0
- airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
- airbyte_cdk/sources/declarative/auth/__init__.py +2 -3
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
- airbyte_cdk/sources/declarative/auth/jwt.py +191 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +60 -20
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +10 -2
- airbyte_cdk/sources/declarative/auth/token.py +28 -10
- airbyte_cdk/sources/declarative/auth/token_provider.py +9 -8
- airbyte_cdk/sources/declarative/checks/check_stream.py +16 -8
- airbyte_cdk/sources/declarative/checks/connection_checker.py +4 -2
- airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +490 -0
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +4 -0
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +26 -6
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +1185 -85
- airbyte_cdk/sources/declarative/declarative_source.py +5 -2
- airbyte_cdk/sources/declarative/declarative_stream.py +95 -9
- airbyte_cdk/sources/declarative/decoders/__init__.py +23 -2
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
- airbyte_cdk/sources/declarative/decoders/decoder.py +11 -4
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +92 -5
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +12 -1
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +29 -24
- airbyte_cdk/sources/declarative/extractors/http_selector.py +4 -5
- airbyte_cdk/sources/declarative/extractors/record_extractor.py +2 -3
- airbyte_cdk/sources/declarative/extractors/record_filter.py +63 -8
- airbyte_cdk/sources/declarative/extractors/record_selector.py +85 -26
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +177 -0
- airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
- airbyte_cdk/sources/declarative/incremental/__init__.py +31 -3
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +346 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +156 -48
- airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +350 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +173 -74
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
- airbyte_cdk/sources/declarative/interpolation/filters.py +27 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +23 -5
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +12 -8
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +13 -6
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +21 -6
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +9 -3
- airbyte_cdk/sources/declarative/interpolation/jinja.py +72 -37
- airbyte_cdk/sources/declarative/interpolation/macros.py +72 -17
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +193 -52
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
- airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
- airbyte_cdk/sources/declarative/models/__init__.py +1 -1
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +1319 -603
- airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +2 -2
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +26 -4
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +26 -15
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +1759 -225
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +24 -4
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +39 -9
- airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
- airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +15 -3
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +222 -39
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +19 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +19 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +4 -2
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +41 -9
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +29 -14
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +5 -13
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +32 -16
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +46 -56
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +6 -32
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +119 -41
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +228 -0
- airbyte_cdk/sources/declarative/requesters/http_requester.py +98 -344
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +105 -46
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +14 -8
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +19 -8
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +53 -21
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +42 -19
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +25 -12
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +13 -10
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +26 -13
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +15 -2
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +91 -0
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +31 -14
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +27 -15
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +63 -10
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +1 -1
- airbyte_cdk/sources/declarative/requesters/requester.py +9 -17
- airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
- airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
- airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
- airbyte_cdk/sources/declarative/retrievers/__init__.py +6 -2
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +100 -0
- airbyte_cdk/sources/declarative/retrievers/retriever.py +1 -3
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +229 -73
- airbyte_cdk/sources/declarative/schema/__init__.py +14 -1
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +5 -3
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +236 -0
- airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +8 -8
- airbyte_cdk/sources/declarative/spec/spec.py +12 -5
- airbyte_cdk/sources/declarative/stream_slicers/__init__.py +1 -2
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +88 -0
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +9 -14
- airbyte_cdk/sources/declarative/transformations/add_fields.py +19 -11
- airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
- airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +13 -10
- airbyte_cdk/sources/declarative/transformations/transformation.py +5 -5
- airbyte_cdk/sources/declarative/types.py +19 -110
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +31 -10
- airbyte_cdk/sources/embedded/base_integration.py +16 -5
- airbyte_cdk/sources/embedded/catalog.py +16 -4
- airbyte_cdk/sources/embedded/runner.py +19 -3
- airbyte_cdk/sources/embedded/tools.py +5 -2
- airbyte_cdk/sources/file_based/README.md +152 -0
- airbyte_cdk/sources/file_based/__init__.py +24 -0
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +22 -6
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +46 -10
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +47 -10
- airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
- airbyte_cdk/sources/file_based/config/csv_format.py +29 -10
- airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +16 -4
- airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
- airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
- airbyte_cdk/sources/file_based/config/unstructured_format.py +13 -5
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
- airbyte_cdk/sources/file_based/exceptions.py +18 -15
- airbyte_cdk/sources/file_based/file_based_source.py +140 -33
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +69 -5
- airbyte_cdk/sources/file_based/file_types/__init__.py +14 -1
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +75 -24
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +116 -34
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +37 -0
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +24 -8
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +60 -18
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +141 -41
- airbyte_cdk/sources/file_based/remote_file.py +1 -1
- airbyte_cdk/sources/file_based/schema_helpers.py +38 -10
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +50 -13
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +67 -27
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +14 -23
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +54 -18
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +21 -9
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +27 -10
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +147 -45
- airbyte_cdk/sources/http_logger.py +8 -3
- airbyte_cdk/sources/message/__init__.py +7 -1
- airbyte_cdk/sources/message/repository.py +18 -4
- airbyte_cdk/sources/source.py +42 -38
- airbyte_cdk/sources/streams/__init__.py +2 -2
- airbyte_cdk/sources/streams/availability_strategy.py +54 -3
- airbyte_cdk/sources/streams/call_rate.py +64 -21
- airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
- airbyte_cdk/sources/{declarative/incremental → streams/checkpoint}/cursor.py +17 -14
- airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
- airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
- airbyte_cdk/sources/streams/concurrent/README.md +7 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +7 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +84 -75
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +30 -2
- airbyte_cdk/sources/streams/concurrent/cursor.py +298 -42
- airbyte_cdk/sources/streams/concurrent/default_stream.py +12 -3
- airbyte_cdk/sources/streams/concurrent/exceptions.py +3 -0
- airbyte_cdk/sources/streams/concurrent/helpers.py +14 -3
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +12 -3
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +10 -3
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -16
- airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +15 -5
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +109 -17
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +90 -72
- airbyte_cdk/sources/streams/core.py +412 -87
- airbyte_cdk/sources/streams/http/__init__.py +2 -1
- airbyte_cdk/sources/streams/http/availability_strategy.py +12 -101
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
- airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
- airbyte_cdk/sources/streams/http/exceptions.py +27 -7
- airbyte_cdk/sources/streams/http/http.py +369 -246
- airbyte_cdk/sources/streams/http/http_client.py +531 -0
- airbyte_cdk/sources/streams/http/rate_limiting.py +76 -12
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +28 -9
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +90 -35
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
- airbyte_cdk/sources/types.py +154 -0
- airbyte_cdk/sources/utils/record_helper.py +36 -21
- airbyte_cdk/sources/utils/schema_helpers.py +13 -6
- airbyte_cdk/sources/utils/slice_logger.py +4 -1
- airbyte_cdk/sources/utils/transform.py +54 -20
- airbyte_cdk/sql/_util/hashing.py +34 -0
- airbyte_cdk/sql/_util/name_normalizers.py +92 -0
- airbyte_cdk/sql/constants.py +32 -0
- airbyte_cdk/sql/exceptions.py +235 -0
- airbyte_cdk/sql/secrets.py +123 -0
- airbyte_cdk/sql/shared/__init__.py +15 -0
- airbyte_cdk/sql/shared/catalog_providers.py +145 -0
- airbyte_cdk/sql/shared/sql_processor.py +786 -0
- airbyte_cdk/sql/types.py +160 -0
- airbyte_cdk/test/catalog_builder.py +70 -18
- airbyte_cdk/test/entrypoint_wrapper.py +117 -42
- airbyte_cdk/test/mock_http/__init__.py +1 -1
- airbyte_cdk/test/mock_http/matcher.py +6 -0
- airbyte_cdk/test/mock_http/mocker.py +57 -10
- airbyte_cdk/test/mock_http/request.py +19 -3
- airbyte_cdk/test/mock_http/response.py +3 -1
- airbyte_cdk/test/mock_http/response_builder.py +32 -16
- airbyte_cdk/test/state_builder.py +18 -10
- airbyte_cdk/test/utils/__init__.py +1 -0
- airbyte_cdk/test/utils/data.py +24 -0
- airbyte_cdk/test/utils/http_mocking.py +16 -0
- airbyte_cdk/test/utils/manifest_only_fixtures.py +60 -0
- airbyte_cdk/test/utils/reading.py +26 -0
- airbyte_cdk/utils/__init__.py +2 -1
- airbyte_cdk/utils/airbyte_secrets_utils.py +5 -3
- airbyte_cdk/utils/analytics_message.py +10 -2
- airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
- airbyte_cdk/utils/event_timing.py +10 -10
- airbyte_cdk/utils/mapping_helpers.py +3 -1
- airbyte_cdk/utils/message_utils.py +20 -11
- airbyte_cdk/utils/print_buffer.py +75 -0
- airbyte_cdk/utils/schema_inferrer.py +198 -28
- airbyte_cdk/utils/slice_hasher.py +30 -0
- airbyte_cdk/utils/spec_schema_transformations.py +6 -3
- airbyte_cdk/utils/stream_status_utils.py +8 -1
- airbyte_cdk/utils/traced_exception.py +61 -21
- airbyte_cdk-6.17.1.dev0.dist-info/METADATA +109 -0
- airbyte_cdk-6.17.1.dev0.dist-info/RECORD +350 -0
- {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.17.1.dev0.dist-info}/WHEEL +1 -2
- airbyte_cdk-6.17.1.dev0.dist-info/entry_points.txt +3 -0
- airbyte_cdk/sources/declarative/create_partial.py +0 -92
- airbyte_cdk/sources/declarative/parsers/class_types_registry.py +0 -102
- airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py +0 -64
- airbyte_cdk/sources/declarative/requesters/error_handlers/response_action.py +0 -16
- airbyte_cdk/sources/declarative/requesters/error_handlers/response_status.py +0 -68
- airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py +0 -114
- airbyte_cdk/sources/deprecated/base_source.py +0 -94
- airbyte_cdk/sources/deprecated/client.py +0 -99
- airbyte_cdk/sources/singer/__init__.py +0 -8
- airbyte_cdk/sources/singer/singer_helpers.py +0 -304
- airbyte_cdk/sources/singer/source.py +0 -186
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -23
- airbyte_cdk/sources/streams/http/auth/__init__.py +0 -17
- airbyte_cdk/sources/streams/http/auth/core.py +0 -29
- airbyte_cdk/sources/streams/http/auth/oauth.py +0 -113
- airbyte_cdk/sources/streams/http/auth/token.py +0 -47
- airbyte_cdk/sources/streams/utils/stream_helper.py +0 -40
- airbyte_cdk/sources/utils/catalog_helpers.py +0 -22
- airbyte_cdk/sources/utils/schema_models.py +0 -84
- airbyte_cdk-0.72.1.dist-info/METADATA +0 -243
- airbyte_cdk-0.72.1.dist-info/RECORD +0 -466
- airbyte_cdk-0.72.1.dist-info/top_level.txt +0 -3
- source_declarative_manifest/main.py +0 -29
- unit_tests/connector_builder/__init__.py +0 -3
- unit_tests/connector_builder/test_connector_builder_handler.py +0 -871
- unit_tests/connector_builder/test_message_grouper.py +0 -713
- unit_tests/connector_builder/utils.py +0 -27
- unit_tests/destinations/test_destination.py +0 -243
- unit_tests/singer/test_singer_helpers.py +0 -56
- unit_tests/singer/test_singer_source.py +0 -112
- unit_tests/sources/__init__.py +0 -0
- unit_tests/sources/concurrent_source/__init__.py +0 -3
- unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +0 -106
- unit_tests/sources/declarative/__init__.py +0 -3
- unit_tests/sources/declarative/auth/__init__.py +0 -3
- unit_tests/sources/declarative/auth/test_oauth.py +0 -331
- unit_tests/sources/declarative/auth/test_selective_authenticator.py +0 -39
- unit_tests/sources/declarative/auth/test_session_token_auth.py +0 -182
- unit_tests/sources/declarative/auth/test_token_auth.py +0 -200
- unit_tests/sources/declarative/auth/test_token_provider.py +0 -73
- unit_tests/sources/declarative/checks/__init__.py +0 -3
- unit_tests/sources/declarative/checks/test_check_stream.py +0 -146
- unit_tests/sources/declarative/decoders/__init__.py +0 -0
- unit_tests/sources/declarative/decoders/test_json_decoder.py +0 -16
- unit_tests/sources/declarative/external_component.py +0 -13
- unit_tests/sources/declarative/extractors/__init__.py +0 -3
- unit_tests/sources/declarative/extractors/test_dpath_extractor.py +0 -55
- unit_tests/sources/declarative/extractors/test_record_filter.py +0 -55
- unit_tests/sources/declarative/extractors/test_record_selector.py +0 -179
- unit_tests/sources/declarative/incremental/__init__.py +0 -0
- unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +0 -860
- unit_tests/sources/declarative/incremental/test_per_partition_cursor.py +0 -406
- unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +0 -332
- unit_tests/sources/declarative/interpolation/__init__.py +0 -3
- unit_tests/sources/declarative/interpolation/test_filters.py +0 -80
- unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py +0 -40
- unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py +0 -35
- unit_tests/sources/declarative/interpolation/test_interpolated_nested_mapping.py +0 -45
- unit_tests/sources/declarative/interpolation/test_interpolated_string.py +0 -25
- unit_tests/sources/declarative/interpolation/test_jinja.py +0 -240
- unit_tests/sources/declarative/interpolation/test_macros.py +0 -73
- unit_tests/sources/declarative/parsers/__init__.py +0 -3
- unit_tests/sources/declarative/parsers/test_manifest_component_transformer.py +0 -406
- unit_tests/sources/declarative/parsers/test_manifest_reference_resolver.py +0 -139
- unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +0 -1847
- unit_tests/sources/declarative/parsers/testing_components.py +0 -36
- unit_tests/sources/declarative/partition_routers/__init__.py +0 -3
- unit_tests/sources/declarative/partition_routers/test_list_partition_router.py +0 -155
- unit_tests/sources/declarative/partition_routers/test_single_partition_router.py +0 -14
- unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py +0 -404
- unit_tests/sources/declarative/requesters/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_constant_backoff.py +0 -34
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py +0 -36
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py +0 -38
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py +0 -35
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py +0 -64
- unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py +0 -213
- unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py +0 -178
- unit_tests/sources/declarative/requesters/error_handlers/test_http_response_filter.py +0 -121
- unit_tests/sources/declarative/requesters/error_handlers/test_response_status.py +0 -44
- unit_tests/sources/declarative/requesters/paginators/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py +0 -64
- unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +0 -313
- unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py +0 -12
- unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py +0 -58
- unit_tests/sources/declarative/requesters/paginators/test_page_increment.py +0 -70
- unit_tests/sources/declarative/requesters/paginators/test_request_option.py +0 -43
- unit_tests/sources/declarative/requesters/paginators/test_stop_condition.py +0 -105
- unit_tests/sources/declarative/requesters/request_options/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py +0 -101
- unit_tests/sources/declarative/requesters/test_http_requester.py +0 -974
- unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py +0 -32
- unit_tests/sources/declarative/retrievers/__init__.py +0 -3
- unit_tests/sources/declarative/retrievers/test_simple_retriever.py +0 -542
- unit_tests/sources/declarative/schema/__init__.py +0 -6
- unit_tests/sources/declarative/schema/source_test/SourceTest.py +0 -8
- unit_tests/sources/declarative/schema/source_test/__init__.py +0 -3
- unit_tests/sources/declarative/schema/test_default_schema_loader.py +0 -32
- unit_tests/sources/declarative/schema/test_inline_schema_loader.py +0 -19
- unit_tests/sources/declarative/schema/test_json_file_schema_loader.py +0 -26
- unit_tests/sources/declarative/states/__init__.py +0 -3
- unit_tests/sources/declarative/stream_slicers/__init__.py +0 -3
- unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py +0 -225
- unit_tests/sources/declarative/test_create_partial.py +0 -83
- unit_tests/sources/declarative/test_declarative_stream.py +0 -103
- unit_tests/sources/declarative/test_manifest_declarative_source.py +0 -1260
- unit_tests/sources/declarative/test_types.py +0 -39
- unit_tests/sources/declarative/test_yaml_declarative_source.py +0 -148
- unit_tests/sources/file_based/__init__.py +0 -0
- unit_tests/sources/file_based/availability_strategy/__init__.py +0 -0
- unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py +0 -100
- unit_tests/sources/file_based/config/__init__.py +0 -0
- unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +0 -28
- unit_tests/sources/file_based/config/test_csv_format.py +0 -34
- unit_tests/sources/file_based/config/test_file_based_stream_config.py +0 -84
- unit_tests/sources/file_based/discovery_policy/__init__.py +0 -0
- unit_tests/sources/file_based/discovery_policy/test_default_discovery_policy.py +0 -31
- unit_tests/sources/file_based/file_types/__init__.py +0 -0
- unit_tests/sources/file_based/file_types/test_avro_parser.py +0 -243
- unit_tests/sources/file_based/file_types/test_csv_parser.py +0 -546
- unit_tests/sources/file_based/file_types/test_jsonl_parser.py +0 -158
- unit_tests/sources/file_based/file_types/test_parquet_parser.py +0 -274
- unit_tests/sources/file_based/file_types/test_unstructured_parser.py +0 -593
- unit_tests/sources/file_based/helpers.py +0 -70
- unit_tests/sources/file_based/in_memory_files_source.py +0 -211
- unit_tests/sources/file_based/scenarios/__init__.py +0 -0
- unit_tests/sources/file_based/scenarios/avro_scenarios.py +0 -744
- unit_tests/sources/file_based/scenarios/check_scenarios.py +0 -220
- unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +0 -2844
- unit_tests/sources/file_based/scenarios/csv_scenarios.py +0 -3105
- unit_tests/sources/file_based/scenarios/file_based_source_builder.py +0 -91
- unit_tests/sources/file_based/scenarios/incremental_scenarios.py +0 -1926
- unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +0 -930
- unit_tests/sources/file_based/scenarios/parquet_scenarios.py +0 -754
- unit_tests/sources/file_based/scenarios/scenario_builder.py +0 -234
- unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +0 -608
- unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +0 -746
- unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +0 -726
- unit_tests/sources/file_based/stream/__init__.py +0 -0
- unit_tests/sources/file_based/stream/concurrent/__init__.py +0 -0
- unit_tests/sources/file_based/stream/concurrent/test_adapters.py +0 -362
- unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +0 -458
- unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +0 -310
- unit_tests/sources/file_based/stream/test_default_file_based_stream.py +0 -244
- unit_tests/sources/file_based/test_file_based_scenarios.py +0 -320
- unit_tests/sources/file_based/test_file_based_stream_reader.py +0 -272
- unit_tests/sources/file_based/test_scenarios.py +0 -253
- unit_tests/sources/file_based/test_schema_helpers.py +0 -346
- unit_tests/sources/fixtures/__init__.py +0 -3
- unit_tests/sources/fixtures/source_test_fixture.py +0 -153
- unit_tests/sources/message/__init__.py +0 -0
- unit_tests/sources/message/test_repository.py +0 -153
- unit_tests/sources/streams/__init__.py +0 -0
- unit_tests/sources/streams/concurrent/__init__.py +0 -3
- unit_tests/sources/streams/concurrent/scenarios/__init__.py +0 -3
- unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +0 -250
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +0 -140
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +0 -452
- unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +0 -76
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +0 -418
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +0 -142
- unit_tests/sources/streams/concurrent/scenarios/utils.py +0 -55
- unit_tests/sources/streams/concurrent/test_adapters.py +0 -380
- unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +0 -684
- unit_tests/sources/streams/concurrent/test_cursor.py +0 -139
- unit_tests/sources/streams/concurrent/test_datetime_state_converter.py +0 -369
- unit_tests/sources/streams/concurrent/test_default_stream.py +0 -197
- unit_tests/sources/streams/concurrent/test_partition_enqueuer.py +0 -90
- unit_tests/sources/streams/concurrent/test_partition_reader.py +0 -67
- unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +0 -106
- unit_tests/sources/streams/http/__init__.py +0 -0
- unit_tests/sources/streams/http/auth/__init__.py +0 -0
- unit_tests/sources/streams/http/auth/test_auth.py +0 -173
- unit_tests/sources/streams/http/requests_native_auth/__init__.py +0 -0
- unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +0 -423
- unit_tests/sources/streams/http/test_availability_strategy.py +0 -180
- unit_tests/sources/streams/http/test_http.py +0 -635
- unit_tests/sources/streams/test_availability_strategy.py +0 -70
- unit_tests/sources/streams/test_call_rate.py +0 -300
- unit_tests/sources/streams/test_stream_read.py +0 -405
- unit_tests/sources/streams/test_streams_core.py +0 -184
- unit_tests/sources/test_abstract_source.py +0 -1442
- unit_tests/sources/test_concurrent_source.py +0 -112
- unit_tests/sources/test_config.py +0 -92
- unit_tests/sources/test_connector_state_manager.py +0 -482
- unit_tests/sources/test_http_logger.py +0 -252
- unit_tests/sources/test_integration_source.py +0 -86
- unit_tests/sources/test_source.py +0 -684
- unit_tests/sources/test_source_read.py +0 -460
- unit_tests/test/__init__.py +0 -0
- unit_tests/test/mock_http/__init__.py +0 -0
- unit_tests/test/mock_http/test_matcher.py +0 -53
- unit_tests/test/mock_http/test_mocker.py +0 -214
- unit_tests/test/mock_http/test_request.py +0 -117
- unit_tests/test/mock_http/test_response_builder.py +0 -177
- unit_tests/test/test_entrypoint_wrapper.py +0 -240
- unit_tests/utils/__init__.py +0 -0
- unit_tests/utils/test_datetime_format_inferrer.py +0 -60
- unit_tests/utils/test_mapping_helpers.py +0 -54
- unit_tests/utils/test_message_utils.py +0 -91
- unit_tests/utils/test_rate_limiting.py +0 -26
- unit_tests/utils/test_schema_inferrer.py +0 -202
- unit_tests/utils/test_secret_utils.py +0 -135
- unit_tests/utils/test_stream_status_utils.py +0 -61
- unit_tests/utils/test_traced_exception.py +0 -107
- /airbyte_cdk/sources/{deprecated → declarative/async_job}/__init__.py +0 -0
- {source_declarative_manifest → airbyte_cdk/sources/declarative/migrations}/__init__.py +0 -0
- {unit_tests/destinations → airbyte_cdk/sql}/__init__.py +0 -0
- {unit_tests/singer → airbyte_cdk/sql/_util}/__init__.py +0 -0
- {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.17.1.dev0.dist-info}/LICENSE.txt +0 -0
@@ -0,0 +1,335 @@
|
|
1
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from abc import ABC, abstractmethod
|
4
|
+
from enum import Enum
|
5
|
+
from typing import Any, Iterable, Mapping, Optional
|
6
|
+
|
7
|
+
from airbyte_cdk.sources.types import StreamSlice
|
8
|
+
|
9
|
+
from .cursor import Cursor
|
10
|
+
|
11
|
+
|
12
|
+
class CheckpointMode(Enum):
    """Names the checkpointing modes a stream can run in."""

    INCREMENTAL = "incremental"
    RESUMABLE_FULL_REFRESH = "resumable_full_refresh"
    FULL_REFRESH = "full_refresh"
|
16
|
+
|
17
|
+
|
18
|
+
# Sentinel state value: a slice (or stream) whose state equals this mapping is treated as already fully synced.
FULL_REFRESH_COMPLETE_STATE: Mapping[str, Any] = {"__ab_full_refresh_sync_complete": True}
|
19
|
+
|
20
|
+
|
21
|
+
class CheckpointReader(ABC):
    """
    CheckpointReader manages how to iterate over a stream's partitions and serves as the bridge for interpreting the current state
    of the stream that should be emitted back to the platform.
    """

    @abstractmethod
    def next(self) -> Optional[Mapping[str, Any]]:
        """
        Returns the next slice that will be used to fetch the next group of records. Returning None indicates that the reader
        has finished iterating over all slices.
        """

    @abstractmethod
    def observe(self, new_state: Mapping[str, Any]) -> None:
        """
        Updates the internal state of the checkpoint reader based on the incoming stream state from a connector.

        WARNING: This is used to retain backwards compatibility with streams using the legacy get_stream_state() method.
        In order to uptake Resumable Full Refresh, connectors must migrate streams to use the state setter/getter methods.

        :param new_state: the latest stream state reported by the connector
        """

    @abstractmethod
    def get_checkpoint(self) -> Optional[Mapping[str, Any]]:
        """
        Retrieves the current state value of the stream. The connector does not emit state messages if the checkpoint value is None.
        """
|
48
|
+
|
49
|
+
|
50
|
+
class IncrementalCheckpointReader(CheckpointReader):
    """
    IncrementalCheckpointReader handles iterating through a stream based on partitioned windows of data that are determined
    before syncing data.
    """

    def __init__(
        self, stream_state: Mapping[str, Any], stream_slices: Iterable[Optional[Mapping[str, Any]]]
    ):
        """
        :param stream_state: the initial state reported by get_checkpoint() until observe() replaces it
        :param stream_slices: the slices to iterate over, in order
        """
        self._state: Optional[Mapping[str, Any]] = stream_state
        self._stream_slices = iter(stream_slices)
        # Becomes True once at least one slice has been handed out by next()
        self._has_slices = False

    def next(self) -> Optional[Mapping[str, Any]]:
        """Return the next slice, or None once the slice iterator is exhausted."""
        try:
            next_slice = next(self._stream_slices)
        except StopIteration:
            # This is used to avoid sending a duplicate state message at the end of a sync since the stream has already
            # emitted state at the end of each slice. If we want to avoid this extra complexity, we can also just accept
            # that every sync emits a final duplicate state
            if self._has_slices:
                self._state = None
            return None

        self._has_slices = True
        return next_slice

    def observe(self, new_state: Mapping[str, Any]) -> None:
        """Replace the tracked state with ``new_state``."""
        self._state = new_state

    def get_checkpoint(self) -> Optional[Mapping[str, Any]]:
        """Return the tracked state; None after the slices have been exhausted (if any slice was read)."""
        return self._state
|
81
|
+
|
82
|
+
|
83
|
+
class CursorBasedCheckpointReader(CheckpointReader):
    """
    CursorBasedCheckpointReader is used by streams that implement a Cursor in order to manage state. This allows the checkpoint
    reader to delegate the complexity of fetching state to the cursor and focus on the iteration over a stream's partitions.

    This reader supports the Cursor interface used by Python and low-code sources. Not to be confused with Cursor interface
    that belongs to the Concurrent CDK.
    """

    def __init__(
        self,
        cursor: Cursor,
        stream_slices: Iterable[Optional[Mapping[str, Any]]],
        read_state_from_cursor: bool = False,
    ):
        """
        :param cursor: the cursor that owns the stream state
        :param stream_slices: the slices (partitions) to iterate over, in order
        :param read_state_from_cursor: when True, the cursor's per-slice state decides which slice is emitted next
        """
        self._cursor = cursor
        self._stream_slices = iter(stream_slices)
        # read_state_from_cursor is used to delineate that partitions should determine when to stop syncing dynamically according
        # to the value of the state at runtime. This currently only applies to streams that use resumable full refresh.
        self._read_state_from_cursor = read_state_from_cursor
        self._current_slice: Optional[StreamSlice] = None
        self._finished_sync = False
        self._previous_state: Optional[Mapping[str, Any]] = None

    def next(self) -> Optional[Mapping[str, Any]]:
        """Advance to the next slice and return it, or return None when there are no slices left."""
        try:
            self.current_slice = self._find_next_slice()
            return self.current_slice
        except StopIteration:
            self._finished_sync = True
            return None

    def observe(self, new_state: Mapping[str, Any]) -> None:
        # Cursor based checkpoint readers don't need to observe the new state because it has already been updated by the cursor
        # while processing records
        pass

    def get_checkpoint(self) -> Optional[Mapping[str, Any]]:
        """Return the cursor's stream state, or None if it is unchanged since the previous call."""
        # This is used to avoid sending duplicate state messages
        new_state = self._cursor.get_stream_state()
        if new_state != self._previous_state:
            self._previous_state = new_state
            return new_state
        else:
            return None

    def _find_next_slice(self) -> StreamSlice:
        """
        _find_next_slice() returns the next slice of data that should be synced for the current stream according to its cursor.
        This function supports iterating over a stream's slices across two dimensions. The first dimension is the stream's
        partitions like parent records for a substream. The inner dimension iterates over the cursor value like a date
        range for incremental streams or a pagination checkpoint for resumable full refresh.

        The basic algorithm for iterating through a stream's slices is:
        1. The first time next() is invoked we get the first partition
        2. If the current partition is already complete as a result of a previous sync attempt, continue iterating until
           we find an un-synced partition.
        3. For streams whose cursor value is determined dynamically using stream state
            1. Get the state for the current partition
            2. If the current partition's state is complete, continue iterating over partitions
            3. If the current partition's state is still in progress, emit the next cursor value
            4. If the current partition is complete as delineated by the sentinel value, get the next incomplete partition
        4. When stream has processed all partitions, the iterator will raise a StopIteration exception signaling there are no more
           slices left for extracting more records.

        :raises StopIteration: when the underlying slice iterator is exhausted
        """
        if not self._read_state_from_cursor:
            # Unlike RFR cursors that iterate dynamically according to how stream state is updated, most cursors operate
            # on a fixed set of slices determined before reading records. They just iterate to the next slice
            return self.read_and_convert_slice()

        if self.current_slice is None:
            # current_slice is None represents the first time we are iterating over a stream's slices. The first slice to
            # sync has not been assigned yet and must first be read from the iterator
            return self._next_incomplete_slice()

        state_for_slice = self._cursor.select_state(self.current_slice)
        if state_for_slice == FULL_REFRESH_COMPLETE_STATE:
            # The current slice is complete, so move on to the next slice that is not
            return self._next_incomplete_slice()

        # The reader continues to process the current partition if its state is still in progress
        return StreamSlice(
            cursor_slice=state_for_slice or {},
            partition=self.current_slice.partition,
            extra_fields=self.current_slice.extra_fields,
        )

    def _next_incomplete_slice(self) -> StreamSlice:
        """
        Read slices from the iterator, skipping every slice whose state already equals the terminal complete value (meaning a
        previous attempt successfully synced it), and return the first remaining one paired with its current cursor state.

        :raises StopIteration: when the underlying slice iterator is exhausted
        """
        while True:
            candidate_slice = self.read_and_convert_slice()
            state_for_slice = self._cursor.select_state(candidate_slice)
            if state_for_slice != FULL_REFRESH_COMPLETE_STATE:
                return StreamSlice(
                    cursor_slice=state_for_slice or {},
                    partition=candidate_slice.partition,
                    extra_fields=candidate_slice.extra_fields,
                )

    @property
    def current_slice(self) -> Optional[StreamSlice]:
        return self._current_slice

    @current_slice.setter
    def current_slice(self, value: StreamSlice) -> None:
        self._current_slice = value

    def read_and_convert_slice(self) -> StreamSlice:
        """
        Read the next item from the slice iterator and check that it is a StreamSlice.

        :raises StopIteration: when the underlying slice iterator is exhausted
        :raises ValueError: when the item read is not a StreamSlice
        """
        next_slice = next(self._stream_slices)
        if not isinstance(next_slice, StreamSlice):
            # Report the offending value itself rather than the previously emitted slice
            raise ValueError(
                f"{next_slice} should be of type StreamSlice. This is likely a bug in the CDK, please contact Airbyte support"
            )
        return next_slice
|
211
|
+
|
212
|
+
|
213
|
+
class LegacyCursorBasedCheckpointReader(CursorBasedCheckpointReader):
    """
    This (unfortunate) class operates like an adapter to retain backwards compatibility with legacy sources that take in stream_slice
    in the form of a Mapping instead of the StreamSlice object. Internally, the reader still operates over StreamSlices, but it
    is instantiated with and emits stream slices in the form of a Mapping[str, Any]. The logic of how partitions and cursors
    are iterated over is synonymous with CursorBasedCheckpointReader.

    We also retain the existing top level fields defined by the connector so the fields are present on dependent methods. For example,
    the resulting mapping structure passed back to the stream's read_records() method looks like:
    {
      "cursor_slice": {
        "next_page_token": 10
      },
      "partition": {
        "repository": "airbytehq/airbyte"
      },
      "next_page_token": 10,
      "repository": "airbytehq/airbyte"
    }
    """

    def __init__(
        self,
        cursor: Cursor,
        stream_slices: Iterable[Optional[Mapping[str, Any]]],
        read_state_from_cursor: bool = False,
    ):
        super().__init__(
            cursor=cursor,
            stream_slices=stream_slices,
            read_state_from_cursor=read_state_from_cursor,
        )

    def next(self) -> Optional[Mapping[str, Any]]:
        """
        Advance to the next slice and return it as a plain mapping, or return None when there are no slices left.

        :raises ValueError: when the slice itself uses the reserved top-level keys 'partition' or 'cursor_slice'
        """
        try:
            self.current_slice = self._find_next_slice()
        except StopIteration:
            self._finished_sync = True
            return None

        top_level_fields = dict(self.current_slice)
        if "partition" in top_level_fields:
            raise ValueError("Stream is configured to use invalid stream slice key 'partition'")
        if "cursor_slice" in top_level_fields:
            raise ValueError(
                "Stream is configured to use invalid stream slice key 'cursor_slice'"
            )

        # We convert StreamSlice to a regular mapping because legacy connectors operate on the basic Mapping object. We
        # also duplicate all fields at the top level for backwards compatibility for existing Python sources
        return {
            "partition": self.current_slice.partition,
            "cursor_slice": self.current_slice.cursor_slice,
            **top_level_fields,
        }

    def read_and_convert_slice(self) -> StreamSlice:
        """
        Read the next mapping from the slice iterator and wrap it in a StreamSlice with an empty cursor slice.

        :raises StopIteration: when the underlying slice iterator is exhausted
        :raises ValueError: when the item read is not a Mapping
        """
        next_mapping_slice = next(self._stream_slices)
        if not isinstance(next_mapping_slice, Mapping):
            # Report the offending value itself rather than the previously emitted slice
            raise ValueError(
                f"{next_mapping_slice} should be of type Mapping. This is likely a bug in the CDK, please contact Airbyte support"
            )

        # The legacy reader is instantiated with an iterable of stream slice mappings. We convert each into a StreamSlice
        # to sanely process them during the sync and to reuse the existing Python defined cursors
        return StreamSlice(
            partition=next_mapping_slice,
            cursor_slice={},
        )
|
281
|
+
|
282
|
+
|
283
|
+
class ResumableFullRefreshCheckpointReader(CheckpointReader):
    """
    ResumableFullRefreshCheckpointReader allows for iteration over an unbounded set of records based on the pagination strategy
    of the stream. Because the number of pages is unknown, the stream's current state is used to determine whether to continue
    fetching more pages or stopping the sync.
    """

    def __init__(self, stream_state: Mapping[str, Any]):
        # The first attempt of an RFR stream has an empty {} incoming state, but should still make a first attempt to read records
        # from the first page in next().
        self._first_page = stream_state == {}
        self._state: Mapping[str, Any] = stream_state

    def next(self) -> Optional[Mapping[str, Any]]:
        """Return the current state as the next slice, or None once the state equals the terminal complete value."""
        if self._first_page:
            self._first_page = False
            return self._state
        if self._state == FULL_REFRESH_COMPLETE_STATE:
            return None
        return self._state

    def observe(self, new_state: Mapping[str, Any]) -> None:
        """Replace the tracked state with ``new_state``."""
        self._state = new_state

    def get_checkpoint(self) -> Optional[Mapping[str, Any]]:
        """Return the tracked state, substituting an empty mapping when it is falsy."""
        return self._state or {}
|
310
|
+
|
311
|
+
|
312
|
+
class FullRefreshCheckpointReader(CheckpointReader):
    """
    FullRefreshCheckpointReader iterates over data that cannot be checkpointed incrementally during the sync because the stream
    is not capable of managing state. At the end of a sync, a final state message is emitted to signal completion.
    """

    def __init__(self, stream_slices: Iterable[Optional[Mapping[str, Any]]]):
        self._stream_slices = iter(stream_slices)
        # Set once the slice iterator has been exhausted; only then does get_checkpoint() return a value
        self._final_checkpoint = False

    def next(self) -> Optional[Mapping[str, Any]]:
        """Return the next slice, or None once the slice iterator is exhausted."""
        try:
            return next(self._stream_slices)
        except StopIteration:
            self._final_checkpoint = True
            return None

    def observe(self, new_state: Mapping[str, Any]) -> None:
        # This reader keeps no state of its own, so incoming state is ignored
        pass

    def get_checkpoint(self) -> Optional[Mapping[str, Any]]:
        """Return the final no-cursor state marker once all slices were read, and None before that."""
        if self._final_checkpoint:
            return {"__ab_no_cursor_state_message": True}
        return None
|
@@ -3,16 +3,15 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
from abc import ABC, abstractmethod
|
6
|
-
from typing import Optional
|
6
|
+
from typing import Any, Optional
|
7
7
|
|
8
|
-
from airbyte_cdk.sources.
|
9
|
-
from airbyte_cdk.sources.declarative.types import Record, StreamSlice, StreamState
|
8
|
+
from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
|
10
9
|
|
11
10
|
|
12
|
-
class Cursor(ABC
|
11
|
+
class Cursor(ABC):
|
13
12
|
"""
|
14
|
-
Cursors are components that allow for
|
15
|
-
that information.
|
13
|
+
Cursors are components that allow for checkpointing the current state of a sync. They keep track of what data has been consumed
|
14
|
+
and allows for syncs to be resumed from a specific point based on that information.
|
16
15
|
"""
|
17
16
|
|
18
17
|
@abstractmethod
|
@@ -35,17 +34,13 @@ class Cursor(ABC, StreamSlicer):
|
|
35
34
|
pass
|
36
35
|
|
37
36
|
@abstractmethod
|
38
|
-
def close_slice(self, stream_slice: StreamSlice,
|
37
|
+
def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None:
|
39
38
|
"""
|
40
|
-
Update state based on the stream slice
|
41
|
-
|
42
|
-
|
39
|
+
Update state based on the stream slice. Note that `stream_slice.cursor_slice` and `most_recent_record.associated_slice` are expected
|
40
|
+
to be the same but we make it explicit here that `stream_slice` should be leveraged to update the state. We do not pass in the
|
41
|
+
latest record, since cursor instances should maintain the relevant internal state on their own.
|
43
42
|
|
44
43
|
:param stream_slice: slice to close
|
45
|
-
:param most_recent_record: the latest record we have received for the slice. This is important to consider because even if the
|
46
|
-
cursor emits a slice, some APIs are not able to enforce the upper boundary. The outcome is that the last_record might have a
|
47
|
-
higher cursor value than the slice upper boundary and if we want to reduce the duplication as much as possible, we need to
|
48
|
-
consider the highest value between the internal cursor, the stream slice upper boundary and the record cursor value.
|
49
44
|
"""
|
50
45
|
|
51
46
|
@abstractmethod
|
@@ -72,3 +67,11 @@ class Cursor(ABC, StreamSlicer):
|
|
72
67
|
"""
|
73
68
|
Evaluating which record is greater in terms of cursor. This is used to avoid having to capture all the records to close a slice
|
74
69
|
"""
|
70
|
+
|
71
|
+
@abstractmethod
|
72
|
+
def select_state(self, stream_slice: Optional[StreamSlice] = None) -> Optional[StreamState]:
|
73
|
+
"""
|
74
|
+
Get the state value of a specific stream_slice. For incremental or resumable full refresh cursors which only manage state in
|
75
|
+
a single dimension this is the entire state object. For per-partition cursors used by substreams, this returns the state of
|
76
|
+
a specific parent delineated by the incoming slice's partition object.
|
77
|
+
"""
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
import json
|
4
|
+
from typing import Any, Mapping
|
5
|
+
|
6
|
+
|
7
|
+
class PerPartitionKeySerializer:
    """
    We are concerned about the performance of looping through the `states` list and evaluating equality on the partition. To
    reduce this concern, we wanted to use dictionaries to map `partition -> cursor`. However, partitions are dicts and dicts can't
    be used as dict keys since they are not hashable. By creating a JSON string from the dict, we get a value that can be used as a
    dict key since strings are hashable.
    """

    @staticmethod
    def to_partition_key(to_serialize: Any) -> str:
        """Serialize ``to_serialize`` to a compact JSON string with sorted keys."""
        # separators have changed in Python 3.4. To avoid being impacted by further change, we explicitly specify our own value
        return json.dumps(to_serialize, indent=None, separators=(",", ":"), sort_keys=True)

    @staticmethod
    def to_partition(to_deserialize: Any) -> Mapping[str, Any]:
        """Parse a JSON string produced by ``to_partition_key`` back into a partition mapping."""
        return json.loads(to_deserialize)  # type: ignore # The partition is known to be a dict, but the type hint is Any
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from dataclasses import dataclass
|
4
|
+
from typing import Any, Optional
|
5
|
+
|
6
|
+
from airbyte_cdk.sources.streams.checkpoint import Cursor
|
7
|
+
from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
|
8
|
+
|
9
|
+
|
10
|
+
@dataclass
class ResumableFullRefreshCursor(Cursor):
    """
    Cursor that allows for the checkpointing of sync progress according to a synthetic cursor based on the pagination state
    of the stream. Resumable full refresh syncs are only intended to retain state in between sync attempts of the same job
    with the platform responsible for removing said state.
    """

    def __init__(self) -> None:
        self._cursor: StreamState = {}

    def get_stream_state(self) -> StreamState:
        """Return the cursor value currently held."""
        return self._cursor

    def set_initial_state(self, stream_state: StreamState) -> None:
        """Replace the held cursor value with ``stream_state``."""
        self._cursor = stream_state

    def observe(self, stream_slice: StreamSlice, record: Record) -> None:
        """
        Resumable full refresh manages state using a page number so it does not need to update state by observing incoming records.
        """
        pass

    def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None:
        """Record the closed slice's ``cursor_slice`` as the new cursor value."""
        self._cursor = stream_slice.cursor_slice

    def should_be_synced(self, record: Record) -> bool:
        """
        Unlike date-based cursors which filter out records outside slice boundaries, resumable full refresh records exist within pages
        that don't have filterable bounds. We should always return them.
        """
        return True

    def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
        """
        RFR records don't have an ordering to be compared between one another.
        """
        return False

    def select_state(self, stream_slice: Optional[StreamSlice] = None) -> Optional[StreamState]:
        # A top-level RFR cursor only manages the state of a single partition
        return self._cursor
|
@@ -0,0 +1,110 @@
|
|
1
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from dataclasses import dataclass
|
4
|
+
from typing import Any, Mapping, MutableMapping, Optional
|
5
|
+
|
6
|
+
from airbyte_cdk.models import FailureType
|
7
|
+
from airbyte_cdk.sources.streams.checkpoint import Cursor
|
8
|
+
from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import (
|
9
|
+
PerPartitionKeySerializer,
|
10
|
+
)
|
11
|
+
from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
|
12
|
+
from airbyte_cdk.utils import AirbyteTracedException
|
13
|
+
|
14
|
+
# Sentinel cursor value stored for a partition once its slice has been closed.
FULL_REFRESH_COMPLETE_STATE: Mapping[str, Any] = {"__ab_full_refresh_sync_complete": True}
|
15
|
+
|
16
|
+
|
17
|
+
@dataclass
class SubstreamResumableFullRefreshCursor(Cursor):
    """
    Resumable full refresh cursor that keeps one state entry per partition, keyed by the serialized partition.
    """

    def __init__(self) -> None:
        # Maps serialized partition key -> {"partition": ..., "cursor": ...}
        self._per_partition_state: MutableMapping[str, StreamState] = {}
        self._partition_serializer = PerPartitionKeySerializer()

    def get_stream_state(self) -> StreamState:
        """Return every tracked partition state under the ``"states"`` key."""
        return {"states": list(self._per_partition_state.values())}

    def set_initial_state(self, stream_state: StreamState) -> None:
        """
        Set the initial state for the cursors.

        This method initializes the state for each partition cursor using the provided stream state.
        If a partition state is provided in the stream state, it will update the corresponding partition cursor with this state.

        To simplify processing and state management, we do not maintain the checkpointed state of the parent partitions.
        Instead, we are tracking whether a parent has already successfully synced on a prior attempt and skipping over it
        allowing the sync to continue making progress. And this works for RFR because the platform will dispose of this
        state on the next sync job.

        Args:
            stream_state (StreamState): The state of the streams to be set. The format of the stream state should be:
                {
                    "states": [
                        {
                            "partition": {
                                "partition_key": "value_0"
                            },
                            "cursor": {
                                "__ab_full_refresh_sync_complete": True
                            }
                        },
                        {
                            "partition": {
                                "partition_key": "value_1"
                            },
                            "cursor": {},
                        },
                    ]
                }

        Raises:
            AirbyteTracedException: if a non-empty ``stream_state`` has no ``"states"`` key.
        """
        if not stream_state:
            return

        if "states" not in stream_state:
            raise AirbyteTracedException(
                internal_message=f"Could not parse the following state: {stream_state}",
                message="The state format is invalid. Validate that the migration steps included a reset and that it was performed "
                "properly. Otherwise, please contact Airbyte support.",
                failure_type=FailureType.config_error,
            )

        for state in stream_state["states"]:
            self._per_partition_state[self._to_partition_key(state["partition"])] = state

    def observe(self, stream_slice: StreamSlice, record: Record) -> None:
        """
        Substream resumable full refresh manages state by closing the slice after syncing a parent so observe is not used.
        """
        pass

    def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None:
        """Mark the partition of ``stream_slice`` as complete."""
        self._per_partition_state[self._to_partition_key(stream_slice.partition)] = {
            "partition": stream_slice.partition,
            "cursor": FULL_REFRESH_COMPLETE_STATE,
        }

    def should_be_synced(self, record: Record) -> bool:
        """
        Unlike date-based cursors which filter out records outside slice boundaries, resumable full refresh records exist within pages
        that don't have filterable bounds. We should always return them.
        """
        return True

    def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
        """
        RFR records don't have an ordering to be compared between one another.
        """
        return False

    def select_state(self, stream_slice: Optional[StreamSlice] = None) -> Optional[StreamState]:
        """
        Return the ``"cursor"`` value stored for the partition of ``stream_slice``, or None when nothing is stored for it.

        Raises:
            ValueError: if ``stream_slice`` is falsy.
        """
        if not stream_slice:
            raise ValueError("A partition needs to be provided in order to extract a state")

        return self._per_partition_state.get(
            self._to_partition_key(stream_slice.partition), {}
        ).get("cursor")

    def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
        return self._partition_serializer.to_partition_key(partition)

    def _to_dict(self, partition_key: str) -> Mapping[str, Any]:
        return self._partition_serializer.to_partition(partition_key)
|
@@ -0,0 +1,7 @@
|
|
1
|
+
## Breaking Changes & Limitations
|
2
|
+
|
3
|
+
- [bigger scope than Concurrent CDK] checkpointing state was acting on the number of records per slice. This has been changed to consider the number of records per sync
|
4
|
+
- `Source.read_state` and `Source._emit_legacy_state_format` are now classmethods to allow for developers to have access to the state before instantiating the source
|
5
|
+
- send_per_stream_state is always True for Concurrent CDK
|
6
|
+
- Using stream_state during read_records: The concern is that today, stream_instance.get_updated_state is called on every record and read_records on every slice. The implication is that the argument stream_state passed to read_records will have the value after the last stream_instance.get_updated_state of the previous slice. For Concurrent CDK, this is not possible as slices are processed in an unordered way.
|
7
|
+
- Cursor fields can only be date-time values formatted as epoch. Eventually, we want to move to ISO 8601 as it provides more flexibility, but for the first iteration on Stripe it was easier to use the same format that was already used
|
@@ -5,14 +5,19 @@
|
|
5
5
|
from abc import ABC, abstractmethod
|
6
6
|
from typing import Any, Iterable, Mapping, Optional
|
7
7
|
|
8
|
+
from typing_extensions import deprecated
|
9
|
+
|
8
10
|
from airbyte_cdk.models import AirbyteStream
|
11
|
+
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
9
12
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import StreamAvailability
|
10
13
|
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
|
11
14
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
12
|
-
from deprecated.classic import deprecated
|
13
15
|
|
14
16
|
|
15
|
-
@deprecated(
|
17
|
+
@deprecated(
|
18
|
+
"This class is experimental. Use at your own risk.",
|
19
|
+
category=ExperimentalClassWarning,
|
20
|
+
)
|
16
21
|
class AbstractStream(ABC):
|
17
22
|
"""
|
18
23
|
AbstractStream is an experimental interface for streams developed as part of the Concurrent CDK.
|