airbyte-cdk: airbyte_cdk-0.72.0-py3-none-any.whl → airbyte_cdk-6.13.1.dev4106-py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/__init__.py +355 -6
- airbyte_cdk/cli/__init__.py +1 -0
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
- airbyte_cdk/cli/source_declarative_manifest/_run.py +230 -0
- airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
- airbyte_cdk/config_observation.py +29 -10
- airbyte_cdk/connector.py +24 -24
- airbyte_cdk/connector_builder/README.md +53 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +37 -11
- airbyte_cdk/connector_builder/main.py +45 -13
- airbyte_cdk/connector_builder/message_grouper.py +189 -50
- airbyte_cdk/connector_builder/models.py +3 -2
- airbyte_cdk/destinations/__init__.py +4 -3
- airbyte_cdk/destinations/destination.py +54 -20
- airbyte_cdk/destinations/vector_db_based/README.md +37 -0
- airbyte_cdk/destinations/vector_db_based/config.py +40 -17
- airbyte_cdk/destinations/vector_db_based/document_processor.py +56 -17
- airbyte_cdk/destinations/vector_db_based/embedder.py +57 -15
- airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
- airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
- airbyte_cdk/destinations/vector_db_based/writer.py +24 -5
- airbyte_cdk/entrypoint.py +153 -44
- airbyte_cdk/exception_handler.py +21 -3
- airbyte_cdk/logger.py +30 -44
- airbyte_cdk/models/__init__.py +13 -2
- airbyte_cdk/models/airbyte_protocol.py +86 -1
- airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
- airbyte_cdk/models/file_transfer_record_message.py +13 -0
- airbyte_cdk/models/well_known_types.py +1 -1
- airbyte_cdk/sources/__init__.py +5 -1
- airbyte_cdk/sources/abstract_source.py +125 -79
- airbyte_cdk/sources/concurrent_source/__init__.py +7 -2
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +102 -36
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +29 -36
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +94 -10
- airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +20 -14
- airbyte_cdk/sources/config.py +3 -2
- airbyte_cdk/sources/connector_state_manager.py +49 -83
- airbyte_cdk/sources/declarative/async_job/job.py +52 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +497 -0
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +75 -0
- airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
- airbyte_cdk/sources/declarative/async_job/status.py +24 -0
- airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
- airbyte_cdk/sources/declarative/auth/__init__.py +2 -3
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
- airbyte_cdk/sources/declarative/auth/jwt.py +191 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +60 -20
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +10 -2
- airbyte_cdk/sources/declarative/auth/token.py +28 -10
- airbyte_cdk/sources/declarative/auth/token_provider.py +9 -8
- airbyte_cdk/sources/declarative/checks/check_stream.py +16 -8
- airbyte_cdk/sources/declarative/checks/connection_checker.py +4 -2
- airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +421 -0
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +4 -0
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +26 -6
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +1213 -88
- airbyte_cdk/sources/declarative/declarative_source.py +5 -2
- airbyte_cdk/sources/declarative/declarative_stream.py +95 -9
- airbyte_cdk/sources/declarative/decoders/__init__.py +23 -2
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
- airbyte_cdk/sources/declarative/decoders/decoder.py +11 -4
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +92 -5
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +12 -1
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +29 -24
- airbyte_cdk/sources/declarative/extractors/http_selector.py +4 -5
- airbyte_cdk/sources/declarative/extractors/record_extractor.py +2 -3
- airbyte_cdk/sources/declarative/extractors/record_filter.py +65 -8
- airbyte_cdk/sources/declarative/extractors/record_selector.py +85 -26
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +177 -0
- airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
- airbyte_cdk/sources/declarative/incremental/__init__.py +25 -3
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +156 -48
- airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +350 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +159 -74
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
- airbyte_cdk/sources/declarative/interpolation/filters.py +27 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +23 -5
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +12 -8
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +13 -6
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +21 -6
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +9 -3
- airbyte_cdk/sources/declarative/interpolation/jinja.py +72 -37
- airbyte_cdk/sources/declarative/interpolation/macros.py +72 -17
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +193 -52
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
- airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
- airbyte_cdk/sources/declarative/models/__init__.py +1 -1
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +1329 -595
- airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +2 -2
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +26 -4
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +26 -15
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +1699 -226
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +24 -4
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +39 -9
- airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
- airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +15 -3
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +222 -39
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +19 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +19 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +4 -2
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +41 -9
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +29 -14
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +5 -13
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +32 -16
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +46 -56
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +6 -32
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +119 -41
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +228 -0
- airbyte_cdk/sources/declarative/requesters/http_requester.py +98 -344
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +105 -46
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +14 -8
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +19 -8
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +53 -21
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +42 -19
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +25 -12
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +13 -10
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +26 -13
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +15 -2
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +91 -0
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +31 -14
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +27 -15
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +63 -10
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +1 -1
- airbyte_cdk/sources/declarative/requesters/requester.py +9 -17
- airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
- airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
- airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
- airbyte_cdk/sources/declarative/retrievers/__init__.py +6 -2
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +100 -0
- airbyte_cdk/sources/declarative/retrievers/retriever.py +1 -3
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +228 -72
- airbyte_cdk/sources/declarative/schema/__init__.py +14 -1
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +5 -3
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +236 -0
- airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +8 -8
- airbyte_cdk/sources/declarative/spec/spec.py +12 -5
- airbyte_cdk/sources/declarative/stream_slicers/__init__.py +1 -2
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +88 -0
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +9 -14
- airbyte_cdk/sources/declarative/transformations/add_fields.py +19 -11
- airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
- airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +13 -10
- airbyte_cdk/sources/declarative/transformations/transformation.py +5 -5
- airbyte_cdk/sources/declarative/types.py +19 -110
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +31 -10
- airbyte_cdk/sources/embedded/base_integration.py +16 -5
- airbyte_cdk/sources/embedded/catalog.py +16 -4
- airbyte_cdk/sources/embedded/runner.py +19 -3
- airbyte_cdk/sources/embedded/tools.py +5 -2
- airbyte_cdk/sources/file_based/README.md +152 -0
- airbyte_cdk/sources/file_based/__init__.py +24 -0
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +22 -6
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +46 -10
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +58 -10
- airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
- airbyte_cdk/sources/file_based/config/csv_format.py +29 -10
- airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +16 -4
- airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
- airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
- airbyte_cdk/sources/file_based/config/unstructured_format.py +13 -5
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
- airbyte_cdk/sources/file_based/exceptions.py +52 -15
- airbyte_cdk/sources/file_based/file_based_source.py +163 -33
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +83 -5
- airbyte_cdk/sources/file_based/file_types/__init__.py +14 -1
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +75 -24
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +116 -34
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +37 -0
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +24 -8
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +60 -18
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +145 -41
- airbyte_cdk/sources/file_based/remote_file.py +1 -1
- airbyte_cdk/sources/file_based/schema_helpers.py +38 -10
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +50 -13
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +67 -27
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +14 -23
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +54 -18
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +21 -9
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +27 -10
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +175 -45
- airbyte_cdk/sources/http_logger.py +8 -3
- airbyte_cdk/sources/message/__init__.py +7 -1
- airbyte_cdk/sources/message/repository.py +18 -4
- airbyte_cdk/sources/source.py +42 -38
- airbyte_cdk/sources/streams/__init__.py +2 -2
- airbyte_cdk/sources/streams/availability_strategy.py +54 -3
- airbyte_cdk/sources/streams/call_rate.py +64 -21
- airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
- airbyte_cdk/sources/{declarative/incremental → streams/checkpoint}/cursor.py +17 -14
- airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
- airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
- airbyte_cdk/sources/streams/concurrent/README.md +7 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +7 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +84 -75
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +30 -2
- airbyte_cdk/sources/streams/concurrent/cursor.py +298 -42
- airbyte_cdk/sources/streams/concurrent/default_stream.py +12 -3
- airbyte_cdk/sources/streams/concurrent/exceptions.py +3 -0
- airbyte_cdk/sources/streams/concurrent/helpers.py +14 -3
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +12 -3
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +10 -3
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -16
- airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +15 -5
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +109 -17
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +90 -72
- airbyte_cdk/sources/streams/core.py +412 -87
- airbyte_cdk/sources/streams/http/__init__.py +2 -1
- airbyte_cdk/sources/streams/http/availability_strategy.py +12 -101
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
- airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
- airbyte_cdk/sources/streams/http/exceptions.py +27 -7
- airbyte_cdk/sources/streams/http/http.py +369 -246
- airbyte_cdk/sources/streams/http/http_client.py +531 -0
- airbyte_cdk/sources/streams/http/rate_limiting.py +76 -12
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +28 -9
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +90 -35
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
- airbyte_cdk/sources/types.py +154 -0
- airbyte_cdk/sources/utils/record_helper.py +36 -21
- airbyte_cdk/sources/utils/schema_helpers.py +13 -6
- airbyte_cdk/sources/utils/slice_logger.py +4 -1
- airbyte_cdk/sources/utils/transform.py +54 -20
- airbyte_cdk/sql/_util/hashing.py +34 -0
- airbyte_cdk/sql/_util/name_normalizers.py +92 -0
- airbyte_cdk/sql/constants.py +32 -0
- airbyte_cdk/sql/exceptions.py +235 -0
- airbyte_cdk/sql/secrets.py +123 -0
- airbyte_cdk/sql/shared/__init__.py +15 -0
- airbyte_cdk/sql/shared/catalog_providers.py +145 -0
- airbyte_cdk/sql/shared/sql_processor.py +786 -0
- airbyte_cdk/sql/types.py +160 -0
- airbyte_cdk/test/catalog_builder.py +70 -18
- airbyte_cdk/test/entrypoint_wrapper.py +117 -42
- airbyte_cdk/test/mock_http/__init__.py +1 -1
- airbyte_cdk/test/mock_http/matcher.py +6 -0
- airbyte_cdk/test/mock_http/mocker.py +57 -10
- airbyte_cdk/test/mock_http/request.py +19 -3
- airbyte_cdk/test/mock_http/response.py +3 -1
- airbyte_cdk/test/mock_http/response_builder.py +32 -16
- airbyte_cdk/test/state_builder.py +18 -10
- airbyte_cdk/test/utils/__init__.py +1 -0
- airbyte_cdk/test/utils/data.py +24 -0
- airbyte_cdk/test/utils/http_mocking.py +16 -0
- airbyte_cdk/test/utils/manifest_only_fixtures.py +60 -0
- airbyte_cdk/test/utils/reading.py +26 -0
- airbyte_cdk/utils/__init__.py +2 -1
- airbyte_cdk/utils/airbyte_secrets_utils.py +5 -3
- airbyte_cdk/utils/analytics_message.py +10 -2
- airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
- airbyte_cdk/utils/event_timing.py +10 -10
- airbyte_cdk/utils/mapping_helpers.py +3 -1
- airbyte_cdk/utils/message_utils.py +20 -11
- airbyte_cdk/utils/print_buffer.py +75 -0
- airbyte_cdk/utils/schema_inferrer.py +198 -28
- airbyte_cdk/utils/slice_hasher.py +30 -0
- airbyte_cdk/utils/spec_schema_transformations.py +6 -3
- airbyte_cdk/utils/stream_status_utils.py +8 -1
- airbyte_cdk/utils/traced_exception.py +61 -21
- airbyte_cdk-6.13.1.dev4106.dist-info/METADATA +109 -0
- airbyte_cdk-6.13.1.dev4106.dist-info/RECORD +349 -0
- {airbyte_cdk-0.72.0.dist-info → airbyte_cdk-6.13.1.dev4106.dist-info}/WHEEL +1 -2
- airbyte_cdk-6.13.1.dev4106.dist-info/entry_points.txt +3 -0
- airbyte_cdk/sources/declarative/create_partial.py +0 -92
- airbyte_cdk/sources/declarative/parsers/class_types_registry.py +0 -102
- airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py +0 -64
- airbyte_cdk/sources/declarative/requesters/error_handlers/response_action.py +0 -16
- airbyte_cdk/sources/declarative/requesters/error_handlers/response_status.py +0 -68
- airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py +0 -114
- airbyte_cdk/sources/deprecated/base_source.py +0 -94
- airbyte_cdk/sources/deprecated/client.py +0 -99
- airbyte_cdk/sources/singer/__init__.py +0 -8
- airbyte_cdk/sources/singer/singer_helpers.py +0 -304
- airbyte_cdk/sources/singer/source.py +0 -186
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -23
- airbyte_cdk/sources/streams/http/auth/__init__.py +0 -17
- airbyte_cdk/sources/streams/http/auth/core.py +0 -29
- airbyte_cdk/sources/streams/http/auth/oauth.py +0 -113
- airbyte_cdk/sources/streams/http/auth/token.py +0 -47
- airbyte_cdk/sources/streams/utils/stream_helper.py +0 -40
- airbyte_cdk/sources/utils/catalog_helpers.py +0 -22
- airbyte_cdk/sources/utils/schema_models.py +0 -84
- airbyte_cdk-0.72.0.dist-info/METADATA +0 -243
- airbyte_cdk-0.72.0.dist-info/RECORD +0 -466
- airbyte_cdk-0.72.0.dist-info/top_level.txt +0 -3
- source_declarative_manifest/main.py +0 -29
- unit_tests/connector_builder/__init__.py +0 -3
- unit_tests/connector_builder/test_connector_builder_handler.py +0 -871
- unit_tests/connector_builder/test_message_grouper.py +0 -713
- unit_tests/connector_builder/utils.py +0 -27
- unit_tests/destinations/test_destination.py +0 -243
- unit_tests/singer/test_singer_helpers.py +0 -56
- unit_tests/singer/test_singer_source.py +0 -112
- unit_tests/sources/__init__.py +0 -0
- unit_tests/sources/concurrent_source/__init__.py +0 -3
- unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +0 -106
- unit_tests/sources/declarative/__init__.py +0 -3
- unit_tests/sources/declarative/auth/__init__.py +0 -3
- unit_tests/sources/declarative/auth/test_oauth.py +0 -331
- unit_tests/sources/declarative/auth/test_selective_authenticator.py +0 -39
- unit_tests/sources/declarative/auth/test_session_token_auth.py +0 -182
- unit_tests/sources/declarative/auth/test_token_auth.py +0 -200
- unit_tests/sources/declarative/auth/test_token_provider.py +0 -73
- unit_tests/sources/declarative/checks/__init__.py +0 -3
- unit_tests/sources/declarative/checks/test_check_stream.py +0 -146
- unit_tests/sources/declarative/decoders/__init__.py +0 -0
- unit_tests/sources/declarative/decoders/test_json_decoder.py +0 -16
- unit_tests/sources/declarative/external_component.py +0 -13
- unit_tests/sources/declarative/extractors/__init__.py +0 -3
- unit_tests/sources/declarative/extractors/test_dpath_extractor.py +0 -55
- unit_tests/sources/declarative/extractors/test_record_filter.py +0 -55
- unit_tests/sources/declarative/extractors/test_record_selector.py +0 -179
- unit_tests/sources/declarative/incremental/__init__.py +0 -0
- unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +0 -860
- unit_tests/sources/declarative/incremental/test_per_partition_cursor.py +0 -406
- unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +0 -332
- unit_tests/sources/declarative/interpolation/__init__.py +0 -3
- unit_tests/sources/declarative/interpolation/test_filters.py +0 -80
- unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py +0 -40
- unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py +0 -35
- unit_tests/sources/declarative/interpolation/test_interpolated_nested_mapping.py +0 -45
- unit_tests/sources/declarative/interpolation/test_interpolated_string.py +0 -25
- unit_tests/sources/declarative/interpolation/test_jinja.py +0 -240
- unit_tests/sources/declarative/interpolation/test_macros.py +0 -73
- unit_tests/sources/declarative/parsers/__init__.py +0 -3
- unit_tests/sources/declarative/parsers/test_manifest_component_transformer.py +0 -406
- unit_tests/sources/declarative/parsers/test_manifest_reference_resolver.py +0 -139
- unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +0 -1841
- unit_tests/sources/declarative/parsers/testing_components.py +0 -36
- unit_tests/sources/declarative/partition_routers/__init__.py +0 -3
- unit_tests/sources/declarative/partition_routers/test_list_partition_router.py +0 -155
- unit_tests/sources/declarative/partition_routers/test_single_partition_router.py +0 -14
- unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py +0 -404
- unit_tests/sources/declarative/requesters/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_constant_backoff.py +0 -34
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py +0 -36
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py +0 -38
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py +0 -35
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py +0 -64
- unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py +0 -213
- unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py +0 -178
- unit_tests/sources/declarative/requesters/error_handlers/test_http_response_filter.py +0 -121
- unit_tests/sources/declarative/requesters/error_handlers/test_response_status.py +0 -44
- unit_tests/sources/declarative/requesters/paginators/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py +0 -64
- unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +0 -313
- unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py +0 -12
- unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py +0 -58
- unit_tests/sources/declarative/requesters/paginators/test_page_increment.py +0 -70
- unit_tests/sources/declarative/requesters/paginators/test_request_option.py +0 -43
- unit_tests/sources/declarative/requesters/paginators/test_stop_condition.py +0 -105
- unit_tests/sources/declarative/requesters/request_options/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py +0 -101
- unit_tests/sources/declarative/requesters/test_http_requester.py +0 -974
- unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py +0 -32
- unit_tests/sources/declarative/retrievers/__init__.py +0 -3
- unit_tests/sources/declarative/retrievers/test_simple_retriever.py +0 -542
- unit_tests/sources/declarative/schema/__init__.py +0 -6
- unit_tests/sources/declarative/schema/source_test/SourceTest.py +0 -8
- unit_tests/sources/declarative/schema/source_test/__init__.py +0 -3
- unit_tests/sources/declarative/schema/test_default_schema_loader.py +0 -32
- unit_tests/sources/declarative/schema/test_inline_schema_loader.py +0 -19
- unit_tests/sources/declarative/schema/test_json_file_schema_loader.py +0 -26
- unit_tests/sources/declarative/states/__init__.py +0 -3
- unit_tests/sources/declarative/stream_slicers/__init__.py +0 -3
- unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py +0 -225
- unit_tests/sources/declarative/test_create_partial.py +0 -83
- unit_tests/sources/declarative/test_declarative_stream.py +0 -103
- unit_tests/sources/declarative/test_manifest_declarative_source.py +0 -1260
- unit_tests/sources/declarative/test_types.py +0 -39
- unit_tests/sources/declarative/test_yaml_declarative_source.py +0 -148
- unit_tests/sources/file_based/__init__.py +0 -0
- unit_tests/sources/file_based/availability_strategy/__init__.py +0 -0
- unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py +0 -100
- unit_tests/sources/file_based/config/__init__.py +0 -0
- unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +0 -28
- unit_tests/sources/file_based/config/test_csv_format.py +0 -34
- unit_tests/sources/file_based/config/test_file_based_stream_config.py +0 -84
- unit_tests/sources/file_based/discovery_policy/__init__.py +0 -0
- unit_tests/sources/file_based/discovery_policy/test_default_discovery_policy.py +0 -31
- unit_tests/sources/file_based/file_types/__init__.py +0 -0
- unit_tests/sources/file_based/file_types/test_avro_parser.py +0 -243
- unit_tests/sources/file_based/file_types/test_csv_parser.py +0 -546
- unit_tests/sources/file_based/file_types/test_jsonl_parser.py +0 -158
- unit_tests/sources/file_based/file_types/test_parquet_parser.py +0 -274
- unit_tests/sources/file_based/file_types/test_unstructured_parser.py +0 -593
- unit_tests/sources/file_based/helpers.py +0 -70
- unit_tests/sources/file_based/in_memory_files_source.py +0 -211
- unit_tests/sources/file_based/scenarios/__init__.py +0 -0
- unit_tests/sources/file_based/scenarios/avro_scenarios.py +0 -744
- unit_tests/sources/file_based/scenarios/check_scenarios.py +0 -220
- unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +0 -2844
- unit_tests/sources/file_based/scenarios/csv_scenarios.py +0 -3105
- unit_tests/sources/file_based/scenarios/file_based_source_builder.py +0 -91
- unit_tests/sources/file_based/scenarios/incremental_scenarios.py +0 -1926
- unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +0 -930
- unit_tests/sources/file_based/scenarios/parquet_scenarios.py +0 -754
- unit_tests/sources/file_based/scenarios/scenario_builder.py +0 -234
- unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +0 -608
- unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +0 -746
- unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +0 -726
- unit_tests/sources/file_based/stream/__init__.py +0 -0
- unit_tests/sources/file_based/stream/concurrent/__init__.py +0 -0
- unit_tests/sources/file_based/stream/concurrent/test_adapters.py +0 -362
- unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +0 -458
- unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +0 -310
- unit_tests/sources/file_based/stream/test_default_file_based_stream.py +0 -244
- unit_tests/sources/file_based/test_file_based_scenarios.py +0 -320
- unit_tests/sources/file_based/test_file_based_stream_reader.py +0 -272
- unit_tests/sources/file_based/test_scenarios.py +0 -253
- unit_tests/sources/file_based/test_schema_helpers.py +0 -346
- unit_tests/sources/fixtures/__init__.py +0 -3
- unit_tests/sources/fixtures/source_test_fixture.py +0 -153
- unit_tests/sources/message/__init__.py +0 -0
- unit_tests/sources/message/test_repository.py +0 -153
- unit_tests/sources/streams/__init__.py +0 -0
- unit_tests/sources/streams/concurrent/__init__.py +0 -3
- unit_tests/sources/streams/concurrent/scenarios/__init__.py +0 -3
- unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +0 -250
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +0 -140
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +0 -452
- unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +0 -76
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +0 -418
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +0 -142
- unit_tests/sources/streams/concurrent/scenarios/utils.py +0 -55
- unit_tests/sources/streams/concurrent/test_adapters.py +0 -380
- unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +0 -684
- unit_tests/sources/streams/concurrent/test_cursor.py +0 -139
- unit_tests/sources/streams/concurrent/test_datetime_state_converter.py +0 -369
- unit_tests/sources/streams/concurrent/test_default_stream.py +0 -197
- unit_tests/sources/streams/concurrent/test_partition_enqueuer.py +0 -90
- unit_tests/sources/streams/concurrent/test_partition_reader.py +0 -67
- unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +0 -106
- unit_tests/sources/streams/http/__init__.py +0 -0
- unit_tests/sources/streams/http/auth/__init__.py +0 -0
- unit_tests/sources/streams/http/auth/test_auth.py +0 -173
- unit_tests/sources/streams/http/requests_native_auth/__init__.py +0 -0
- unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +0 -423
- unit_tests/sources/streams/http/test_availability_strategy.py +0 -180
- unit_tests/sources/streams/http/test_http.py +0 -635
- unit_tests/sources/streams/test_availability_strategy.py +0 -70
- unit_tests/sources/streams/test_call_rate.py +0 -300
- unit_tests/sources/streams/test_stream_read.py +0 -405
- unit_tests/sources/streams/test_streams_core.py +0 -184
- unit_tests/sources/test_abstract_source.py +0 -1442
- unit_tests/sources/test_concurrent_source.py +0 -112
- unit_tests/sources/test_config.py +0 -92
- unit_tests/sources/test_connector_state_manager.py +0 -482
- unit_tests/sources/test_http_logger.py +0 -252
- unit_tests/sources/test_integration_source.py +0 -86
- unit_tests/sources/test_source.py +0 -684
- unit_tests/sources/test_source_read.py +0 -460
- unit_tests/test/__init__.py +0 -0
- unit_tests/test/mock_http/__init__.py +0 -0
- unit_tests/test/mock_http/test_matcher.py +0 -53
- unit_tests/test/mock_http/test_mocker.py +0 -214
- unit_tests/test/mock_http/test_request.py +0 -117
- unit_tests/test/mock_http/test_response_builder.py +0 -177
- unit_tests/test/test_entrypoint_wrapper.py +0 -240
- unit_tests/utils/__init__.py +0 -0
- unit_tests/utils/test_datetime_format_inferrer.py +0 -60
- unit_tests/utils/test_mapping_helpers.py +0 -54
- unit_tests/utils/test_message_utils.py +0 -91
- unit_tests/utils/test_rate_limiting.py +0 -26
- unit_tests/utils/test_schema_inferrer.py +0 -202
- unit_tests/utils/test_secret_utils.py +0 -135
- unit_tests/utils/test_stream_status_utils.py +0 -61
- unit_tests/utils/test_traced_exception.py +0 -107
- airbyte_cdk/sources/{deprecated → declarative/async_job}/__init__.py +0 -0
- {source_declarative_manifest → airbyte_cdk/sources/declarative/migrations}/__init__.py +0 -0
- {unit_tests/destinations → airbyte_cdk/sql}/__init__.py +0 -0
- {unit_tests/singer → airbyte_cdk/sql/_util}/__init__.py +0 -0
- {airbyte_cdk-0.72.0.dist-info → airbyte_cdk-6.13.1.dev4106.dist-info}/LICENSE.txt +0 -0
@@ -0,0 +1,51 @@
|
|
1
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from dataclasses import dataclass
|
4
|
+
from typing import Any, Optional
|
5
|
+
|
6
|
+
from airbyte_cdk.sources.streams.checkpoint import Cursor
|
7
|
+
from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
|
8
|
+
|
9
|
+
|
10
|
+
@dataclass
|
11
|
+
class ResumableFullRefreshCursor(Cursor):
|
12
|
+
"""
|
13
|
+
Cursor that allows for the checkpointing of sync progress according to a synthetic cursor based on the pagination state
|
14
|
+
of the stream. Resumable full refresh syncs are only intended to retain state in between sync attempts of the same job
|
15
|
+
with the platform responsible for removing said state.
|
16
|
+
"""
|
17
|
+
|
18
|
+
def __init__(self) -> None:
|
19
|
+
self._cursor: StreamState = {}
|
20
|
+
|
21
|
+
def get_stream_state(self) -> StreamState:
|
22
|
+
return self._cursor
|
23
|
+
|
24
|
+
def set_initial_state(self, stream_state: StreamState) -> None:
|
25
|
+
self._cursor = stream_state
|
26
|
+
|
27
|
+
def observe(self, stream_slice: StreamSlice, record: Record) -> None:
|
28
|
+
"""
|
29
|
+
Resumable full refresh manages state using a page number so it does not need to update state by observing incoming records.
|
30
|
+
"""
|
31
|
+
pass
|
32
|
+
|
33
|
+
def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None:
|
34
|
+
self._cursor = stream_slice.cursor_slice
|
35
|
+
|
36
|
+
def should_be_synced(self, record: Record) -> bool:
|
37
|
+
"""
|
38
|
+
Unlike date-based cursors which filter out records outside slice boundaries, resumable full refresh records exist within pages
|
39
|
+
that don't have filterable bounds. We should always return them.
|
40
|
+
"""
|
41
|
+
return True
|
42
|
+
|
43
|
+
def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
|
44
|
+
"""
|
45
|
+
RFR records don't have an ordering, so they cannot be compared with one another.
|
46
|
+
"""
|
47
|
+
return False
|
48
|
+
|
49
|
+
def select_state(self, stream_slice: Optional[StreamSlice] = None) -> Optional[StreamState]:
|
50
|
+
# A top-level RFR cursor only manages the state of a single partition
|
51
|
+
return self._cursor
|
@@ -0,0 +1,110 @@
|
|
1
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from dataclasses import dataclass
|
4
|
+
from typing import Any, Mapping, MutableMapping, Optional
|
5
|
+
|
6
|
+
from airbyte_cdk.models import FailureType
|
7
|
+
from airbyte_cdk.sources.streams.checkpoint import Cursor
|
8
|
+
from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import (
|
9
|
+
PerPartitionKeySerializer,
|
10
|
+
)
|
11
|
+
from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
|
12
|
+
from airbyte_cdk.utils import AirbyteTracedException
|
13
|
+
|
14
|
+
FULL_REFRESH_COMPLETE_STATE: Mapping[str, Any] = {"__ab_full_refresh_sync_complete": True}
|
15
|
+
|
16
|
+
|
17
|
+
@dataclass
|
18
|
+
class SubstreamResumableFullRefreshCursor(Cursor):
|
19
|
+
def __init__(self) -> None:
|
20
|
+
self._per_partition_state: MutableMapping[str, StreamState] = {}
|
21
|
+
self._partition_serializer = PerPartitionKeySerializer()
|
22
|
+
|
23
|
+
def get_stream_state(self) -> StreamState:
|
24
|
+
return {"states": list(self._per_partition_state.values())}
|
25
|
+
|
26
|
+
def set_initial_state(self, stream_state: StreamState) -> None:
|
27
|
+
"""
|
28
|
+
Set the initial state for the cursors.
|
29
|
+
|
30
|
+
This method initializes the state for each partition cursor using the provided stream state.
|
31
|
+
If a partition state is provided in the stream state, it will update the corresponding partition cursor with this state.
|
32
|
+
|
33
|
+
To simplify processing and state management, we do not maintain the checkpointed state of the parent partitions.
|
34
|
+
Instead, we are tracking whether a parent has already successfully synced on a prior attempt and skipping over it
|
35
|
+
allowing the sync to continue making progress. And this works for RFR because the platform will dispose of this
|
36
|
+
state on the next sync job.
|
37
|
+
|
38
|
+
Args:
|
39
|
+
stream_state (StreamState): The state of the streams to be set. The format of the stream state should be:
|
40
|
+
{
|
41
|
+
"states": [
|
42
|
+
{
|
43
|
+
"partition": {
|
44
|
+
"partition_key": "value_0"
|
45
|
+
},
|
46
|
+
"cursor": {
|
47
|
+
"__ab_full_refresh_sync_complete": True
|
48
|
+
}
|
49
|
+
},
|
50
|
+
{
|
51
|
+
"partition": {
|
52
|
+
"partition_key": "value_1"
|
53
|
+
},
|
54
|
+
"cursor": {},
|
55
|
+
},
|
56
|
+
]
|
57
|
+
}
|
58
|
+
"""
|
59
|
+
if not stream_state:
|
60
|
+
return
|
61
|
+
|
62
|
+
if "states" not in stream_state:
|
63
|
+
raise AirbyteTracedException(
|
64
|
+
internal_message=f"Could not sync parse the following state: {stream_state}",
|
65
|
+
message="The state for is format invalid. Validate that the migration steps included a reset and that it was performed "
|
66
|
+
"properly. Otherwise, please contact Airbyte support.",
|
67
|
+
failure_type=FailureType.config_error,
|
68
|
+
)
|
69
|
+
|
70
|
+
for state in stream_state["states"]:
|
71
|
+
self._per_partition_state[self._to_partition_key(state["partition"])] = state
|
72
|
+
|
73
|
+
def observe(self, stream_slice: StreamSlice, record: Record) -> None:
|
74
|
+
"""
|
75
|
+
Substream resumable full refresh manages state by closing the slice after syncing a parent so observe is not used.
|
76
|
+
"""
|
77
|
+
pass
|
78
|
+
|
79
|
+
def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None:
|
80
|
+
self._per_partition_state[self._to_partition_key(stream_slice.partition)] = {
|
81
|
+
"partition": stream_slice.partition,
|
82
|
+
"cursor": FULL_REFRESH_COMPLETE_STATE,
|
83
|
+
}
|
84
|
+
|
85
|
+
def should_be_synced(self, record: Record) -> bool:
|
86
|
+
"""
|
87
|
+
Unlike date-based cursors which filter out records outside slice boundaries, resumable full refresh records exist within pages
|
88
|
+
that don't have filterable bounds. We should always return them.
|
89
|
+
"""
|
90
|
+
return True
|
91
|
+
|
92
|
+
def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
|
93
|
+
"""
|
94
|
+
RFR records don't have an ordering, so they cannot be compared with one another.
|
95
|
+
"""
|
96
|
+
return False
|
97
|
+
|
98
|
+
def select_state(self, stream_slice: Optional[StreamSlice] = None) -> Optional[StreamState]:
|
99
|
+
if not stream_slice:
|
100
|
+
raise ValueError("A partition needs to be provided in order to extract a state")
|
101
|
+
|
102
|
+
return self._per_partition_state.get(
|
103
|
+
self._to_partition_key(stream_slice.partition), {}
|
104
|
+
).get("cursor")
|
105
|
+
|
106
|
+
def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
|
107
|
+
return self._partition_serializer.to_partition_key(partition)
|
108
|
+
|
109
|
+
def _to_dict(self, partition_key: str) -> Mapping[str, Any]:
|
110
|
+
return self._partition_serializer.to_partition(partition_key)
|
@@ -0,0 +1,7 @@
|
|
1
|
+
## Breaking Changes & Limitations
|
2
|
+
|
3
|
+
- [bigger scope than Concurrent CDK] State checkpointing used to be based on the number of records per slice. It is now based on the number of records per sync
|
4
|
+
- `Source.read_state` and `Source._emit_legacy_state_format` are now classmethods to allow for developers to have access to the state before instantiating the source
|
5
|
+
- send_per_stream_state is always True for Concurrent CDK
|
6
|
+
- Using stream_state during read_records: The concern is that today, stream_instance.get_updated_state is called on every record and read_records on every slice. The implication is that the argument stream_state passed to read_records will have the value after the last stream_instance.get_updated_state of the previous slice. For Concurrent CDK, this is not possible as slices are processed in an unordered way.
|
7
|
+
- Cursor fields can only be date-time values formatted as epoch. Eventually, we want to move to ISO 8601 as it provides more flexibility, but for the first iteration on Stripe it was easier to use the same format that was already in use
|
@@ -5,14 +5,19 @@
|
|
5
5
|
from abc import ABC, abstractmethod
|
6
6
|
from typing import Any, Iterable, Mapping, Optional
|
7
7
|
|
8
|
+
from typing_extensions import deprecated
|
9
|
+
|
8
10
|
from airbyte_cdk.models import AirbyteStream
|
11
|
+
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
9
12
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import StreamAvailability
|
10
13
|
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
|
11
14
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
12
|
-
from deprecated.classic import deprecated
|
13
15
|
|
14
16
|
|
15
|
-
@deprecated(
|
17
|
+
@deprecated(
|
18
|
+
"This class is experimental. Use at your own risk.",
|
19
|
+
category=ExperimentalClassWarning,
|
20
|
+
)
|
16
21
|
class AbstractStream(ABC):
|
17
22
|
"""
|
18
23
|
AbstractStream is an experimental interface for streams developed as part of the Concurrent CDK.
|
@@ -8,37 +8,52 @@ import logging
|
|
8
8
|
from functools import lru_cache
|
9
9
|
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union
|
10
10
|
|
11
|
-
from
|
11
|
+
from typing_extensions import deprecated
|
12
|
+
|
13
|
+
from airbyte_cdk.models import (
|
14
|
+
AirbyteLogMessage,
|
15
|
+
AirbyteMessage,
|
16
|
+
AirbyteStream,
|
17
|
+
ConfiguredAirbyteStream,
|
18
|
+
Level,
|
19
|
+
SyncMode,
|
20
|
+
Type,
|
21
|
+
)
|
12
22
|
from airbyte_cdk.sources import AbstractSource, Source
|
13
23
|
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
14
24
|
from airbyte_cdk.sources.message import MessageRepository
|
25
|
+
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
15
26
|
from airbyte_cdk.sources.streams import Stream
|
16
27
|
from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy
|
17
28
|
from airbyte_cdk.sources.streams.concurrent.abstract_stream_facade import AbstractStreamFacade
|
18
29
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
|
19
30
|
AbstractAvailabilityStrategy,
|
20
|
-
|
21
|
-
StreamAvailable,
|
22
|
-
StreamUnavailable,
|
31
|
+
AlwaysAvailableAvailabilityStrategy,
|
23
32
|
)
|
24
33
|
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, FinalStateCursor
|
25
34
|
from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
|
26
35
|
from airbyte_cdk.sources.streams.concurrent.exceptions import ExceptionWithDisplayMessage
|
27
|
-
from airbyte_cdk.sources.streams.concurrent.helpers import
|
36
|
+
from airbyte_cdk.sources.streams.concurrent.helpers import (
|
37
|
+
get_cursor_field_from_stream,
|
38
|
+
get_primary_key_from_stream,
|
39
|
+
)
|
28
40
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
29
41
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
|
30
|
-
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
31
42
|
from airbyte_cdk.sources.streams.core import StreamData
|
43
|
+
from airbyte_cdk.sources.types import Record
|
32
44
|
from airbyte_cdk.sources.utils.schema_helpers import InternalConfig
|
33
45
|
from airbyte_cdk.sources.utils.slice_logger import SliceLogger
|
34
|
-
from
|
46
|
+
from airbyte_cdk.utils.slice_hasher import SliceHasher
|
35
47
|
|
36
48
|
"""
|
37
49
|
This module contains adapters to help enabling concurrency on Stream objects without needing to migrate to AbstractStream
|
38
50
|
"""
|
39
51
|
|
40
52
|
|
41
|
-
@deprecated(
|
53
|
+
@deprecated(
|
54
|
+
"This class is experimental. Use at your own risk.",
|
55
|
+
category=ExperimentalClassWarning,
|
56
|
+
)
|
42
57
|
class StreamFacade(AbstractStreamFacade[DefaultStream], Stream):
|
43
58
|
"""
|
44
59
|
The StreamFacade is a Stream that wraps an AbstractStream and exposes it as a Stream.
|
@@ -77,15 +92,16 @@ class StreamFacade(AbstractStreamFacade[DefaultStream], Stream):
|
|
77
92
|
partition_generator=StreamPartitionGenerator(
|
78
93
|
stream,
|
79
94
|
message_repository,
|
80
|
-
SyncMode.full_refresh
|
95
|
+
SyncMode.full_refresh
|
96
|
+
if isinstance(cursor, FinalStateCursor)
|
97
|
+
else SyncMode.incremental,
|
81
98
|
[cursor_field] if cursor_field is not None else None,
|
82
99
|
state,
|
83
|
-
cursor,
|
84
100
|
),
|
85
101
|
name=stream.name,
|
86
102
|
namespace=stream.namespace,
|
87
103
|
json_schema=stream.get_json_schema(),
|
88
|
-
availability_strategy=
|
104
|
+
availability_strategy=AlwaysAvailableAvailabilityStrategy(),
|
89
105
|
primary_key=pk,
|
90
106
|
cursor_field=cursor_field,
|
91
107
|
logger=logger,
|
@@ -99,14 +115,23 @@ class StreamFacade(AbstractStreamFacade[DefaultStream], Stream):
|
|
99
115
|
|
100
116
|
@property
|
101
117
|
def state(self) -> MutableMapping[str, Any]:
|
102
|
-
raise NotImplementedError(
|
118
|
+
raise NotImplementedError(
|
119
|
+
"This should not be called as part of the Concurrent CDK code. Please report the problem to Airbyte"
|
120
|
+
)
|
103
121
|
|
104
122
|
@state.setter
|
105
123
|
def state(self, value: Mapping[str, Any]) -> None:
|
106
124
|
if "state" in dir(self._legacy_stream):
|
107
125
|
self._legacy_stream.state = value # type: ignore # validating `state` is attribute of stream using `if` above
|
108
126
|
|
109
|
-
def __init__(
|
127
|
+
def __init__(
|
128
|
+
self,
|
129
|
+
stream: DefaultStream,
|
130
|
+
legacy_stream: Stream,
|
131
|
+
cursor: Cursor,
|
132
|
+
slice_logger: SliceLogger,
|
133
|
+
logger: logging.Logger,
|
134
|
+
):
|
110
135
|
"""
|
111
136
|
:param stream: The underlying AbstractStream
|
112
137
|
"""
|
@@ -143,7 +168,10 @@ class StreamFacade(AbstractStreamFacade[DefaultStream], Stream):
|
|
143
168
|
# This shouldn't happen if the ConcurrentCursor was used
|
144
169
|
state = "unknown; no state attribute was available on the cursor"
|
145
170
|
yield AirbyteMessage(
|
146
|
-
type=Type.LOG,
|
171
|
+
type=Type.LOG,
|
172
|
+
log=AirbyteLogMessage(
|
173
|
+
level=Level.ERROR, message=f"Cursor State at time of exception: {state}"
|
174
|
+
),
|
147
175
|
)
|
148
176
|
raise exc
|
149
177
|
|
@@ -170,6 +198,10 @@ class StreamFacade(AbstractStreamFacade[DefaultStream], Stream):
|
|
170
198
|
else:
|
171
199
|
return self._abstract_stream.cursor_field
|
172
200
|
|
201
|
+
@property
|
202
|
+
def cursor(self) -> Optional[Cursor]: # type: ignore[override] # StreamFaced expects to use only airbyte_cdk.sources.streams.concurrent.cursor.Cursor
|
203
|
+
return self._cursor
|
204
|
+
|
173
205
|
@lru_cache(maxsize=None)
|
174
206
|
def get_json_schema(self) -> Mapping[str, Any]:
|
175
207
|
return self._abstract_stream.get_json_schema()
|
@@ -178,7 +210,9 @@ class StreamFacade(AbstractStreamFacade[DefaultStream], Stream):
|
|
178
210
|
def supports_incremental(self) -> bool:
|
179
211
|
return self._legacy_stream.supports_incremental
|
180
212
|
|
181
|
-
def check_availability(
|
213
|
+
def check_availability(
|
214
|
+
self, logger: logging.Logger, source: Optional["Source"] = None
|
215
|
+
) -> Tuple[bool, Optional[str]]:
|
182
216
|
"""
|
183
217
|
Verifies the stream is available. Delegates to the underlying AbstractStream and ignores the parameters
|
184
218
|
:param logger: (ignored)
|
@@ -198,6 +232,15 @@ class StreamFacade(AbstractStreamFacade[DefaultStream], Stream):
|
|
198
232
|
return self._abstract_stream
|
199
233
|
|
200
234
|
|
235
|
+
class SliceEncoder(json.JSONEncoder):
|
236
|
+
def default(self, obj: Any) -> Any:
|
237
|
+
if hasattr(obj, "__json_serializable__"):
|
238
|
+
return obj.__json_serializable__()
|
239
|
+
|
240
|
+
# Let the base class default method raise the TypeError
|
241
|
+
return super().default(obj)
|
242
|
+
|
243
|
+
|
201
244
|
class StreamPartition(Partition):
|
202
245
|
"""
|
203
246
|
This class acts as an adapter between the new Partition interface and the Stream's stream_slice interface
|
@@ -216,7 +259,6 @@ class StreamPartition(Partition):
|
|
216
259
|
sync_mode: SyncMode,
|
217
260
|
cursor_field: Optional[List[str]],
|
218
261
|
state: Optional[MutableMapping[str, Any]],
|
219
|
-
cursor: Cursor,
|
220
262
|
):
|
221
263
|
"""
|
222
264
|
:param stream: The stream to delegate to
|
@@ -229,8 +271,7 @@ class StreamPartition(Partition):
|
|
229
271
|
self._sync_mode = sync_mode
|
230
272
|
self._cursor_field = cursor_field
|
231
273
|
self._state = state
|
232
|
-
self.
|
233
|
-
self._is_closed = False
|
274
|
+
self._hash = SliceHasher.hash(self._stream.name, self._slice)
|
234
275
|
|
235
276
|
def read(self) -> Iterable[Record]:
|
236
277
|
"""
|
@@ -253,10 +294,14 @@ class StreamPartition(Partition):
|
|
253
294
|
):
|
254
295
|
if isinstance(record_data, Mapping):
|
255
296
|
data_to_return = dict(record_data)
|
256
|
-
self._stream.transformer.transform(
|
257
|
-
|
258
|
-
|
259
|
-
yield Record(
|
297
|
+
self._stream.transformer.transform(
|
298
|
+
data_to_return, self._stream.get_json_schema()
|
299
|
+
)
|
300
|
+
yield Record(
|
301
|
+
data=data_to_return,
|
302
|
+
stream_name=self.stream_name(),
|
303
|
+
associated_slice=self._slice, # type: ignore [arg-type]
|
304
|
+
)
|
260
305
|
else:
|
261
306
|
self._message_repository.emit_message(record_data)
|
262
307
|
except Exception as e:
|
@@ -270,23 +315,11 @@ class StreamPartition(Partition):
|
|
270
315
|
return self._slice
|
271
316
|
|
272
317
|
def __hash__(self) -> int:
|
273
|
-
|
274
|
-
# Convert the slice to a string so that it can be hashed
|
275
|
-
s = json.dumps(self._slice, sort_keys=True)
|
276
|
-
return hash((self._stream.name, s))
|
277
|
-
else:
|
278
|
-
return hash(self._stream.name)
|
318
|
+
return self._hash
|
279
319
|
|
280
320
|
def stream_name(self) -> str:
|
281
321
|
return self._stream.name
|
282
322
|
|
283
|
-
def close(self) -> None:
|
284
|
-
self._cursor.close_partition(self)
|
285
|
-
self._is_closed = True
|
286
|
-
|
287
|
-
def is_closed(self) -> bool:
|
288
|
-
return self._is_closed
|
289
|
-
|
290
323
|
def __repr__(self) -> str:
|
291
324
|
return f"StreamPartition({self._stream.name}, {self._slice})"
|
292
325
|
|
@@ -306,7 +339,6 @@ class StreamPartitionGenerator(PartitionGenerator):
|
|
306
339
|
sync_mode: SyncMode,
|
307
340
|
cursor_field: Optional[List[str]],
|
308
341
|
state: Optional[MutableMapping[str, Any]],
|
309
|
-
cursor: Cursor,
|
310
342
|
):
|
311
343
|
"""
|
312
344
|
:param stream: The stream to delegate to
|
@@ -317,21 +349,32 @@ class StreamPartitionGenerator(PartitionGenerator):
|
|
317
349
|
self._sync_mode = sync_mode
|
318
350
|
self._cursor_field = cursor_field
|
319
351
|
self._state = state
|
320
|
-
self._cursor = cursor
|
321
352
|
|
322
353
|
def generate(self) -> Iterable[Partition]:
|
323
|
-
for s in self._stream.stream_slices(
|
354
|
+
for s in self._stream.stream_slices(
|
355
|
+
sync_mode=self._sync_mode, cursor_field=self._cursor_field, stream_state=self._state
|
356
|
+
):
|
324
357
|
yield StreamPartition(
|
325
|
-
self._stream,
|
358
|
+
self._stream,
|
359
|
+
copy.deepcopy(s),
|
360
|
+
self.message_repository,
|
361
|
+
self._sync_mode,
|
362
|
+
self._cursor_field,
|
363
|
+
self._state,
|
326
364
|
)
|
327
365
|
|
328
366
|
|
329
|
-
@deprecated(
|
367
|
+
@deprecated(
|
368
|
+
"Availability strategy has been soft deprecated. Do not use. Class is subject to removal",
|
369
|
+
category=ExperimentalClassWarning,
|
370
|
+
)
|
330
371
|
class AvailabilityStrategyFacade(AvailabilityStrategy):
|
331
372
|
def __init__(self, abstract_availability_strategy: AbstractAvailabilityStrategy):
|
332
373
|
self._abstract_availability_strategy = abstract_availability_strategy
|
333
374
|
|
334
|
-
def check_availability(
|
375
|
+
def check_availability(
|
376
|
+
self, stream: Stream, logger: logging.Logger, source: Optional["Source"] = None
|
377
|
+
) -> Tuple[bool, Optional[str]]:
|
335
378
|
"""
|
336
379
|
Checks stream availability.
|
337
380
|
|
@@ -344,37 +387,3 @@ class AvailabilityStrategyFacade(AvailabilityStrategy):
|
|
344
387
|
"""
|
345
388
|
stream_availability = self._abstract_availability_strategy.check_availability(logger)
|
346
389
|
return stream_availability.is_available(), stream_availability.message()
|
347
|
-
|
348
|
-
|
349
|
-
class StreamAvailabilityStrategy(AbstractAvailabilityStrategy):
|
350
|
-
"""
|
351
|
-
This class acts as an adapter between the existing AvailabilityStrategy and the new AbstractAvailabilityStrategy.
|
352
|
-
StreamAvailabilityStrategy is instantiated with a Stream and a Source to allow the existing AvailabilityStrategy to be used with the new AbstractAvailabilityStrategy interface.
|
353
|
-
|
354
|
-
A more convenient implementation would not depend on the docs URL instead of the Source itself, and would support running on an AbstractStream instead of only on a Stream.
|
355
|
-
|
356
|
-
This class can be used to help enable concurrency on existing connectors without having to rewrite everything as AbstractStream and AbstractAvailabilityStrategy.
|
357
|
-
In the long-run, it would be preferable to update the connectors, but we don't have the tooling or need to justify the effort at this time.
|
358
|
-
"""
|
359
|
-
|
360
|
-
def __init__(self, stream: Stream, source: Source):
|
361
|
-
"""
|
362
|
-
:param stream: The stream to delegate to
|
363
|
-
:param source: The source to delegate to
|
364
|
-
"""
|
365
|
-
self._stream = stream
|
366
|
-
self._source = source
|
367
|
-
|
368
|
-
def check_availability(self, logger: logging.Logger) -> StreamAvailability:
|
369
|
-
try:
|
370
|
-
available, message = self._stream.check_availability(logger, self._source)
|
371
|
-
if available:
|
372
|
-
return StreamAvailable()
|
373
|
-
else:
|
374
|
-
return StreamUnavailable(str(message))
|
375
|
-
except Exception as e:
|
376
|
-
display_message = self._stream.get_error_display_message(e)
|
377
|
-
if display_message:
|
378
|
-
raise ExceptionWithDisplayMessage(display_message)
|
379
|
-
else:
|
380
|
-
raise e
|
@@ -6,7 +6,9 @@ import logging
|
|
6
6
|
from abc import ABC, abstractmethod
|
7
7
|
from typing import Optional
|
8
8
|
|
9
|
-
from
|
9
|
+
from typing_extensions import deprecated
|
10
|
+
|
11
|
+
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
10
12
|
|
11
13
|
|
12
14
|
class StreamAvailability(ABC):
|
@@ -46,7 +48,10 @@ class StreamUnavailable(StreamAvailability):
|
|
46
48
|
STREAM_AVAILABLE = StreamAvailable()
|
47
49
|
|
48
50
|
|
49
|
-
@deprecated(
|
51
|
+
@deprecated(
|
52
|
+
"This class is experimental. Use at your own risk.",
|
53
|
+
category=ExperimentalClassWarning,
|
54
|
+
)
|
50
55
|
class AbstractAvailabilityStrategy(ABC):
|
51
56
|
"""
|
52
57
|
AbstractAvailabilityStrategy is an experimental interface developed as part of the Concurrent CDK.
|
@@ -64,3 +69,26 @@ class AbstractAvailabilityStrategy(ABC):
|
|
64
69
|
:param logger: logger object to use
|
65
70
|
:return: A StreamAvailability object describing the stream's availability
|
66
71
|
"""
|
72
|
+
|
73
|
+
|
74
|
+
@deprecated(
|
75
|
+
"This class is experimental. Use at your own risk.",
|
76
|
+
category=ExperimentalClassWarning,
|
77
|
+
)
|
78
|
+
class AlwaysAvailableAvailabilityStrategy(AbstractAvailabilityStrategy):
|
79
|
+
"""
|
80
|
+
An availability strategy that always indicates a stream is available.
|
81
|
+
|
82
|
+
This strategy is used to avoid breaking changes and serves as a soft
|
83
|
+
deprecation of the availability strategy, allowing a smoother transition
|
84
|
+
without disrupting existing functionality.
|
85
|
+
"""
|
86
|
+
|
87
|
+
def check_availability(self, logger: logging.Logger) -> StreamAvailability:
|
88
|
+
"""
|
89
|
+
Checks stream availability.
|
90
|
+
|
91
|
+
:param logger: logger object to use
|
92
|
+
:return: A StreamAvailability object describing the stream's availability
|
93
|
+
"""
|
94
|
+
return StreamAvailable()
|