airbyte-cdk 0.72.0__py3-none-any.whl → 6.17.1.dev0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/__init__.py +355 -6
- airbyte_cdk/cli/__init__.py +1 -0
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
- airbyte_cdk/cli/source_declarative_manifest/_run.py +230 -0
- airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
- airbyte_cdk/config_observation.py +29 -10
- airbyte_cdk/connector.py +24 -24
- airbyte_cdk/connector_builder/README.md +53 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +37 -11
- airbyte_cdk/connector_builder/main.py +45 -13
- airbyte_cdk/connector_builder/message_grouper.py +189 -50
- airbyte_cdk/connector_builder/models.py +3 -2
- airbyte_cdk/destinations/__init__.py +4 -3
- airbyte_cdk/destinations/destination.py +54 -20
- airbyte_cdk/destinations/vector_db_based/README.md +37 -0
- airbyte_cdk/destinations/vector_db_based/config.py +40 -17
- airbyte_cdk/destinations/vector_db_based/document_processor.py +56 -17
- airbyte_cdk/destinations/vector_db_based/embedder.py +57 -15
- airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
- airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
- airbyte_cdk/destinations/vector_db_based/writer.py +24 -5
- airbyte_cdk/entrypoint.py +153 -44
- airbyte_cdk/exception_handler.py +21 -3
- airbyte_cdk/logger.py +30 -44
- airbyte_cdk/models/__init__.py +13 -2
- airbyte_cdk/models/airbyte_protocol.py +86 -1
- airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
- airbyte_cdk/models/file_transfer_record_message.py +13 -0
- airbyte_cdk/models/well_known_types.py +1 -1
- airbyte_cdk/sources/__init__.py +5 -1
- airbyte_cdk/sources/abstract_source.py +125 -79
- airbyte_cdk/sources/concurrent_source/__init__.py +7 -2
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +102 -36
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +29 -36
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +94 -10
- airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +20 -14
- airbyte_cdk/sources/config.py +3 -2
- airbyte_cdk/sources/connector_state_manager.py +49 -83
- airbyte_cdk/sources/declarative/async_job/job.py +52 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +497 -0
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +75 -0
- airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
- airbyte_cdk/sources/declarative/async_job/status.py +24 -0
- airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
- airbyte_cdk/sources/declarative/auth/__init__.py +2 -3
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
- airbyte_cdk/sources/declarative/auth/jwt.py +191 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +60 -20
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +10 -2
- airbyte_cdk/sources/declarative/auth/token.py +28 -10
- airbyte_cdk/sources/declarative/auth/token_provider.py +9 -8
- airbyte_cdk/sources/declarative/checks/check_stream.py +16 -8
- airbyte_cdk/sources/declarative/checks/connection_checker.py +4 -2
- airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +490 -0
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +4 -0
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +26 -6
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +1213 -88
- airbyte_cdk/sources/declarative/declarative_source.py +5 -2
- airbyte_cdk/sources/declarative/declarative_stream.py +95 -9
- airbyte_cdk/sources/declarative/decoders/__init__.py +23 -2
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
- airbyte_cdk/sources/declarative/decoders/decoder.py +11 -4
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +92 -5
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +12 -1
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +29 -24
- airbyte_cdk/sources/declarative/extractors/http_selector.py +4 -5
- airbyte_cdk/sources/declarative/extractors/record_extractor.py +2 -3
- airbyte_cdk/sources/declarative/extractors/record_filter.py +63 -8
- airbyte_cdk/sources/declarative/extractors/record_selector.py +85 -26
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +177 -0
- airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
- airbyte_cdk/sources/declarative/incremental/__init__.py +31 -3
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +346 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +156 -48
- airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +350 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +173 -74
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
- airbyte_cdk/sources/declarative/interpolation/filters.py +27 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +23 -5
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +12 -8
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +13 -6
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +21 -6
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +9 -3
- airbyte_cdk/sources/declarative/interpolation/jinja.py +72 -37
- airbyte_cdk/sources/declarative/interpolation/macros.py +72 -17
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +193 -52
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
- airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
- airbyte_cdk/sources/declarative/models/__init__.py +1 -1
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +1329 -595
- airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +2 -2
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +26 -4
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +26 -15
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +1763 -226
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +24 -4
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +39 -9
- airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
- airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +15 -3
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +222 -39
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +19 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +19 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +4 -2
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +41 -9
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +29 -14
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +5 -13
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +32 -16
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +46 -56
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +6 -32
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +119 -41
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +228 -0
- airbyte_cdk/sources/declarative/requesters/http_requester.py +98 -344
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +105 -46
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +14 -8
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +19 -8
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +53 -21
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +42 -19
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +25 -12
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +13 -10
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +26 -13
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +15 -2
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +91 -0
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +31 -14
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +27 -15
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +63 -10
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +1 -1
- airbyte_cdk/sources/declarative/requesters/requester.py +9 -17
- airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
- airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
- airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
- airbyte_cdk/sources/declarative/retrievers/__init__.py +6 -2
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +100 -0
- airbyte_cdk/sources/declarative/retrievers/retriever.py +1 -3
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +229 -73
- airbyte_cdk/sources/declarative/schema/__init__.py +14 -1
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +5 -3
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +236 -0
- airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +8 -8
- airbyte_cdk/sources/declarative/spec/spec.py +12 -5
- airbyte_cdk/sources/declarative/stream_slicers/__init__.py +1 -2
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +88 -0
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +9 -14
- airbyte_cdk/sources/declarative/transformations/add_fields.py +19 -11
- airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
- airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +13 -10
- airbyte_cdk/sources/declarative/transformations/transformation.py +5 -5
- airbyte_cdk/sources/declarative/types.py +19 -110
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +31 -10
- airbyte_cdk/sources/embedded/base_integration.py +16 -5
- airbyte_cdk/sources/embedded/catalog.py +16 -4
- airbyte_cdk/sources/embedded/runner.py +19 -3
- airbyte_cdk/sources/embedded/tools.py +5 -2
- airbyte_cdk/sources/file_based/README.md +152 -0
- airbyte_cdk/sources/file_based/__init__.py +24 -0
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +22 -6
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +46 -10
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +47 -10
- airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
- airbyte_cdk/sources/file_based/config/csv_format.py +29 -10
- airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +16 -4
- airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
- airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
- airbyte_cdk/sources/file_based/config/unstructured_format.py +13 -5
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
- airbyte_cdk/sources/file_based/exceptions.py +18 -15
- airbyte_cdk/sources/file_based/file_based_source.py +140 -33
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +69 -5
- airbyte_cdk/sources/file_based/file_types/__init__.py +14 -1
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +75 -24
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +116 -34
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +37 -0
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +24 -8
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +60 -18
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +141 -41
- airbyte_cdk/sources/file_based/remote_file.py +1 -1
- airbyte_cdk/sources/file_based/schema_helpers.py +38 -10
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +50 -13
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +67 -27
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +14 -23
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +54 -18
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +21 -9
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +27 -10
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +147 -45
- airbyte_cdk/sources/http_logger.py +8 -3
- airbyte_cdk/sources/message/__init__.py +7 -1
- airbyte_cdk/sources/message/repository.py +18 -4
- airbyte_cdk/sources/source.py +42 -38
- airbyte_cdk/sources/streams/__init__.py +2 -2
- airbyte_cdk/sources/streams/availability_strategy.py +54 -3
- airbyte_cdk/sources/streams/call_rate.py +64 -21
- airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
- airbyte_cdk/sources/{declarative/incremental → streams/checkpoint}/cursor.py +17 -14
- airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
- airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
- airbyte_cdk/sources/streams/concurrent/README.md +7 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +7 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +84 -75
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +30 -2
- airbyte_cdk/sources/streams/concurrent/cursor.py +298 -42
- airbyte_cdk/sources/streams/concurrent/default_stream.py +12 -3
- airbyte_cdk/sources/streams/concurrent/exceptions.py +3 -0
- airbyte_cdk/sources/streams/concurrent/helpers.py +14 -3
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +12 -3
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +10 -3
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -16
- airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +15 -5
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +109 -17
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +90 -72
- airbyte_cdk/sources/streams/core.py +412 -87
- airbyte_cdk/sources/streams/http/__init__.py +2 -1
- airbyte_cdk/sources/streams/http/availability_strategy.py +12 -101
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
- airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
- airbyte_cdk/sources/streams/http/exceptions.py +27 -7
- airbyte_cdk/sources/streams/http/http.py +369 -246
- airbyte_cdk/sources/streams/http/http_client.py +531 -0
- airbyte_cdk/sources/streams/http/rate_limiting.py +76 -12
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +28 -9
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +90 -35
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
- airbyte_cdk/sources/types.py +154 -0
- airbyte_cdk/sources/utils/record_helper.py +36 -21
- airbyte_cdk/sources/utils/schema_helpers.py +13 -6
- airbyte_cdk/sources/utils/slice_logger.py +4 -1
- airbyte_cdk/sources/utils/transform.py +54 -20
- airbyte_cdk/sql/_util/hashing.py +34 -0
- airbyte_cdk/sql/_util/name_normalizers.py +92 -0
- airbyte_cdk/sql/constants.py +32 -0
- airbyte_cdk/sql/exceptions.py +235 -0
- airbyte_cdk/sql/secrets.py +123 -0
- airbyte_cdk/sql/shared/__init__.py +15 -0
- airbyte_cdk/sql/shared/catalog_providers.py +145 -0
- airbyte_cdk/sql/shared/sql_processor.py +786 -0
- airbyte_cdk/sql/types.py +160 -0
- airbyte_cdk/test/catalog_builder.py +70 -18
- airbyte_cdk/test/entrypoint_wrapper.py +117 -42
- airbyte_cdk/test/mock_http/__init__.py +1 -1
- airbyte_cdk/test/mock_http/matcher.py +6 -0
- airbyte_cdk/test/mock_http/mocker.py +57 -10
- airbyte_cdk/test/mock_http/request.py +19 -3
- airbyte_cdk/test/mock_http/response.py +3 -1
- airbyte_cdk/test/mock_http/response_builder.py +32 -16
- airbyte_cdk/test/state_builder.py +18 -10
- airbyte_cdk/test/utils/__init__.py +1 -0
- airbyte_cdk/test/utils/data.py +24 -0
- airbyte_cdk/test/utils/http_mocking.py +16 -0
- airbyte_cdk/test/utils/manifest_only_fixtures.py +60 -0
- airbyte_cdk/test/utils/reading.py +26 -0
- airbyte_cdk/utils/__init__.py +2 -1
- airbyte_cdk/utils/airbyte_secrets_utils.py +5 -3
- airbyte_cdk/utils/analytics_message.py +10 -2
- airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
- airbyte_cdk/utils/event_timing.py +10 -10
- airbyte_cdk/utils/mapping_helpers.py +3 -1
- airbyte_cdk/utils/message_utils.py +20 -11
- airbyte_cdk/utils/print_buffer.py +75 -0
- airbyte_cdk/utils/schema_inferrer.py +198 -28
- airbyte_cdk/utils/slice_hasher.py +30 -0
- airbyte_cdk/utils/spec_schema_transformations.py +6 -3
- airbyte_cdk/utils/stream_status_utils.py +8 -1
- airbyte_cdk/utils/traced_exception.py +61 -21
- airbyte_cdk-6.17.1.dev0.dist-info/METADATA +109 -0
- airbyte_cdk-6.17.1.dev0.dist-info/RECORD +350 -0
- {airbyte_cdk-0.72.0.dist-info → airbyte_cdk-6.17.1.dev0.dist-info}/WHEEL +1 -2
- airbyte_cdk-6.17.1.dev0.dist-info/entry_points.txt +3 -0
- airbyte_cdk/sources/declarative/create_partial.py +0 -92
- airbyte_cdk/sources/declarative/parsers/class_types_registry.py +0 -102
- airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py +0 -64
- airbyte_cdk/sources/declarative/requesters/error_handlers/response_action.py +0 -16
- airbyte_cdk/sources/declarative/requesters/error_handlers/response_status.py +0 -68
- airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py +0 -114
- airbyte_cdk/sources/deprecated/base_source.py +0 -94
- airbyte_cdk/sources/deprecated/client.py +0 -99
- airbyte_cdk/sources/singer/__init__.py +0 -8
- airbyte_cdk/sources/singer/singer_helpers.py +0 -304
- airbyte_cdk/sources/singer/source.py +0 -186
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -23
- airbyte_cdk/sources/streams/http/auth/__init__.py +0 -17
- airbyte_cdk/sources/streams/http/auth/core.py +0 -29
- airbyte_cdk/sources/streams/http/auth/oauth.py +0 -113
- airbyte_cdk/sources/streams/http/auth/token.py +0 -47
- airbyte_cdk/sources/streams/utils/stream_helper.py +0 -40
- airbyte_cdk/sources/utils/catalog_helpers.py +0 -22
- airbyte_cdk/sources/utils/schema_models.py +0 -84
- airbyte_cdk-0.72.0.dist-info/METADATA +0 -243
- airbyte_cdk-0.72.0.dist-info/RECORD +0 -466
- airbyte_cdk-0.72.0.dist-info/top_level.txt +0 -3
- source_declarative_manifest/main.py +0 -29
- unit_tests/connector_builder/__init__.py +0 -3
- unit_tests/connector_builder/test_connector_builder_handler.py +0 -871
- unit_tests/connector_builder/test_message_grouper.py +0 -713
- unit_tests/connector_builder/utils.py +0 -27
- unit_tests/destinations/test_destination.py +0 -243
- unit_tests/singer/test_singer_helpers.py +0 -56
- unit_tests/singer/test_singer_source.py +0 -112
- unit_tests/sources/__init__.py +0 -0
- unit_tests/sources/concurrent_source/__init__.py +0 -3
- unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +0 -106
- unit_tests/sources/declarative/__init__.py +0 -3
- unit_tests/sources/declarative/auth/__init__.py +0 -3
- unit_tests/sources/declarative/auth/test_oauth.py +0 -331
- unit_tests/sources/declarative/auth/test_selective_authenticator.py +0 -39
- unit_tests/sources/declarative/auth/test_session_token_auth.py +0 -182
- unit_tests/sources/declarative/auth/test_token_auth.py +0 -200
- unit_tests/sources/declarative/auth/test_token_provider.py +0 -73
- unit_tests/sources/declarative/checks/__init__.py +0 -3
- unit_tests/sources/declarative/checks/test_check_stream.py +0 -146
- unit_tests/sources/declarative/decoders/__init__.py +0 -0
- unit_tests/sources/declarative/decoders/test_json_decoder.py +0 -16
- unit_tests/sources/declarative/external_component.py +0 -13
- unit_tests/sources/declarative/extractors/__init__.py +0 -3
- unit_tests/sources/declarative/extractors/test_dpath_extractor.py +0 -55
- unit_tests/sources/declarative/extractors/test_record_filter.py +0 -55
- unit_tests/sources/declarative/extractors/test_record_selector.py +0 -179
- unit_tests/sources/declarative/incremental/__init__.py +0 -0
- unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +0 -860
- unit_tests/sources/declarative/incremental/test_per_partition_cursor.py +0 -406
- unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +0 -332
- unit_tests/sources/declarative/interpolation/__init__.py +0 -3
- unit_tests/sources/declarative/interpolation/test_filters.py +0 -80
- unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py +0 -40
- unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py +0 -35
- unit_tests/sources/declarative/interpolation/test_interpolated_nested_mapping.py +0 -45
- unit_tests/sources/declarative/interpolation/test_interpolated_string.py +0 -25
- unit_tests/sources/declarative/interpolation/test_jinja.py +0 -240
- unit_tests/sources/declarative/interpolation/test_macros.py +0 -73
- unit_tests/sources/declarative/parsers/__init__.py +0 -3
- unit_tests/sources/declarative/parsers/test_manifest_component_transformer.py +0 -406
- unit_tests/sources/declarative/parsers/test_manifest_reference_resolver.py +0 -139
- unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +0 -1841
- unit_tests/sources/declarative/parsers/testing_components.py +0 -36
- unit_tests/sources/declarative/partition_routers/__init__.py +0 -3
- unit_tests/sources/declarative/partition_routers/test_list_partition_router.py +0 -155
- unit_tests/sources/declarative/partition_routers/test_single_partition_router.py +0 -14
- unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py +0 -404
- unit_tests/sources/declarative/requesters/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_constant_backoff.py +0 -34
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py +0 -36
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py +0 -38
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py +0 -35
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py +0 -64
- unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py +0 -213
- unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py +0 -178
- unit_tests/sources/declarative/requesters/error_handlers/test_http_response_filter.py +0 -121
- unit_tests/sources/declarative/requesters/error_handlers/test_response_status.py +0 -44
- unit_tests/sources/declarative/requesters/paginators/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py +0 -64
- unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +0 -313
- unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py +0 -12
- unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py +0 -58
- unit_tests/sources/declarative/requesters/paginators/test_page_increment.py +0 -70
- unit_tests/sources/declarative/requesters/paginators/test_request_option.py +0 -43
- unit_tests/sources/declarative/requesters/paginators/test_stop_condition.py +0 -105
- unit_tests/sources/declarative/requesters/request_options/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py +0 -101
- unit_tests/sources/declarative/requesters/test_http_requester.py +0 -974
- unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py +0 -32
- unit_tests/sources/declarative/retrievers/__init__.py +0 -3
- unit_tests/sources/declarative/retrievers/test_simple_retriever.py +0 -542
- unit_tests/sources/declarative/schema/__init__.py +0 -6
- unit_tests/sources/declarative/schema/source_test/SourceTest.py +0 -8
- unit_tests/sources/declarative/schema/source_test/__init__.py +0 -3
- unit_tests/sources/declarative/schema/test_default_schema_loader.py +0 -32
- unit_tests/sources/declarative/schema/test_inline_schema_loader.py +0 -19
- unit_tests/sources/declarative/schema/test_json_file_schema_loader.py +0 -26
- unit_tests/sources/declarative/states/__init__.py +0 -3
- unit_tests/sources/declarative/stream_slicers/__init__.py +0 -3
- unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py +0 -225
- unit_tests/sources/declarative/test_create_partial.py +0 -83
- unit_tests/sources/declarative/test_declarative_stream.py +0 -103
- unit_tests/sources/declarative/test_manifest_declarative_source.py +0 -1260
- unit_tests/sources/declarative/test_types.py +0 -39
- unit_tests/sources/declarative/test_yaml_declarative_source.py +0 -148
- unit_tests/sources/file_based/__init__.py +0 -0
- unit_tests/sources/file_based/availability_strategy/__init__.py +0 -0
- unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py +0 -100
- unit_tests/sources/file_based/config/__init__.py +0 -0
- unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +0 -28
- unit_tests/sources/file_based/config/test_csv_format.py +0 -34
- unit_tests/sources/file_based/config/test_file_based_stream_config.py +0 -84
- unit_tests/sources/file_based/discovery_policy/__init__.py +0 -0
- unit_tests/sources/file_based/discovery_policy/test_default_discovery_policy.py +0 -31
- unit_tests/sources/file_based/file_types/__init__.py +0 -0
- unit_tests/sources/file_based/file_types/test_avro_parser.py +0 -243
- unit_tests/sources/file_based/file_types/test_csv_parser.py +0 -546
- unit_tests/sources/file_based/file_types/test_jsonl_parser.py +0 -158
- unit_tests/sources/file_based/file_types/test_parquet_parser.py +0 -274
- unit_tests/sources/file_based/file_types/test_unstructured_parser.py +0 -593
- unit_tests/sources/file_based/helpers.py +0 -70
- unit_tests/sources/file_based/in_memory_files_source.py +0 -211
- unit_tests/sources/file_based/scenarios/__init__.py +0 -0
- unit_tests/sources/file_based/scenarios/avro_scenarios.py +0 -744
- unit_tests/sources/file_based/scenarios/check_scenarios.py +0 -220
- unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +0 -2844
- unit_tests/sources/file_based/scenarios/csv_scenarios.py +0 -3105
- unit_tests/sources/file_based/scenarios/file_based_source_builder.py +0 -91
- unit_tests/sources/file_based/scenarios/incremental_scenarios.py +0 -1926
- unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +0 -930
- unit_tests/sources/file_based/scenarios/parquet_scenarios.py +0 -754
- unit_tests/sources/file_based/scenarios/scenario_builder.py +0 -234
- unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +0 -608
- unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +0 -746
- unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +0 -726
- unit_tests/sources/file_based/stream/__init__.py +0 -0
- unit_tests/sources/file_based/stream/concurrent/__init__.py +0 -0
- unit_tests/sources/file_based/stream/concurrent/test_adapters.py +0 -362
- unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +0 -458
- unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +0 -310
- unit_tests/sources/file_based/stream/test_default_file_based_stream.py +0 -244
- unit_tests/sources/file_based/test_file_based_scenarios.py +0 -320
- unit_tests/sources/file_based/test_file_based_stream_reader.py +0 -272
- unit_tests/sources/file_based/test_scenarios.py +0 -253
- unit_tests/sources/file_based/test_schema_helpers.py +0 -346
- unit_tests/sources/fixtures/__init__.py +0 -3
- unit_tests/sources/fixtures/source_test_fixture.py +0 -153
- unit_tests/sources/message/__init__.py +0 -0
- unit_tests/sources/message/test_repository.py +0 -153
- unit_tests/sources/streams/__init__.py +0 -0
- unit_tests/sources/streams/concurrent/__init__.py +0 -3
- unit_tests/sources/streams/concurrent/scenarios/__init__.py +0 -3
- unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +0 -250
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +0 -140
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +0 -452
- unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +0 -76
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +0 -418
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +0 -142
- unit_tests/sources/streams/concurrent/scenarios/utils.py +0 -55
- unit_tests/sources/streams/concurrent/test_adapters.py +0 -380
- unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +0 -684
- unit_tests/sources/streams/concurrent/test_cursor.py +0 -139
- unit_tests/sources/streams/concurrent/test_datetime_state_converter.py +0 -369
- unit_tests/sources/streams/concurrent/test_default_stream.py +0 -197
- unit_tests/sources/streams/concurrent/test_partition_enqueuer.py +0 -90
- unit_tests/sources/streams/concurrent/test_partition_reader.py +0 -67
- unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +0 -106
- unit_tests/sources/streams/http/__init__.py +0 -0
- unit_tests/sources/streams/http/auth/__init__.py +0 -0
- unit_tests/sources/streams/http/auth/test_auth.py +0 -173
- unit_tests/sources/streams/http/requests_native_auth/__init__.py +0 -0
- unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +0 -423
- unit_tests/sources/streams/http/test_availability_strategy.py +0 -180
- unit_tests/sources/streams/http/test_http.py +0 -635
- unit_tests/sources/streams/test_availability_strategy.py +0 -70
- unit_tests/sources/streams/test_call_rate.py +0 -300
- unit_tests/sources/streams/test_stream_read.py +0 -405
- unit_tests/sources/streams/test_streams_core.py +0 -184
- unit_tests/sources/test_abstract_source.py +0 -1442
- unit_tests/sources/test_concurrent_source.py +0 -112
- unit_tests/sources/test_config.py +0 -92
- unit_tests/sources/test_connector_state_manager.py +0 -482
- unit_tests/sources/test_http_logger.py +0 -252
- unit_tests/sources/test_integration_source.py +0 -86
- unit_tests/sources/test_source.py +0 -684
- unit_tests/sources/test_source_read.py +0 -460
- unit_tests/test/__init__.py +0 -0
- unit_tests/test/mock_http/__init__.py +0 -0
- unit_tests/test/mock_http/test_matcher.py +0 -53
- unit_tests/test/mock_http/test_mocker.py +0 -214
- unit_tests/test/mock_http/test_request.py +0 -117
- unit_tests/test/mock_http/test_response_builder.py +0 -177
- unit_tests/test/test_entrypoint_wrapper.py +0 -240
- unit_tests/utils/__init__.py +0 -0
- unit_tests/utils/test_datetime_format_inferrer.py +0 -60
- unit_tests/utils/test_mapping_helpers.py +0 -54
- unit_tests/utils/test_message_utils.py +0 -91
- unit_tests/utils/test_rate_limiting.py +0 -26
- unit_tests/utils/test_schema_inferrer.py +0 -202
- unit_tests/utils/test_secret_utils.py +0 -135
- unit_tests/utils/test_stream_status_utils.py +0 -61
- unit_tests/utils/test_traced_exception.py +0 -107
- airbyte_cdk/sources/{deprecated → declarative/async_job}/__init__.py +0 -0
- {source_declarative_manifest → airbyte_cdk/sources/declarative/migrations}/__init__.py +0 -0
- {unit_tests/destinations → airbyte_cdk/sql}/__init__.py +0 -0
- {unit_tests/singer → airbyte_cdk/sql/_util}/__init__.py +0 -0
- {airbyte_cdk-0.72.0.dist-info → airbyte_cdk-6.17.1.dev0.dist-info}/LICENSE.txt +0 -0
@@ -1,28 +1,42 @@
|
|
1
1
|
#
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
|
-
|
5
|
-
|
4
|
+
import copy
|
6
5
|
import inspect
|
6
|
+
import itertools
|
7
7
|
import logging
|
8
|
-
import typing
|
9
8
|
from abc import ABC, abstractmethod
|
10
|
-
from
|
11
|
-
from
|
9
|
+
from dataclasses import dataclass
|
10
|
+
from functools import cached_property, lru_cache
|
11
|
+
from typing import Any, Dict, Iterable, Iterator, List, Mapping, MutableMapping, Optional, Union
|
12
|
+
|
13
|
+
from typing_extensions import deprecated
|
12
14
|
|
13
15
|
import airbyte_cdk.sources.utils.casing as casing
|
14
|
-
from airbyte_cdk.models import
|
16
|
+
from airbyte_cdk.models import (
|
17
|
+
AirbyteMessage,
|
18
|
+
AirbyteStream,
|
19
|
+
ConfiguredAirbyteStream,
|
20
|
+
DestinationSyncMode,
|
21
|
+
SyncMode,
|
22
|
+
)
|
15
23
|
from airbyte_cdk.models import Type as MessageType
|
24
|
+
from airbyte_cdk.sources.streams.checkpoint import (
|
25
|
+
CheckpointMode,
|
26
|
+
CheckpointReader,
|
27
|
+
Cursor,
|
28
|
+
CursorBasedCheckpointReader,
|
29
|
+
FullRefreshCheckpointReader,
|
30
|
+
IncrementalCheckpointReader,
|
31
|
+
LegacyCursorBasedCheckpointReader,
|
32
|
+
ResumableFullRefreshCheckpointReader,
|
33
|
+
)
|
34
|
+
from airbyte_cdk.sources.types import StreamSlice
|
16
35
|
|
17
36
|
# list of all possible HTTP methods which can be used for sending of request bodies
|
18
37
|
from airbyte_cdk.sources.utils.schema_helpers import InternalConfig, ResourceSchemaLoader
|
19
|
-
from airbyte_cdk.sources.utils.slice_logger import SliceLogger
|
38
|
+
from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger, SliceLogger
|
20
39
|
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
|
21
|
-
from deprecated.classic import deprecated
|
22
|
-
|
23
|
-
if typing.TYPE_CHECKING:
|
24
|
-
from airbyte_cdk.sources import Source
|
25
|
-
from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy
|
26
40
|
|
27
41
|
# A stream's read method can return one of the following types:
|
28
42
|
# Mapping[str, Any]: The content of an AirbyteRecordMessage
|
@@ -31,9 +45,7 @@ StreamData = Union[Mapping[str, Any], AirbyteMessage]
|
|
31
45
|
|
32
46
|
JsonSchema = Mapping[str, Any]
|
33
47
|
|
34
|
-
|
35
|
-
# value is used to indicate that stream should not load the incoming state value
|
36
|
-
FULL_REFRESH_SENTINEL_STATE_KEY = "__ab_full_refresh_state_message"
|
48
|
+
NO_CURSOR_STATE_KEY = "__ab_no_cursor_state_message"
|
37
49
|
|
38
50
|
|
39
51
|
def package_name_from_class(cls: object) -> str:
|
@@ -45,10 +57,10 @@ def package_name_from_class(cls: object) -> str:
|
|
45
57
|
raise ValueError(f"Could not find package name for class {cls}")
|
46
58
|
|
47
59
|
|
48
|
-
class
|
49
|
-
"""Mixin
|
60
|
+
class CheckpointMixin(ABC):
|
61
|
+
"""Mixin for a stream that implements reading and writing the internal state used to checkpoint sync progress to the platform
|
50
62
|
|
51
|
-
class
|
63
|
+
class CheckpointedStream(Stream, CheckpointMixin):
|
52
64
|
@property
|
53
65
|
def state(self):
|
54
66
|
return self._state
|
@@ -79,11 +91,38 @@ class IncrementalMixin(ABC):
|
|
79
91
|
"""State setter, accept state serialized by state getter."""
|
80
92
|
|
81
93
|
|
94
|
+
@deprecated(
|
95
|
+
"Deprecated as of CDK version 0.87.0. "
|
96
|
+
"Deprecated in favor of the `CheckpointMixin` which offers similar functionality."
|
97
|
+
)
|
98
|
+
class IncrementalMixin(CheckpointMixin, ABC):
|
99
|
+
"""Mixin to make stream incremental.
|
100
|
+
|
101
|
+
class IncrementalStream(Stream, IncrementalMixin):
|
102
|
+
@property
|
103
|
+
def state(self):
|
104
|
+
return self._state
|
105
|
+
|
106
|
+
@state.setter
|
107
|
+
def state(self, value):
|
108
|
+
self._state[self.cursor_field] = value[self.cursor_field]
|
109
|
+
"""
|
110
|
+
|
111
|
+
|
112
|
+
@dataclass
|
113
|
+
class StreamClassification:
|
114
|
+
is_legacy_format: bool
|
115
|
+
has_multiple_slices: bool
|
116
|
+
|
117
|
+
|
82
118
|
class Stream(ABC):
|
83
119
|
"""
|
84
120
|
Base abstract class for an Airbyte Stream. Makes no assumption of the Stream's underlying transport protocol.
|
85
121
|
"""
|
86
122
|
|
123
|
+
_configured_json_schema: Optional[Dict[str, Any]] = None
|
124
|
+
_exit_on_rate_limit: bool = False
|
125
|
+
|
87
126
|
# Use self.logger in subclasses to log any messages
|
88
127
|
@property
|
89
128
|
def logger(self) -> logging.Logger:
|
@@ -92,7 +131,11 @@ class Stream(ABC):
|
|
92
131
|
# TypeTransformer object to perform output data transformation
|
93
132
|
transformer: TypeTransformer = TypeTransformer(TransformConfig.NoTransform)
|
94
133
|
|
95
|
-
|
134
|
+
cursor: Optional[Cursor] = None
|
135
|
+
|
136
|
+
has_multiple_slices = False
|
137
|
+
|
138
|
+
@cached_property
|
96
139
|
def name(self) -> str:
|
97
140
|
"""
|
98
141
|
:return: Stream name. By default this is the implementing class name, but it can be overridden as needed.
|
@@ -122,62 +165,121 @@ class Stream(ABC):
|
|
122
165
|
) -> Iterable[StreamData]:
|
123
166
|
sync_mode = configured_stream.sync_mode
|
124
167
|
cursor_field = configured_stream.cursor_field
|
168
|
+
self.configured_json_schema = configured_stream.stream.json_schema
|
125
169
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
170
|
+
# WARNING: When performing a read() that uses incoming stream state, we MUST use the self.state that is defined as
|
171
|
+
# opposed to the incoming stream_state value. Because some connectors like ones using the file-based CDK modify
|
172
|
+
# state before setting the value on the Stream attribute, the most up-to-date state is derived from Stream.state
|
173
|
+
# instead of the stream_state parameter. This does not apply to legacy connectors using get_updated_state().
|
174
|
+
try:
|
175
|
+
stream_state = self.state # type: ignore # we know the field might not exist...
|
176
|
+
except AttributeError:
|
177
|
+
pass
|
178
|
+
|
179
|
+
should_checkpoint = bool(state_manager)
|
180
|
+
checkpoint_reader = self._get_checkpoint_reader(
|
181
|
+
logger=logger, cursor_field=cursor_field, sync_mode=sync_mode, stream_state=stream_state
|
130
182
|
)
|
131
|
-
logger.debug(f"Processing stream slices for {self.name} (sync_mode: {sync_mode.name})", extra={"stream_slices": slices})
|
132
183
|
|
133
|
-
|
184
|
+
next_slice = checkpoint_reader.next()
|
134
185
|
record_counter = 0
|
135
|
-
|
136
|
-
|
186
|
+
stream_state_tracker = copy.deepcopy(stream_state)
|
187
|
+
while next_slice is not None:
|
137
188
|
if slice_logger.should_log_slice_message(logger):
|
138
|
-
yield slice_logger.create_slice_log_message(
|
189
|
+
yield slice_logger.create_slice_log_message(next_slice)
|
139
190
|
records = self.read_records(
|
140
191
|
sync_mode=sync_mode, # todo: change this interface to no longer rely on sync_mode for behavior
|
141
|
-
stream_slice=
|
192
|
+
stream_slice=next_slice,
|
142
193
|
stream_state=stream_state,
|
143
194
|
cursor_field=cursor_field or None,
|
144
195
|
)
|
145
196
|
for record_data_or_message in records:
|
146
197
|
yield record_data_or_message
|
147
198
|
if isinstance(record_data_or_message, Mapping) or (
|
148
|
-
hasattr(record_data_or_message, "type")
|
199
|
+
hasattr(record_data_or_message, "type")
|
200
|
+
and record_data_or_message.type == MessageType.RECORD
|
149
201
|
):
|
150
|
-
record_data =
|
151
|
-
|
202
|
+
record_data = (
|
203
|
+
record_data_or_message
|
204
|
+
if isinstance(record_data_or_message, Mapping)
|
205
|
+
else record_data_or_message.record
|
206
|
+
)
|
207
|
+
|
208
|
+
# Thanks I hate it. RFR fundamentally doesn't fit with the concept of the legacy Stream.get_updated_state()
|
209
|
+
# method because RFR streams rely on pagination as a cursor. Stream.get_updated_state() was designed to make
|
210
|
+
# the CDK manage state using specifically the last seen record. don't @ brian.lai
|
211
|
+
#
|
212
|
+
# Also, because the legacy incremental state case decouples observing incoming records from emitting state, it
|
213
|
+
# requires that we separate CheckpointReader.observe() and CheckpointReader.get_checkpoint() which could
|
214
|
+
# otherwise be combined.
|
215
|
+
if self.cursor_field:
|
216
|
+
# Some connectors have streams that implement get_updated_state(), but do not define a cursor_field. This
|
217
|
+
# should be fixed on the stream implementation, but we should also protect against this in the CDK as well
|
218
|
+
stream_state_tracker = self.get_updated_state(
|
219
|
+
stream_state_tracker,
|
220
|
+
record_data, # type: ignore [arg-type]
|
221
|
+
)
|
222
|
+
self._observe_state(checkpoint_reader, stream_state_tracker)
|
152
223
|
record_counter += 1
|
153
224
|
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
225
|
+
checkpoint_interval = self.state_checkpoint_interval
|
226
|
+
checkpoint = checkpoint_reader.get_checkpoint()
|
227
|
+
if (
|
228
|
+
should_checkpoint
|
229
|
+
and checkpoint_interval
|
230
|
+
and record_counter % checkpoint_interval == 0
|
231
|
+
and checkpoint is not None
|
232
|
+
):
|
233
|
+
airbyte_state_message = self._checkpoint_state(
|
234
|
+
checkpoint, state_manager=state_manager
|
235
|
+
)
|
236
|
+
yield airbyte_state_message
|
160
237
|
|
161
238
|
if internal_config.is_limit_reached(record_counter):
|
162
239
|
break
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
240
|
+
self._observe_state(checkpoint_reader)
|
241
|
+
checkpoint_state = checkpoint_reader.get_checkpoint()
|
242
|
+
if should_checkpoint and checkpoint_state is not None:
|
243
|
+
airbyte_state_message = self._checkpoint_state(
|
244
|
+
checkpoint_state, state_manager=state_manager
|
245
|
+
)
|
169
246
|
yield airbyte_state_message
|
170
247
|
|
171
|
-
|
172
|
-
if sync_mode == SyncMode.full_refresh:
|
173
|
-
# We use a dummy state if there is no suitable value provided by full_refresh streams that do not have a valid cursor.
|
174
|
-
# Incremental streams running full_refresh mode emit a meaningful state
|
175
|
-
stream_state = stream_state or {FULL_REFRESH_SENTINEL_STATE_KEY: True}
|
248
|
+
next_slice = checkpoint_reader.next()
|
176
249
|
|
177
|
-
|
178
|
-
|
250
|
+
checkpoint = checkpoint_reader.get_checkpoint()
|
251
|
+
if should_checkpoint and checkpoint is not None:
|
252
|
+
airbyte_state_message = self._checkpoint_state(checkpoint, state_manager=state_manager)
|
179
253
|
yield airbyte_state_message
|
180
254
|
|
255
|
+
def read_only_records(self, state: Optional[Mapping[str, Any]] = None) -> Iterable[StreamData]:
|
256
|
+
"""
|
257
|
+
Helper method that performs a read on a stream with an optional state and emits records. If the parent stream supports
|
258
|
+
incremental, this operation does not update the stream's internal state (if it uses the modern state setter/getter)
|
259
|
+
or emit state messages.
|
260
|
+
"""
|
261
|
+
|
262
|
+
configured_stream = ConfiguredAirbyteStream(
|
263
|
+
stream=AirbyteStream(
|
264
|
+
name=self.name,
|
265
|
+
json_schema={},
|
266
|
+
supported_sync_modes=[SyncMode.full_refresh, SyncMode.incremental],
|
267
|
+
),
|
268
|
+
sync_mode=SyncMode.incremental if state else SyncMode.full_refresh,
|
269
|
+
destination_sync_mode=DestinationSyncMode.append,
|
270
|
+
)
|
271
|
+
|
272
|
+
yield from self.read(
|
273
|
+
configured_stream=configured_stream,
|
274
|
+
logger=self.logger,
|
275
|
+
slice_logger=DebugSliceLogger(),
|
276
|
+
stream_state=dict(state)
|
277
|
+
if state
|
278
|
+
else {}, # read() expects MutableMapping instead of Mapping which is used more often
|
279
|
+
state_manager=None,
|
280
|
+
internal_config=InternalConfig(), # type: ignore [call-arg]
|
281
|
+
)
|
282
|
+
|
181
283
|
@abstractmethod
|
182
284
|
def read_records(
|
183
285
|
self,
|
@@ -202,14 +304,20 @@ class Stream(ABC):
|
|
202
304
|
return ResourceSchemaLoader(package_name_from_class(self.__class__)).get_schema(self.name)
|
203
305
|
|
204
306
|
def as_airbyte_stream(self) -> AirbyteStream:
|
205
|
-
stream = AirbyteStream(
|
307
|
+
stream = AirbyteStream(
|
308
|
+
name=self.name,
|
309
|
+
json_schema=dict(self.get_json_schema()),
|
310
|
+
supported_sync_modes=[SyncMode.full_refresh],
|
311
|
+
is_resumable=self.is_resumable,
|
312
|
+
)
|
206
313
|
|
207
314
|
if self.namespace:
|
208
315
|
stream.namespace = self.namespace
|
209
316
|
|
317
|
+
# If we can offer incremental we always should. RFR is always less reliable than incremental which uses a real cursor value
|
210
318
|
if self.supports_incremental:
|
211
319
|
stream.source_defined_cursor = self.source_defined_cursor
|
212
|
-
stream.supported_sync_modes.append(SyncMode.incremental)
|
320
|
+
stream.supported_sync_modes.append(SyncMode.incremental)
|
213
321
|
stream.default_cursor_field = self._wrapped_cursor_field()
|
214
322
|
|
215
323
|
keys = Stream._wrapped_primary_key(self.primary_key)
|
@@ -225,6 +333,29 @@ class Stream(ABC):
|
|
225
333
|
"""
|
226
334
|
return len(self._wrapped_cursor_field()) > 0
|
227
335
|
|
336
|
+
@property
|
337
|
+
def is_resumable(self) -> bool:
|
338
|
+
"""
|
339
|
+
:return: True if this stream allows the checkpointing of sync progress and can resume from it on subsequent attempts.
|
340
|
+
This differs from supports_incremental because certain kinds of streams like those supporting resumable full refresh
|
341
|
+
can checkpoint progress in between attempts for improved fault tolerance. However, they will start from the beginning
|
342
|
+
on the next sync job.
|
343
|
+
"""
|
344
|
+
if self.supports_incremental:
|
345
|
+
return True
|
346
|
+
if self.has_multiple_slices:
|
347
|
+
# We temporarily gate substreams to not support RFR because it puts a pretty high burden on connector developers
|
348
|
+
# to structure stream state in a very specific way. We also can't check for issubclass(HttpSubStream) because
|
349
|
+
# not all substreams implement the interface and it would be a circular dependency so we use parent as a surrogate
|
350
|
+
return False
|
351
|
+
elif hasattr(type(self), "state") and getattr(type(self), "state").fset is not None:
|
352
|
+
# Modern case where a stream manages state using getter/setter
|
353
|
+
return True
|
354
|
+
else:
|
355
|
+
# Legacy case where the CDK manages state via the get_updated_state() method. This is determined by checking if
|
356
|
+
# the stream's get_updated_state() differs from the Stream class and therefore has been overridden
|
357
|
+
return type(self).get_updated_state != Stream.get_updated_state
|
358
|
+
|
228
359
|
def _wrapped_cursor_field(self) -> List[str]:
|
229
360
|
return [self.cursor_field] if isinstance(self.cursor_field, str) else self.cursor_field
|
230
361
|
|
@@ -251,27 +382,15 @@ class Stream(ABC):
|
|
251
382
|
"""
|
252
383
|
return True
|
253
384
|
|
254
|
-
def check_availability(self, logger: logging.Logger, source: Optional["Source"] = None) -> Tuple[bool, Optional[str]]:
|
255
|
-
"""
|
256
|
-
Checks whether this stream is available.
|
257
|
-
|
258
|
-
:param logger: source logger
|
259
|
-
:param source: (optional) source
|
260
|
-
:return: A tuple of (boolean, str). If boolean is true, then this stream
|
261
|
-
is available, and no str is required. Otherwise, this stream is unavailable
|
262
|
-
for some reason and the str should describe what went wrong and how to
|
263
|
-
resolve the unavailability, if possible.
|
264
|
-
"""
|
265
|
-
if self.availability_strategy:
|
266
|
-
return self.availability_strategy.check_availability(self, logger, source)
|
267
|
-
return True, None
|
268
|
-
|
269
385
|
@property
|
270
|
-
def
|
271
|
-
"""
|
272
|
-
|
273
|
-
|
274
|
-
|
386
|
+
def exit_on_rate_limit(self) -> bool:
|
387
|
+
"""Exit on rate limit getter, should return bool value. False if the stream will retry endlessly when rate limited."""
|
388
|
+
return self._exit_on_rate_limit
|
389
|
+
|
390
|
+
@exit_on_rate_limit.setter
|
391
|
+
def exit_on_rate_limit(self, value: bool) -> None:
|
392
|
+
"""Exit on rate limit setter, accept bool value."""
|
393
|
+
self._exit_on_rate_limit = value
|
275
394
|
|
276
395
|
@property
|
277
396
|
@abstractmethod
|
@@ -282,7 +401,11 @@ class Stream(ABC):
|
|
282
401
|
"""
|
283
402
|
|
284
403
|
def stream_slices(
|
285
|
-
self,
|
404
|
+
self,
|
405
|
+
*,
|
406
|
+
sync_mode: SyncMode,
|
407
|
+
cursor_field: Optional[List[str]] = None,
|
408
|
+
stream_state: Optional[Mapping[str, Any]] = None,
|
286
409
|
) -> Iterable[Optional[Mapping[str, Any]]]:
|
287
410
|
"""
|
288
411
|
Override to define the slices for this stream. See the stream slicing section of the docs for more information.
|
@@ -292,7 +415,7 @@ class Stream(ABC):
|
|
292
415
|
:param stream_state:
|
293
416
|
:return:
|
294
417
|
"""
|
295
|
-
|
418
|
+
yield StreamSlice(partition={}, cursor_slice={})
|
296
419
|
|
297
420
|
@property
|
298
421
|
def state_checkpoint_interval(self) -> Optional[int]:
|
@@ -308,11 +431,18 @@ class Stream(ABC):
|
|
308
431
|
"""
|
309
432
|
return None
|
310
433
|
|
311
|
-
|
434
|
+
# Commented-out to avoid any runtime penalty, since this is used in a hot per-record codepath.
|
435
|
+
# To be evaluated for re-introduction here: https://github.com/airbytehq/airbyte-python-cdk/issues/116
|
436
|
+
# @deprecated(
|
437
|
+
# "Deprecated method `get_updated_state` as of CDK version 0.1.49. "
|
438
|
+
# "Please use explicit state property instead, see `IncrementalMixin` docs."
|
439
|
+
# )
|
312
440
|
def get_updated_state(
|
313
441
|
self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]
|
314
442
|
) -> MutableMapping[str, Any]:
|
315
|
-
"""
|
443
|
+
"""DEPRECATED. Please use explicit state property instead, see `IncrementalMixin` docs.
|
444
|
+
|
445
|
+
Override to extract state from the latest record. Needed to implement incremental sync.
|
316
446
|
|
317
447
|
Inspects the latest record extracted from the data source and the current state object and return an updated state object.
|
318
448
|
|
@@ -325,6 +455,138 @@ class Stream(ABC):
|
|
325
455
|
"""
|
326
456
|
return {}
|
327
457
|
|
458
|
+
def get_cursor(self) -> Optional[Cursor]:
|
459
|
+
"""
|
460
|
+
A Cursor is an interface that a stream can implement to manage how its internal state is read and updated while
|
461
|
+
reading records. Historically, Python connectors had no concept of a cursor to manage state. Python streams need
|
462
|
+
to define a cursor implementation and override this method to manage state through a Cursor.
|
463
|
+
"""
|
464
|
+
return self.cursor
|
465
|
+
|
466
|
+
def _get_checkpoint_reader(
|
467
|
+
self,
|
468
|
+
logger: logging.Logger,
|
469
|
+
cursor_field: Optional[List[str]],
|
470
|
+
sync_mode: SyncMode,
|
471
|
+
stream_state: MutableMapping[str, Any],
|
472
|
+
) -> CheckpointReader:
|
473
|
+
mappings_or_slices = self.stream_slices(
|
474
|
+
cursor_field=cursor_field,
|
475
|
+
sync_mode=sync_mode, # todo: change this interface to no longer rely on sync_mode for behavior
|
476
|
+
stream_state=stream_state,
|
477
|
+
)
|
478
|
+
|
479
|
+
# Because of poor foresight, we wrote the default Stream.stream_slices() method to return [None] which is confusing and
|
480
|
+
# has now normalized this behavior for connector developers. Now some connectors return [None]. This is objectively
|
481
|
+
# misleading and a more ideal interface is [{}] to indicate we still want to iterate over one slice, but with no
|
482
|
+
# specific slice values. None is bad, and now I feel bad that I have to write this hack.
|
483
|
+
if mappings_or_slices == [None]:
|
484
|
+
mappings_or_slices = [{}]
|
485
|
+
|
486
|
+
slices_iterable_copy, iterable_for_detecting_format = itertools.tee(mappings_or_slices, 2)
|
487
|
+
stream_classification = self._classify_stream(
|
488
|
+
mappings_or_slices=iterable_for_detecting_format
|
489
|
+
)
|
490
|
+
|
491
|
+
# Streams that override has_multiple_slices are explicitly indicating that they will iterate over
|
492
|
+
# multiple partitions. Inspecting slices to automatically apply the correct cursor is only needed as
|
493
|
+
# a backup. So if this value was already assigned to True by the stream, we don't need to reassign it
|
494
|
+
self.has_multiple_slices = (
|
495
|
+
self.has_multiple_slices or stream_classification.has_multiple_slices
|
496
|
+
)
|
497
|
+
|
498
|
+
cursor = self.get_cursor()
|
499
|
+
if cursor:
|
500
|
+
cursor.set_initial_state(stream_state=stream_state)
|
501
|
+
|
502
|
+
checkpoint_mode = self._checkpoint_mode
|
503
|
+
|
504
|
+
if cursor and stream_classification.is_legacy_format:
|
505
|
+
return LegacyCursorBasedCheckpointReader(
|
506
|
+
stream_slices=slices_iterable_copy, cursor=cursor, read_state_from_cursor=True
|
507
|
+
)
|
508
|
+
elif cursor:
|
509
|
+
return CursorBasedCheckpointReader(
|
510
|
+
stream_slices=slices_iterable_copy,
|
511
|
+
cursor=cursor,
|
512
|
+
read_state_from_cursor=checkpoint_mode == CheckpointMode.RESUMABLE_FULL_REFRESH,
|
513
|
+
)
|
514
|
+
elif checkpoint_mode == CheckpointMode.RESUMABLE_FULL_REFRESH:
|
515
|
+
# Resumable full refresh readers rely on the stream state dynamically being updated during pagination and do
|
516
|
+
# not iterate over a static set of slices.
|
517
|
+
return ResumableFullRefreshCheckpointReader(stream_state=stream_state)
|
518
|
+
elif checkpoint_mode == CheckpointMode.INCREMENTAL:
|
519
|
+
return IncrementalCheckpointReader(
|
520
|
+
stream_slices=slices_iterable_copy, stream_state=stream_state
|
521
|
+
)
|
522
|
+
else:
|
523
|
+
return FullRefreshCheckpointReader(stream_slices=slices_iterable_copy)
|
524
|
+
|
525
|
+
@property
|
526
|
+
def _checkpoint_mode(self) -> CheckpointMode:
|
527
|
+
if self.is_resumable and len(self._wrapped_cursor_field()) > 0:
|
528
|
+
return CheckpointMode.INCREMENTAL
|
529
|
+
elif self.is_resumable:
|
530
|
+
return CheckpointMode.RESUMABLE_FULL_REFRESH
|
531
|
+
else:
|
532
|
+
return CheckpointMode.FULL_REFRESH
|
533
|
+
|
534
|
+
@staticmethod
|
535
|
+
def _classify_stream(
|
536
|
+
mappings_or_slices: Iterator[Optional[Union[Mapping[str, Any], StreamSlice]]],
|
537
|
+
) -> StreamClassification:
|
538
|
+
"""
|
539
|
+
This is a bit of a crazy solution, but also the only way we can detect certain attributes about the stream since Python
|
540
|
+
streams do not follow consistent implementation patterns. We care about the following two attributes:
|
541
|
+
- has_multiple_slices: Helps to incrementally release changes since substreams w/ parents are much more complicated. Also
|
542
|
+
helps de-risk the release of changes that might impact all connectors
|
543
|
+
- is_legacy_format: Since the checkpoint reader must manage a complex state object, we opted to have it always
|
544
|
+
use the structured StreamSlice object. However, this requires backwards compatibility with Python sources that only
|
545
|
+
support the legacy mapping object
|
546
|
+
|
547
|
+
Both attributes can eventually be deprecated, and this method deleted, once substreams have been implemented and
|
548
|
+
legacy connectors all adhere to the StreamSlice object.
|
549
|
+
"""
|
550
|
+
if not mappings_or_slices:
|
551
|
+
raise ValueError("A stream should always have at least one slice")
|
552
|
+
try:
|
553
|
+
next_slice = next(mappings_or_slices)
|
554
|
+
if isinstance(next_slice, StreamSlice) and next_slice == StreamSlice(
|
555
|
+
partition={}, cursor_slice={}
|
556
|
+
):
|
557
|
+
is_legacy_format = False
|
558
|
+
slice_has_value = False
|
559
|
+
elif next_slice == {}:
|
560
|
+
is_legacy_format = True
|
561
|
+
slice_has_value = False
|
562
|
+
elif isinstance(next_slice, StreamSlice):
|
563
|
+
is_legacy_format = False
|
564
|
+
slice_has_value = True
|
565
|
+
else:
|
566
|
+
is_legacy_format = True
|
567
|
+
slice_has_value = True
|
568
|
+
except StopIteration:
|
569
|
+
# If the stream has no slices, the format ultimately does not matter since no data will get synced. This is technically
|
570
|
+
# a valid case because it is up to the stream to define its slicing behavior
|
571
|
+
return StreamClassification(is_legacy_format=False, has_multiple_slices=False)
|
572
|
+
|
573
|
+
if slice_has_value:
|
574
|
+
# If the first slice contained a partition value from the result of stream_slices(), this is a substream that might
|
575
|
+
# have multiple parent records to iterate over
|
576
|
+
return StreamClassification(
|
577
|
+
is_legacy_format=is_legacy_format, has_multiple_slices=slice_has_value
|
578
|
+
)
|
579
|
+
|
580
|
+
try:
|
581
|
+
# If stream_slices() returns multiple slices, this is also a substream that can potentially generate empty slices
|
582
|
+
next(mappings_or_slices)
|
583
|
+
return StreamClassification(is_legacy_format=is_legacy_format, has_multiple_slices=True)
|
584
|
+
except StopIteration:
|
585
|
+
# If the result of stream_slices() only returns a single empty stream slice, then we know this is a regular stream
|
586
|
+
return StreamClassification(
|
587
|
+
is_legacy_format=is_legacy_format, has_multiple_slices=False
|
588
|
+
)
|
589
|
+
|
328
590
|
def log_stream_sync_configuration(self) -> None:
|
329
591
|
"""
|
330
592
|
Logs the configuration of this stream.
|
@@ -338,7 +600,9 @@ class Stream(ABC):
|
|
338
600
|
)
|
339
601
|
|
340
602
|
@staticmethod
|
341
|
-
def _wrapped_primary_key(
|
603
|
+
def _wrapped_primary_key(
|
604
|
+
keys: Optional[Union[str, List[str], List[List[str]]]],
|
605
|
+
) -> Optional[List[List[str]]]:
|
342
606
|
"""
|
343
607
|
:return: wrap the primary_key property in a list of list of strings required by the Airbyte Stream object.
|
344
608
|
"""
|
@@ -360,19 +624,80 @@ class Stream(ABC):
|
|
360
624
|
else:
|
361
625
|
raise ValueError(f"Element must be either list or str. Got: {type(keys)}")
|
362
626
|
|
627
|
+
def _observe_state(
|
628
|
+
self, checkpoint_reader: CheckpointReader, stream_state: Optional[Mapping[str, Any]] = None
|
629
|
+
) -> None:
|
630
|
+
"""
|
631
|
+
Convenience method that attempts to read the Stream's state using the recommended way of connectors managing their
|
632
|
+
own state via state setter/getter. But if we get back an AttributeError, then the legacy Stream.get_updated_state()
|
633
|
+
method is used as a fallback method.
|
634
|
+
"""
|
635
|
+
|
636
|
+
# This is an inversion of the original logic that used to try state getter/setters first. As part of the work to
|
637
|
+
# automatically apply resumable full refresh to all streams, all HttpStream classes implement default state
|
638
|
+
# getter/setter methods, so we should default to using the incoming stream_state parameter unless its value is {}, which
|
639
|
+
# indicates the stream does not override the default get_updated_state() implementation. When the default method
|
640
|
+
# is not overridden, then the stream defers to self.state getter
|
641
|
+
if stream_state:
|
642
|
+
checkpoint_reader.observe(stream_state)
|
643
|
+
elif type(self).get_updated_state == Stream.get_updated_state:
|
644
|
+
# We only default to the state getter/setter if the stream does not use the legacy get_updated_state() method
|
645
|
+
try:
|
646
|
+
new_state = self.state # type: ignore # This will always exist on HttpStreams, but may not for Stream
|
647
|
+
if new_state:
|
648
|
+
checkpoint_reader.observe(new_state)
|
649
|
+
except AttributeError:
|
650
|
+
pass
|
651
|
+
|
363
652
|
def _checkpoint_state( # type: ignore # ignoring typing for ConnectorStateManager because of circular dependencies
|
364
653
|
self,
|
365
654
|
stream_state: Mapping[str, Any],
|
366
655
|
state_manager,
|
367
656
|
) -> AirbyteMessage:
|
368
|
-
#
|
369
|
-
#
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
657
|
+
# todo: This can be consolidated into one ConnectorStateManager.update_and_create_state_message() method, but I want
|
658
|
+
# to reduce changes right now and this would span concurrent as well
|
659
|
+
state_manager.update_state_for_stream(self.name, self.namespace, stream_state)
|
660
|
+
return state_manager.create_state_message(self.name, self.namespace) # type: ignore [no-any-return]
|
661
|
+
|
662
|
+
@property
|
663
|
+
def configured_json_schema(self) -> Optional[Dict[str, Any]]:
|
664
|
+
"""
|
665
|
+
This property is set from the read method.
|
666
|
+
|
667
|
+
:return Optional[Dict]: JSON schema from configured catalog if provided, otherwise None.
|
668
|
+
"""
|
669
|
+
return self._configured_json_schema
|
670
|
+
|
671
|
+
@configured_json_schema.setter
|
672
|
+
def configured_json_schema(self, json_schema: Dict[str, Any]) -> None:
|
673
|
+
self._configured_json_schema = self._filter_schema_invalid_properties(json_schema)
|
674
|
+
|
675
|
+
def _filter_schema_invalid_properties(
|
676
|
+
self, configured_catalog_json_schema: Dict[str, Any]
|
677
|
+
) -> Dict[str, Any]:
|
678
|
+
"""
|
679
|
+
Filters the properties in json_schema that are not present in the stream schema.
|
680
|
+
Configured Schemas can have very old fields, so we need to do the housekeeping ourselves.
|
681
|
+
"""
|
682
|
+
configured_schema: Any = configured_catalog_json_schema.get("properties", {})
|
683
|
+
stream_schema_properties: Any = self.get_json_schema().get("properties", {})
|
684
|
+
|
685
|
+
configured_keys = configured_schema.keys()
|
686
|
+
stream_keys = stream_schema_properties.keys()
|
687
|
+
invalid_properties = configured_keys - stream_keys
|
688
|
+
if not invalid_properties:
|
689
|
+
return configured_catalog_json_schema
|
690
|
+
|
691
|
+
self.logger.warning(
|
692
|
+
f"Stream {self.name}: the following fields are deprecated and cannot be synced. {invalid_properties}. Refresh the connection's source schema to resolve this warning."
|
693
|
+
)
|
694
|
+
|
695
|
+
valid_configured_schema_properties_keys = stream_keys & configured_keys
|
696
|
+
valid_configured_schema_properties = {}
|
697
|
+
|
698
|
+
for configured_schema_property in valid_configured_schema_properties_keys:
|
699
|
+
valid_configured_schema_properties[configured_schema_property] = (
|
700
|
+
stream_schema_properties[configured_schema_property]
|
374
701
|
)
|
375
702
|
|
376
|
-
|
377
|
-
state_manager.update_state_for_stream(self.name, self.namespace, stream_state)
|
378
|
-
return state_manager.create_state_message(self.name, self.namespace)
|
703
|
+
return {**configured_catalog_json_schema, "properties": valid_configured_schema_properties}
|
@@ -5,5 +5,6 @@
|
|
5
5
|
# Initialize Streams Package
|
6
6
|
from .exceptions import UserDefinedBackoffException
|
7
7
|
from .http import HttpStream, HttpSubStream
|
8
|
+
from .http_client import HttpClient
|
8
9
|
|
9
|
-
__all__ = ["HttpStream", "HttpSubStream", "UserDefinedBackoffException"]
|
10
|
+
__all__ = ["HttpClient", "HttpStream", "HttpSubStream", "UserDefinedBackoffException"]
|