airbyte-cdk 0.72.1__py3-none-any.whl → 6.13.1.dev4107__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/__init__.py +355 -6
- airbyte_cdk/cli/__init__.py +1 -0
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
- airbyte_cdk/cli/source_declarative_manifest/_run.py +230 -0
- airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
- airbyte_cdk/config_observation.py +29 -10
- airbyte_cdk/connector.py +24 -24
- airbyte_cdk/connector_builder/README.md +53 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +37 -11
- airbyte_cdk/connector_builder/main.py +45 -13
- airbyte_cdk/connector_builder/message_grouper.py +189 -50
- airbyte_cdk/connector_builder/models.py +3 -2
- airbyte_cdk/destinations/__init__.py +4 -3
- airbyte_cdk/destinations/destination.py +54 -20
- airbyte_cdk/destinations/vector_db_based/README.md +37 -0
- airbyte_cdk/destinations/vector_db_based/config.py +40 -17
- airbyte_cdk/destinations/vector_db_based/document_processor.py +56 -17
- airbyte_cdk/destinations/vector_db_based/embedder.py +57 -15
- airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
- airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
- airbyte_cdk/destinations/vector_db_based/writer.py +24 -5
- airbyte_cdk/entrypoint.py +153 -44
- airbyte_cdk/exception_handler.py +21 -3
- airbyte_cdk/logger.py +30 -44
- airbyte_cdk/models/__init__.py +13 -2
- airbyte_cdk/models/airbyte_protocol.py +86 -1
- airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
- airbyte_cdk/models/file_transfer_record_message.py +13 -0
- airbyte_cdk/models/well_known_types.py +1 -1
- airbyte_cdk/sources/__init__.py +5 -1
- airbyte_cdk/sources/abstract_source.py +125 -79
- airbyte_cdk/sources/concurrent_source/__init__.py +7 -2
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +102 -36
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +29 -36
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +94 -10
- airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +20 -14
- airbyte_cdk/sources/config.py +3 -2
- airbyte_cdk/sources/connector_state_manager.py +49 -83
- airbyte_cdk/sources/declarative/async_job/job.py +52 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +497 -0
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +75 -0
- airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
- airbyte_cdk/sources/declarative/async_job/status.py +24 -0
- airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
- airbyte_cdk/sources/declarative/auth/__init__.py +2 -3
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
- airbyte_cdk/sources/declarative/auth/jwt.py +191 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +60 -20
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +10 -2
- airbyte_cdk/sources/declarative/auth/token.py +28 -10
- airbyte_cdk/sources/declarative/auth/token_provider.py +9 -8
- airbyte_cdk/sources/declarative/checks/check_stream.py +16 -8
- airbyte_cdk/sources/declarative/checks/connection_checker.py +4 -2
- airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +421 -0
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +4 -0
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +26 -6
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +1185 -85
- airbyte_cdk/sources/declarative/declarative_source.py +5 -2
- airbyte_cdk/sources/declarative/declarative_stream.py +95 -9
- airbyte_cdk/sources/declarative/decoders/__init__.py +23 -2
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
- airbyte_cdk/sources/declarative/decoders/decoder.py +11 -4
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +92 -5
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +12 -1
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +29 -24
- airbyte_cdk/sources/declarative/extractors/http_selector.py +4 -5
- airbyte_cdk/sources/declarative/extractors/record_extractor.py +2 -3
- airbyte_cdk/sources/declarative/extractors/record_filter.py +65 -8
- airbyte_cdk/sources/declarative/extractors/record_selector.py +85 -26
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +177 -0
- airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
- airbyte_cdk/sources/declarative/incremental/__init__.py +25 -3
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +156 -48
- airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +350 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +159 -74
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
- airbyte_cdk/sources/declarative/interpolation/filters.py +27 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +23 -5
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +12 -8
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +13 -6
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +21 -6
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +9 -3
- airbyte_cdk/sources/declarative/interpolation/jinja.py +72 -37
- airbyte_cdk/sources/declarative/interpolation/macros.py +72 -17
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +193 -52
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
- airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
- airbyte_cdk/sources/declarative/models/__init__.py +1 -1
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +1319 -603
- airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +2 -2
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +26 -4
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +26 -15
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +1695 -225
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +24 -4
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +39 -9
- airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
- airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +15 -3
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +222 -39
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +19 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +19 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +4 -2
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +41 -9
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +29 -14
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +5 -13
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +32 -16
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +46 -56
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +6 -32
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +119 -41
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +228 -0
- airbyte_cdk/sources/declarative/requesters/http_requester.py +98 -344
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +105 -46
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +14 -8
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +19 -8
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +53 -21
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +42 -19
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +25 -12
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +13 -10
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +26 -13
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +15 -2
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +91 -0
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +31 -14
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +27 -15
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +63 -10
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +1 -1
- airbyte_cdk/sources/declarative/requesters/requester.py +9 -17
- airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
- airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
- airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
- airbyte_cdk/sources/declarative/retrievers/__init__.py +6 -2
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +100 -0
- airbyte_cdk/sources/declarative/retrievers/retriever.py +1 -3
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +228 -72
- airbyte_cdk/sources/declarative/schema/__init__.py +14 -1
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +5 -3
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +236 -0
- airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +8 -8
- airbyte_cdk/sources/declarative/spec/spec.py +12 -5
- airbyte_cdk/sources/declarative/stream_slicers/__init__.py +1 -2
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +88 -0
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +9 -14
- airbyte_cdk/sources/declarative/transformations/add_fields.py +19 -11
- airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
- airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +13 -10
- airbyte_cdk/sources/declarative/transformations/transformation.py +5 -5
- airbyte_cdk/sources/declarative/types.py +19 -110
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +31 -10
- airbyte_cdk/sources/embedded/base_integration.py +16 -5
- airbyte_cdk/sources/embedded/catalog.py +16 -4
- airbyte_cdk/sources/embedded/runner.py +19 -3
- airbyte_cdk/sources/embedded/tools.py +5 -2
- airbyte_cdk/sources/file_based/README.md +152 -0
- airbyte_cdk/sources/file_based/__init__.py +24 -0
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +22 -6
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +46 -10
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +58 -10
- airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
- airbyte_cdk/sources/file_based/config/csv_format.py +29 -10
- airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +16 -4
- airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
- airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
- airbyte_cdk/sources/file_based/config/unstructured_format.py +13 -5
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
- airbyte_cdk/sources/file_based/exceptions.py +52 -15
- airbyte_cdk/sources/file_based/file_based_source.py +163 -33
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +83 -5
- airbyte_cdk/sources/file_based/file_types/__init__.py +14 -1
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +75 -24
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +116 -34
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +37 -0
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +24 -8
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +60 -18
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +147 -41
- airbyte_cdk/sources/file_based/remote_file.py +1 -1
- airbyte_cdk/sources/file_based/schema_helpers.py +38 -10
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +50 -13
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +67 -27
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +14 -23
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +54 -18
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +21 -9
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +27 -10
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +175 -45
- airbyte_cdk/sources/http_logger.py +8 -3
- airbyte_cdk/sources/message/__init__.py +7 -1
- airbyte_cdk/sources/message/repository.py +18 -4
- airbyte_cdk/sources/source.py +42 -38
- airbyte_cdk/sources/streams/__init__.py +2 -2
- airbyte_cdk/sources/streams/availability_strategy.py +54 -3
- airbyte_cdk/sources/streams/call_rate.py +64 -21
- airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
- airbyte_cdk/sources/{declarative/incremental → streams/checkpoint}/cursor.py +17 -14
- airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
- airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
- airbyte_cdk/sources/streams/concurrent/README.md +7 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +7 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +84 -75
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +30 -2
- airbyte_cdk/sources/streams/concurrent/cursor.py +298 -42
- airbyte_cdk/sources/streams/concurrent/default_stream.py +12 -3
- airbyte_cdk/sources/streams/concurrent/exceptions.py +3 -0
- airbyte_cdk/sources/streams/concurrent/helpers.py +14 -3
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +12 -3
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +10 -3
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -16
- airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +15 -5
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +109 -17
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +90 -72
- airbyte_cdk/sources/streams/core.py +412 -87
- airbyte_cdk/sources/streams/http/__init__.py +2 -1
- airbyte_cdk/sources/streams/http/availability_strategy.py +12 -101
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
- airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
- airbyte_cdk/sources/streams/http/exceptions.py +27 -7
- airbyte_cdk/sources/streams/http/http.py +369 -246
- airbyte_cdk/sources/streams/http/http_client.py +531 -0
- airbyte_cdk/sources/streams/http/rate_limiting.py +76 -12
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +28 -9
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +90 -35
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
- airbyte_cdk/sources/types.py +154 -0
- airbyte_cdk/sources/utils/record_helper.py +36 -21
- airbyte_cdk/sources/utils/schema_helpers.py +13 -6
- airbyte_cdk/sources/utils/slice_logger.py +4 -1
- airbyte_cdk/sources/utils/transform.py +54 -20
- airbyte_cdk/sql/_util/hashing.py +34 -0
- airbyte_cdk/sql/_util/name_normalizers.py +92 -0
- airbyte_cdk/sql/constants.py +32 -0
- airbyte_cdk/sql/exceptions.py +235 -0
- airbyte_cdk/sql/secrets.py +123 -0
- airbyte_cdk/sql/shared/__init__.py +15 -0
- airbyte_cdk/sql/shared/catalog_providers.py +145 -0
- airbyte_cdk/sql/shared/sql_processor.py +786 -0
- airbyte_cdk/sql/types.py +160 -0
- airbyte_cdk/test/catalog_builder.py +70 -18
- airbyte_cdk/test/entrypoint_wrapper.py +117 -42
- airbyte_cdk/test/mock_http/__init__.py +1 -1
- airbyte_cdk/test/mock_http/matcher.py +6 -0
- airbyte_cdk/test/mock_http/mocker.py +57 -10
- airbyte_cdk/test/mock_http/request.py +19 -3
- airbyte_cdk/test/mock_http/response.py +3 -1
- airbyte_cdk/test/mock_http/response_builder.py +32 -16
- airbyte_cdk/test/state_builder.py +18 -10
- airbyte_cdk/test/utils/__init__.py +1 -0
- airbyte_cdk/test/utils/data.py +24 -0
- airbyte_cdk/test/utils/http_mocking.py +16 -0
- airbyte_cdk/test/utils/manifest_only_fixtures.py +60 -0
- airbyte_cdk/test/utils/reading.py +26 -0
- airbyte_cdk/utils/__init__.py +2 -1
- airbyte_cdk/utils/airbyte_secrets_utils.py +5 -3
- airbyte_cdk/utils/analytics_message.py +10 -2
- airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
- airbyte_cdk/utils/event_timing.py +10 -10
- airbyte_cdk/utils/mapping_helpers.py +3 -1
- airbyte_cdk/utils/message_utils.py +20 -11
- airbyte_cdk/utils/print_buffer.py +75 -0
- airbyte_cdk/utils/schema_inferrer.py +198 -28
- airbyte_cdk/utils/slice_hasher.py +30 -0
- airbyte_cdk/utils/spec_schema_transformations.py +6 -3
- airbyte_cdk/utils/stream_status_utils.py +8 -1
- airbyte_cdk/utils/traced_exception.py +61 -21
- airbyte_cdk-6.13.1.dev4107.dist-info/METADATA +109 -0
- airbyte_cdk-6.13.1.dev4107.dist-info/RECORD +349 -0
- {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.13.1.dev4107.dist-info}/WHEEL +1 -2
- airbyte_cdk-6.13.1.dev4107.dist-info/entry_points.txt +3 -0
- airbyte_cdk/sources/declarative/create_partial.py +0 -92
- airbyte_cdk/sources/declarative/parsers/class_types_registry.py +0 -102
- airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py +0 -64
- airbyte_cdk/sources/declarative/requesters/error_handlers/response_action.py +0 -16
- airbyte_cdk/sources/declarative/requesters/error_handlers/response_status.py +0 -68
- airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py +0 -114
- airbyte_cdk/sources/deprecated/base_source.py +0 -94
- airbyte_cdk/sources/deprecated/client.py +0 -99
- airbyte_cdk/sources/singer/__init__.py +0 -8
- airbyte_cdk/sources/singer/singer_helpers.py +0 -304
- airbyte_cdk/sources/singer/source.py +0 -186
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -23
- airbyte_cdk/sources/streams/http/auth/__init__.py +0 -17
- airbyte_cdk/sources/streams/http/auth/core.py +0 -29
- airbyte_cdk/sources/streams/http/auth/oauth.py +0 -113
- airbyte_cdk/sources/streams/http/auth/token.py +0 -47
- airbyte_cdk/sources/streams/utils/stream_helper.py +0 -40
- airbyte_cdk/sources/utils/catalog_helpers.py +0 -22
- airbyte_cdk/sources/utils/schema_models.py +0 -84
- airbyte_cdk-0.72.1.dist-info/METADATA +0 -243
- airbyte_cdk-0.72.1.dist-info/RECORD +0 -466
- airbyte_cdk-0.72.1.dist-info/top_level.txt +0 -3
- source_declarative_manifest/main.py +0 -29
- unit_tests/connector_builder/__init__.py +0 -3
- unit_tests/connector_builder/test_connector_builder_handler.py +0 -871
- unit_tests/connector_builder/test_message_grouper.py +0 -713
- unit_tests/connector_builder/utils.py +0 -27
- unit_tests/destinations/test_destination.py +0 -243
- unit_tests/singer/test_singer_helpers.py +0 -56
- unit_tests/singer/test_singer_source.py +0 -112
- unit_tests/sources/__init__.py +0 -0
- unit_tests/sources/concurrent_source/__init__.py +0 -3
- unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +0 -106
- unit_tests/sources/declarative/__init__.py +0 -3
- unit_tests/sources/declarative/auth/__init__.py +0 -3
- unit_tests/sources/declarative/auth/test_oauth.py +0 -331
- unit_tests/sources/declarative/auth/test_selective_authenticator.py +0 -39
- unit_tests/sources/declarative/auth/test_session_token_auth.py +0 -182
- unit_tests/sources/declarative/auth/test_token_auth.py +0 -200
- unit_tests/sources/declarative/auth/test_token_provider.py +0 -73
- unit_tests/sources/declarative/checks/__init__.py +0 -3
- unit_tests/sources/declarative/checks/test_check_stream.py +0 -146
- unit_tests/sources/declarative/decoders/__init__.py +0 -0
- unit_tests/sources/declarative/decoders/test_json_decoder.py +0 -16
- unit_tests/sources/declarative/external_component.py +0 -13
- unit_tests/sources/declarative/extractors/__init__.py +0 -3
- unit_tests/sources/declarative/extractors/test_dpath_extractor.py +0 -55
- unit_tests/sources/declarative/extractors/test_record_filter.py +0 -55
- unit_tests/sources/declarative/extractors/test_record_selector.py +0 -179
- unit_tests/sources/declarative/incremental/__init__.py +0 -0
- unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +0 -860
- unit_tests/sources/declarative/incremental/test_per_partition_cursor.py +0 -406
- unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +0 -332
- unit_tests/sources/declarative/interpolation/__init__.py +0 -3
- unit_tests/sources/declarative/interpolation/test_filters.py +0 -80
- unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py +0 -40
- unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py +0 -35
- unit_tests/sources/declarative/interpolation/test_interpolated_nested_mapping.py +0 -45
- unit_tests/sources/declarative/interpolation/test_interpolated_string.py +0 -25
- unit_tests/sources/declarative/interpolation/test_jinja.py +0 -240
- unit_tests/sources/declarative/interpolation/test_macros.py +0 -73
- unit_tests/sources/declarative/parsers/__init__.py +0 -3
- unit_tests/sources/declarative/parsers/test_manifest_component_transformer.py +0 -406
- unit_tests/sources/declarative/parsers/test_manifest_reference_resolver.py +0 -139
- unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +0 -1847
- unit_tests/sources/declarative/parsers/testing_components.py +0 -36
- unit_tests/sources/declarative/partition_routers/__init__.py +0 -3
- unit_tests/sources/declarative/partition_routers/test_list_partition_router.py +0 -155
- unit_tests/sources/declarative/partition_routers/test_single_partition_router.py +0 -14
- unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py +0 -404
- unit_tests/sources/declarative/requesters/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_constant_backoff.py +0 -34
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py +0 -36
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py +0 -38
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py +0 -35
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py +0 -64
- unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py +0 -213
- unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py +0 -178
- unit_tests/sources/declarative/requesters/error_handlers/test_http_response_filter.py +0 -121
- unit_tests/sources/declarative/requesters/error_handlers/test_response_status.py +0 -44
- unit_tests/sources/declarative/requesters/paginators/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py +0 -64
- unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +0 -313
- unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py +0 -12
- unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py +0 -58
- unit_tests/sources/declarative/requesters/paginators/test_page_increment.py +0 -70
- unit_tests/sources/declarative/requesters/paginators/test_request_option.py +0 -43
- unit_tests/sources/declarative/requesters/paginators/test_stop_condition.py +0 -105
- unit_tests/sources/declarative/requesters/request_options/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py +0 -101
- unit_tests/sources/declarative/requesters/test_http_requester.py +0 -974
- unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py +0 -32
- unit_tests/sources/declarative/retrievers/__init__.py +0 -3
- unit_tests/sources/declarative/retrievers/test_simple_retriever.py +0 -542
- unit_tests/sources/declarative/schema/__init__.py +0 -6
- unit_tests/sources/declarative/schema/source_test/SourceTest.py +0 -8
- unit_tests/sources/declarative/schema/source_test/__init__.py +0 -3
- unit_tests/sources/declarative/schema/test_default_schema_loader.py +0 -32
- unit_tests/sources/declarative/schema/test_inline_schema_loader.py +0 -19
- unit_tests/sources/declarative/schema/test_json_file_schema_loader.py +0 -26
- unit_tests/sources/declarative/states/__init__.py +0 -3
- unit_tests/sources/declarative/stream_slicers/__init__.py +0 -3
- unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py +0 -225
- unit_tests/sources/declarative/test_create_partial.py +0 -83
- unit_tests/sources/declarative/test_declarative_stream.py +0 -103
- unit_tests/sources/declarative/test_manifest_declarative_source.py +0 -1260
- unit_tests/sources/declarative/test_types.py +0 -39
- unit_tests/sources/declarative/test_yaml_declarative_source.py +0 -148
- unit_tests/sources/file_based/__init__.py +0 -0
- unit_tests/sources/file_based/availability_strategy/__init__.py +0 -0
- unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py +0 -100
- unit_tests/sources/file_based/config/__init__.py +0 -0
- unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +0 -28
- unit_tests/sources/file_based/config/test_csv_format.py +0 -34
- unit_tests/sources/file_based/config/test_file_based_stream_config.py +0 -84
- unit_tests/sources/file_based/discovery_policy/__init__.py +0 -0
- unit_tests/sources/file_based/discovery_policy/test_default_discovery_policy.py +0 -31
- unit_tests/sources/file_based/file_types/__init__.py +0 -0
- unit_tests/sources/file_based/file_types/test_avro_parser.py +0 -243
- unit_tests/sources/file_based/file_types/test_csv_parser.py +0 -546
- unit_tests/sources/file_based/file_types/test_jsonl_parser.py +0 -158
- unit_tests/sources/file_based/file_types/test_parquet_parser.py +0 -274
- unit_tests/sources/file_based/file_types/test_unstructured_parser.py +0 -593
- unit_tests/sources/file_based/helpers.py +0 -70
- unit_tests/sources/file_based/in_memory_files_source.py +0 -211
- unit_tests/sources/file_based/scenarios/__init__.py +0 -0
- unit_tests/sources/file_based/scenarios/avro_scenarios.py +0 -744
- unit_tests/sources/file_based/scenarios/check_scenarios.py +0 -220
- unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +0 -2844
- unit_tests/sources/file_based/scenarios/csv_scenarios.py +0 -3105
- unit_tests/sources/file_based/scenarios/file_based_source_builder.py +0 -91
- unit_tests/sources/file_based/scenarios/incremental_scenarios.py +0 -1926
- unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +0 -930
- unit_tests/sources/file_based/scenarios/parquet_scenarios.py +0 -754
- unit_tests/sources/file_based/scenarios/scenario_builder.py +0 -234
- unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +0 -608
- unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +0 -746
- unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +0 -726
- unit_tests/sources/file_based/stream/__init__.py +0 -0
- unit_tests/sources/file_based/stream/concurrent/__init__.py +0 -0
- unit_tests/sources/file_based/stream/concurrent/test_adapters.py +0 -362
- unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +0 -458
- unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +0 -310
- unit_tests/sources/file_based/stream/test_default_file_based_stream.py +0 -244
- unit_tests/sources/file_based/test_file_based_scenarios.py +0 -320
- unit_tests/sources/file_based/test_file_based_stream_reader.py +0 -272
- unit_tests/sources/file_based/test_scenarios.py +0 -253
- unit_tests/sources/file_based/test_schema_helpers.py +0 -346
- unit_tests/sources/fixtures/__init__.py +0 -3
- unit_tests/sources/fixtures/source_test_fixture.py +0 -153
- unit_tests/sources/message/__init__.py +0 -0
- unit_tests/sources/message/test_repository.py +0 -153
- unit_tests/sources/streams/__init__.py +0 -0
- unit_tests/sources/streams/concurrent/__init__.py +0 -3
- unit_tests/sources/streams/concurrent/scenarios/__init__.py +0 -3
- unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +0 -250
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +0 -140
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +0 -452
- unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +0 -76
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +0 -418
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +0 -142
- unit_tests/sources/streams/concurrent/scenarios/utils.py +0 -55
- unit_tests/sources/streams/concurrent/test_adapters.py +0 -380
- unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +0 -684
- unit_tests/sources/streams/concurrent/test_cursor.py +0 -139
- unit_tests/sources/streams/concurrent/test_datetime_state_converter.py +0 -369
- unit_tests/sources/streams/concurrent/test_default_stream.py +0 -197
- unit_tests/sources/streams/concurrent/test_partition_enqueuer.py +0 -90
- unit_tests/sources/streams/concurrent/test_partition_reader.py +0 -67
- unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +0 -106
- unit_tests/sources/streams/http/__init__.py +0 -0
- unit_tests/sources/streams/http/auth/__init__.py +0 -0
- unit_tests/sources/streams/http/auth/test_auth.py +0 -173
- unit_tests/sources/streams/http/requests_native_auth/__init__.py +0 -0
- unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +0 -423
- unit_tests/sources/streams/http/test_availability_strategy.py +0 -180
- unit_tests/sources/streams/http/test_http.py +0 -635
- unit_tests/sources/streams/test_availability_strategy.py +0 -70
- unit_tests/sources/streams/test_call_rate.py +0 -300
- unit_tests/sources/streams/test_stream_read.py +0 -405
- unit_tests/sources/streams/test_streams_core.py +0 -184
- unit_tests/sources/test_abstract_source.py +0 -1442
- unit_tests/sources/test_concurrent_source.py +0 -112
- unit_tests/sources/test_config.py +0 -92
- unit_tests/sources/test_connector_state_manager.py +0 -482
- unit_tests/sources/test_http_logger.py +0 -252
- unit_tests/sources/test_integration_source.py +0 -86
- unit_tests/sources/test_source.py +0 -684
- unit_tests/sources/test_source_read.py +0 -460
- unit_tests/test/__init__.py +0 -0
- unit_tests/test/mock_http/__init__.py +0 -0
- unit_tests/test/mock_http/test_matcher.py +0 -53
- unit_tests/test/mock_http/test_mocker.py +0 -214
- unit_tests/test/mock_http/test_request.py +0 -117
- unit_tests/test/mock_http/test_response_builder.py +0 -177
- unit_tests/test/test_entrypoint_wrapper.py +0 -240
- unit_tests/utils/__init__.py +0 -0
- unit_tests/utils/test_datetime_format_inferrer.py +0 -60
- unit_tests/utils/test_mapping_helpers.py +0 -54
- unit_tests/utils/test_message_utils.py +0 -91
- unit_tests/utils/test_rate_limiting.py +0 -26
- unit_tests/utils/test_schema_inferrer.py +0 -202
- unit_tests/utils/test_secret_utils.py +0 -135
- unit_tests/utils/test_stream_status_utils.py +0 -61
- unit_tests/utils/test_traced_exception.py +0 -107
- /airbyte_cdk/sources/{deprecated → declarative/async_job}/__init__.py +0 -0
- {source_declarative_manifest → airbyte_cdk/sources/declarative/migrations}/__init__.py +0 -0
- {unit_tests/destinations → airbyte_cdk/sql}/__init__.py +0 -0
- {unit_tests/singer → airbyte_cdk/sql/_util}/__init__.py +0 -0
- {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.13.1.dev4107.dist-info}/LICENSE.txt +0 -0
@@ -0,0 +1,350 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
import threading
|
6
|
+
import time
|
7
|
+
from typing import Any, Callable, Iterable, Mapping, Optional, TypeVar, Union
|
8
|
+
|
9
|
+
from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
|
10
|
+
from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
|
11
|
+
from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
|
12
|
+
from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
|
13
|
+
|
14
|
+
T = TypeVar("T")
|
15
|
+
|
16
|
+
|
17
|
+
def iterate_with_last_flag_and_state(
|
18
|
+
generator: Iterable[T], get_stream_state_func: Callable[[], Optional[Mapping[str, StreamState]]]
|
19
|
+
) -> Iterable[tuple[T, bool, Any]]:
|
20
|
+
"""
|
21
|
+
Iterates over the given generator, yielding tuples containing the element, a flag
|
22
|
+
indicating whether it's the last element in the generator, and the result of
|
23
|
+
`get_stream_state_func` applied to the element.
|
24
|
+
|
25
|
+
Args:
|
26
|
+
generator: The iterable to iterate over.
|
27
|
+
get_stream_state_func: A function that takes an element from the generator and
|
28
|
+
returns its state.
|
29
|
+
|
30
|
+
Returns:
|
31
|
+
An iterator that yields tuples of the form (element, is_last, state).
|
32
|
+
"""
|
33
|
+
|
34
|
+
iterator = iter(generator)
|
35
|
+
|
36
|
+
try:
|
37
|
+
current = next(iterator)
|
38
|
+
state = get_stream_state_func()
|
39
|
+
except StopIteration:
|
40
|
+
return # Return an empty iterator
|
41
|
+
|
42
|
+
for next_item in iterator:
|
43
|
+
yield current, False, state
|
44
|
+
current = next_item
|
45
|
+
state = get_stream_state_func()
|
46
|
+
|
47
|
+
yield current, True, state
|
48
|
+
|
49
|
+
|
50
|
+
class Timer:
|
51
|
+
"""
|
52
|
+
A simple timer class that measures elapsed time in seconds using a high-resolution performance counter.
|
53
|
+
"""
|
54
|
+
|
55
|
+
def __init__(self) -> None:
|
56
|
+
self._start: Optional[int] = None
|
57
|
+
|
58
|
+
def start(self) -> None:
|
59
|
+
self._start = time.perf_counter_ns()
|
60
|
+
|
61
|
+
def finish(self) -> int:
|
62
|
+
if self._start:
|
63
|
+
return ((time.perf_counter_ns() - self._start) / 1e9).__ceil__()
|
64
|
+
else:
|
65
|
+
raise RuntimeError("Global substream cursor timer not started")
|
66
|
+
|
67
|
+
|
68
|
+
class GlobalSubstreamCursor(DeclarativeCursor):
|
69
|
+
"""
|
70
|
+
The GlobalSubstreamCursor is designed to track the state of substreams using a single global cursor.
|
71
|
+
This class is beneficial for streams with many partitions, as it allows the state to be managed globally
|
72
|
+
instead of per partition, simplifying state management and reducing the size of state messages.
|
73
|
+
|
74
|
+
This cursor is activated by setting the `global_substream_cursor` parameter for incremental sync.
|
75
|
+
|
76
|
+
Warnings:
|
77
|
+
- This class enforces a minimal lookback window for substream based on the duration of the previous sync to avoid losing records. This lookback ensures that any records added or updated during the sync are captured in subsequent syncs.
|
78
|
+
- The global cursor is updated only at the end of the sync. If the sync ends prematurely (e.g., due to an exception), the state will not be updated.
|
79
|
+
- When using the `incremental_dependency` option, the sync will progress through parent records, preventing the sync from getting infinitely stuck. However, it is crucial to understand the requirements for both the `global_substream_cursor` and `incremental_dependency` options to avoid data loss.
|
80
|
+
"""
|
81
|
+
|
82
|
+
def __init__(self, stream_cursor: DatetimeBasedCursor, partition_router: PartitionRouter):
|
83
|
+
self._stream_cursor = stream_cursor
|
84
|
+
self._partition_router = partition_router
|
85
|
+
self._timer = Timer()
|
86
|
+
self._lock = threading.Lock()
|
87
|
+
self._slice_semaphore = threading.Semaphore(
|
88
|
+
0
|
89
|
+
) # Start with 0, indicating no slices being tracked
|
90
|
+
self._all_slices_yielded = False
|
91
|
+
self._lookback_window: Optional[int] = None
|
92
|
+
self._current_partition: Optional[Mapping[str, Any]] = None
|
93
|
+
self._last_slice: bool = False
|
94
|
+
self._parent_state: Optional[Mapping[str, Any]] = None
|
95
|
+
|
96
|
+
def start_slices_generation(self) -> None:
|
97
|
+
self._timer.start()
|
98
|
+
|
99
|
+
def stream_slices(self) -> Iterable[StreamSlice]:
|
100
|
+
"""
|
101
|
+
Generates stream slices, ensuring the last slice is properly flagged and processed.
|
102
|
+
|
103
|
+
This method creates a sequence of stream slices by iterating over partitions and cursor slices.
|
104
|
+
It holds onto one slice in memory to set `_all_slices_yielded` to `True` before yielding the
|
105
|
+
final slice. A semaphore is used to track the processing of slices, ensuring that `close_slice`
|
106
|
+
is called only after all slices have been processed.
|
107
|
+
|
108
|
+
We expect the following events:
|
109
|
+
* Yields all the slices except the last one. At this point, `close_slice` won't actually close the global slice as `self._all_slices_yielded == False`
|
110
|
+
* Release the semaphore one last time before setting `self._all_slices_yielded = True`. This will cause `close_slice` to know about all the slices before we indicate that all slices have been yielded so the left side of `if self._all_slices_yielded and self._slice_semaphore._value == 0` will be false if not everything is closed
|
111
|
+
* Setting `self._all_slices_yielded = True`. We do that before actually yielding the last slice as the caller of `stream_slices` might stop iterating at any point and hence the code after `yield` might not be executed
|
112
|
+
* Yield the last slice. At that point, once there are as many slices yielded as closes, the global slice will be closed too
|
113
|
+
"""
|
114
|
+
slice_generator = (
|
115
|
+
StreamSlice(partition=partition, cursor_slice=cursor_slice)
|
116
|
+
for partition in self._partition_router.stream_slices()
|
117
|
+
for cursor_slice in self._stream_cursor.stream_slices()
|
118
|
+
)
|
119
|
+
|
120
|
+
self.start_slices_generation()
|
121
|
+
for slice, last, state in iterate_with_last_flag_and_state(
|
122
|
+
slice_generator, self._partition_router.get_stream_state
|
123
|
+
):
|
124
|
+
self._parent_state = state
|
125
|
+
self.register_slice(last)
|
126
|
+
yield slice
|
127
|
+
self._parent_state = self._partition_router.get_stream_state()
|
128
|
+
|
129
|
+
def generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]:
|
130
|
+
slice_generator = (
|
131
|
+
StreamSlice(partition=partition, cursor_slice=cursor_slice)
|
132
|
+
for cursor_slice in self._stream_cursor.stream_slices()
|
133
|
+
)
|
134
|
+
|
135
|
+
yield from slice_generator
|
136
|
+
|
137
|
+
def register_slice(self, last: bool) -> None:
|
138
|
+
"""
|
139
|
+
Tracks the processing of a stream slice.
|
140
|
+
|
141
|
+
Releases the semaphore for each slice. If it's the last slice (`last=True`),
|
142
|
+
sets `_all_slices_yielded` to `True` to indicate no more slices will be processed.
|
143
|
+
|
144
|
+
Args:
|
145
|
+
last (bool): True if the current slice is the last in the sequence.
|
146
|
+
"""
|
147
|
+
self._slice_semaphore.release()
|
148
|
+
if last:
|
149
|
+
self._all_slices_yielded = True
|
150
|
+
|
151
|
+
def set_initial_state(self, stream_state: StreamState) -> None:
|
152
|
+
"""
|
153
|
+
Set the initial state for the cursors.
|
154
|
+
|
155
|
+
This method initializes the state for the global cursor using the provided stream state.
|
156
|
+
|
157
|
+
Additionally, it sets the parent state for partition routers that are based on parent streams. If a partition router
|
158
|
+
does not have parent streams, this step will be skipped due to the default PartitionRouter implementation.
|
159
|
+
|
160
|
+
Args:
|
161
|
+
stream_state (StreamState): The state of the streams to be set. The format of the stream state should be:
|
162
|
+
{
|
163
|
+
"state": {
|
164
|
+
"last_updated": "2023-05-27T00:00:00Z"
|
165
|
+
},
|
166
|
+
"parent_state": {
|
167
|
+
"parent_stream_name": {
|
168
|
+
"last_updated": "2023-05-27T00:00:00Z"
|
169
|
+
}
|
170
|
+
},
|
171
|
+
"lookback_window": 132
|
172
|
+
}
|
173
|
+
"""
|
174
|
+
if not stream_state:
|
175
|
+
return
|
176
|
+
|
177
|
+
if "lookback_window" in stream_state:
|
178
|
+
self._lookback_window = stream_state["lookback_window"]
|
179
|
+
self._inject_lookback_into_stream_cursor(stream_state["lookback_window"])
|
180
|
+
|
181
|
+
if "state" in stream_state:
|
182
|
+
self._stream_cursor.set_initial_state(stream_state["state"])
|
183
|
+
elif "states" not in stream_state:
|
184
|
+
# We assume that `stream_state` is in the old global format
|
185
|
+
# Example: {"global_state_format_key": "global_state_format_value"}
|
186
|
+
self._stream_cursor.set_initial_state(stream_state)
|
187
|
+
|
188
|
+
# Set parent state for partition routers based on parent streams
|
189
|
+
self._partition_router.set_initial_state(stream_state)
|
190
|
+
|
191
|
+
def _inject_lookback_into_stream_cursor(self, lookback_window: int) -> None:
|
192
|
+
"""
|
193
|
+
Modifies the stream cursor's lookback window based on the duration of the previous sync.
|
194
|
+
This adjustment ensures the cursor is set to the minimal lookback window necessary for
|
195
|
+
avoiding missing data.
|
196
|
+
|
197
|
+
Parameters:
|
198
|
+
lookback_window (int): The lookback duration in seconds to be set, derived from
|
199
|
+
the previous sync.
|
200
|
+
|
201
|
+
Raises:
|
202
|
+
ValueError: If the cursor does not support dynamic lookback window adjustments.
|
203
|
+
"""
|
204
|
+
if hasattr(self._stream_cursor, "set_runtime_lookback_window"):
|
205
|
+
self._stream_cursor.set_runtime_lookback_window(lookback_window)
|
206
|
+
else:
|
207
|
+
raise ValueError(
|
208
|
+
"The cursor class for Global Substream Cursor does not have a set_runtime_lookback_window method"
|
209
|
+
)
|
210
|
+
|
211
|
+
def observe(self, stream_slice: StreamSlice, record: Record) -> None:
|
212
|
+
self._stream_cursor.observe(
|
213
|
+
StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice), record
|
214
|
+
)
|
215
|
+
|
216
|
+
def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None:
|
217
|
+
"""
|
218
|
+
Close the current stream slice.
|
219
|
+
|
220
|
+
This method is called when a stream slice is completed. For the global parent cursor, we close the child cursor
|
221
|
+
only after reading all slices. This ensures that we do not miss any child records from a later parent record
|
222
|
+
if the child cursor is earlier than a record from the first parent record.
|
223
|
+
|
224
|
+
Args:
|
225
|
+
stream_slice (StreamSlice): The stream slice to be closed.
|
226
|
+
*args (Any): Additional arguments.
|
227
|
+
"""
|
228
|
+
with self._lock:
|
229
|
+
self._slice_semaphore.acquire()
|
230
|
+
if self._all_slices_yielded and self._slice_semaphore._value == 0:
|
231
|
+
self._lookback_window = self._timer.finish()
|
232
|
+
self._stream_cursor.close_slice(
|
233
|
+
StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice), *args
|
234
|
+
)
|
235
|
+
|
236
|
+
def get_stream_state(self) -> StreamState:
|
237
|
+
state: dict[str, Any] = {"state": self._stream_cursor.get_stream_state()}
|
238
|
+
|
239
|
+
if self._parent_state:
|
240
|
+
state["parent_state"] = self._parent_state
|
241
|
+
|
242
|
+
if self._lookback_window is not None:
|
243
|
+
state["lookback_window"] = self._lookback_window
|
244
|
+
|
245
|
+
return state
|
246
|
+
|
247
|
+
def select_state(self, stream_slice: Optional[StreamSlice] = None) -> Optional[StreamState]:
|
248
|
+
# stream_slice is ignored as cursor is global
|
249
|
+
return self._stream_cursor.get_stream_state()
|
250
|
+
|
251
|
+
def get_request_params(
|
252
|
+
self,
|
253
|
+
*,
|
254
|
+
stream_state: Optional[StreamState] = None,
|
255
|
+
stream_slice: Optional[StreamSlice] = None,
|
256
|
+
next_page_token: Optional[Mapping[str, Any]] = None,
|
257
|
+
) -> Mapping[str, Any]:
|
258
|
+
if stream_slice:
|
259
|
+
return self._partition_router.get_request_params( # type: ignore # this always returns a mapping
|
260
|
+
stream_state=stream_state,
|
261
|
+
stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
|
262
|
+
next_page_token=next_page_token,
|
263
|
+
) | self._stream_cursor.get_request_params(
|
264
|
+
stream_state=stream_state,
|
265
|
+
stream_slice=StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice),
|
266
|
+
next_page_token=next_page_token,
|
267
|
+
)
|
268
|
+
else:
|
269
|
+
raise ValueError("A partition needs to be provided in order to get request params")
|
270
|
+
|
271
|
+
def get_request_headers(
|
272
|
+
self,
|
273
|
+
*,
|
274
|
+
stream_state: Optional[StreamState] = None,
|
275
|
+
stream_slice: Optional[StreamSlice] = None,
|
276
|
+
next_page_token: Optional[Mapping[str, Any]] = None,
|
277
|
+
) -> Mapping[str, Any]:
|
278
|
+
if stream_slice:
|
279
|
+
return self._partition_router.get_request_headers( # type: ignore # this always returns a mapping
|
280
|
+
stream_state=stream_state,
|
281
|
+
stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
|
282
|
+
next_page_token=next_page_token,
|
283
|
+
) | self._stream_cursor.get_request_headers(
|
284
|
+
stream_state=stream_state,
|
285
|
+
stream_slice=StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice),
|
286
|
+
next_page_token=next_page_token,
|
287
|
+
)
|
288
|
+
else:
|
289
|
+
raise ValueError("A partition needs to be provided in order to get request headers")
|
290
|
+
|
291
|
+
def get_request_body_data(
|
292
|
+
self,
|
293
|
+
*,
|
294
|
+
stream_state: Optional[StreamState] = None,
|
295
|
+
stream_slice: Optional[StreamSlice] = None,
|
296
|
+
next_page_token: Optional[Mapping[str, Any]] = None,
|
297
|
+
) -> Union[Mapping[str, Any], str]:
|
298
|
+
if stream_slice:
|
299
|
+
return self._partition_router.get_request_body_data( # type: ignore # this always returns a mapping
|
300
|
+
stream_state=stream_state,
|
301
|
+
stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
|
302
|
+
next_page_token=next_page_token,
|
303
|
+
) | self._stream_cursor.get_request_body_data(
|
304
|
+
stream_state=stream_state,
|
305
|
+
stream_slice=StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice),
|
306
|
+
next_page_token=next_page_token,
|
307
|
+
)
|
308
|
+
else:
|
309
|
+
raise ValueError("A partition needs to be provided in order to get request body data")
|
310
|
+
|
311
|
+
def get_request_body_json(
|
312
|
+
self,
|
313
|
+
*,
|
314
|
+
stream_state: Optional[StreamState] = None,
|
315
|
+
stream_slice: Optional[StreamSlice] = None,
|
316
|
+
next_page_token: Optional[Mapping[str, Any]] = None,
|
317
|
+
) -> Mapping[str, Any]:
|
318
|
+
if stream_slice:
|
319
|
+
return self._partition_router.get_request_body_json( # type: ignore # this always returns a mapping
|
320
|
+
stream_state=stream_state,
|
321
|
+
stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
|
322
|
+
next_page_token=next_page_token,
|
323
|
+
) | self._stream_cursor.get_request_body_json(
|
324
|
+
stream_state=stream_state,
|
325
|
+
stream_slice=StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice),
|
326
|
+
next_page_token=next_page_token,
|
327
|
+
)
|
328
|
+
else:
|
329
|
+
raise ValueError("A partition needs to be provided in order to get request body json")
|
330
|
+
|
331
|
+
def should_be_synced(self, record: Record) -> bool:
|
332
|
+
return self._stream_cursor.should_be_synced(self._convert_record_to_cursor_record(record))
|
333
|
+
|
334
|
+
def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
|
335
|
+
return self._stream_cursor.is_greater_than_or_equal(
|
336
|
+
self._convert_record_to_cursor_record(first),
|
337
|
+
self._convert_record_to_cursor_record(second),
|
338
|
+
)
|
339
|
+
|
340
|
+
@staticmethod
|
341
|
+
def _convert_record_to_cursor_record(record: Record) -> Record:
|
342
|
+
return Record(
|
343
|
+
data=record.data,
|
344
|
+
stream_name=record.stream_name,
|
345
|
+
associated_slice=StreamSlice(
|
346
|
+
partition={}, cursor_slice=record.associated_slice.cursor_slice
|
347
|
+
)
|
348
|
+
if record.associated_slice
|
349
|
+
else None,
|
350
|
+
)
|