airbyte-cdk 0.72.1__py3-none-any.whl → 6.17.1.dev1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/__init__.py +355 -6
- airbyte_cdk/cli/__init__.py +1 -0
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
- airbyte_cdk/cli/source_declarative_manifest/_run.py +230 -0
- airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
- airbyte_cdk/config_observation.py +29 -10
- airbyte_cdk/connector.py +24 -24
- airbyte_cdk/connector_builder/README.md +53 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +37 -11
- airbyte_cdk/connector_builder/main.py +45 -13
- airbyte_cdk/connector_builder/message_grouper.py +189 -50
- airbyte_cdk/connector_builder/models.py +3 -2
- airbyte_cdk/destinations/__init__.py +4 -3
- airbyte_cdk/destinations/destination.py +54 -20
- airbyte_cdk/destinations/vector_db_based/README.md +37 -0
- airbyte_cdk/destinations/vector_db_based/config.py +40 -17
- airbyte_cdk/destinations/vector_db_based/document_processor.py +56 -17
- airbyte_cdk/destinations/vector_db_based/embedder.py +57 -15
- airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
- airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
- airbyte_cdk/destinations/vector_db_based/writer.py +24 -5
- airbyte_cdk/entrypoint.py +153 -44
- airbyte_cdk/exception_handler.py +21 -3
- airbyte_cdk/logger.py +30 -44
- airbyte_cdk/models/__init__.py +13 -2
- airbyte_cdk/models/airbyte_protocol.py +86 -1
- airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
- airbyte_cdk/models/file_transfer_record_message.py +13 -0
- airbyte_cdk/models/well_known_types.py +1 -1
- airbyte_cdk/sources/__init__.py +5 -1
- airbyte_cdk/sources/abstract_source.py +125 -79
- airbyte_cdk/sources/concurrent_source/__init__.py +7 -2
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +102 -36
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +29 -36
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +94 -10
- airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +20 -14
- airbyte_cdk/sources/config.py +3 -2
- airbyte_cdk/sources/connector_state_manager.py +49 -83
- airbyte_cdk/sources/declarative/async_job/job.py +52 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +497 -0
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +75 -0
- airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
- airbyte_cdk/sources/declarative/async_job/status.py +24 -0
- airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
- airbyte_cdk/sources/declarative/auth/__init__.py +2 -3
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
- airbyte_cdk/sources/declarative/auth/jwt.py +191 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +60 -20
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +10 -2
- airbyte_cdk/sources/declarative/auth/token.py +28 -10
- airbyte_cdk/sources/declarative/auth/token_provider.py +9 -8
- airbyte_cdk/sources/declarative/checks/check_stream.py +16 -8
- airbyte_cdk/sources/declarative/checks/connection_checker.py +4 -2
- airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +490 -0
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +4 -0
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +26 -6
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +1185 -85
- airbyte_cdk/sources/declarative/declarative_source.py +5 -2
- airbyte_cdk/sources/declarative/declarative_stream.py +95 -9
- airbyte_cdk/sources/declarative/decoders/__init__.py +23 -2
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
- airbyte_cdk/sources/declarative/decoders/decoder.py +11 -4
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +92 -5
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +12 -1
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +29 -24
- airbyte_cdk/sources/declarative/extractors/http_selector.py +4 -5
- airbyte_cdk/sources/declarative/extractors/record_extractor.py +2 -3
- airbyte_cdk/sources/declarative/extractors/record_filter.py +63 -8
- airbyte_cdk/sources/declarative/extractors/record_selector.py +85 -26
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +177 -0
- airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
- airbyte_cdk/sources/declarative/incremental/__init__.py +31 -3
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +340 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +156 -48
- airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +350 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +174 -74
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
- airbyte_cdk/sources/declarative/interpolation/filters.py +27 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +23 -5
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +12 -8
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +13 -6
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +21 -6
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +9 -3
- airbyte_cdk/sources/declarative/interpolation/jinja.py +72 -37
- airbyte_cdk/sources/declarative/interpolation/macros.py +72 -17
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +193 -52
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
- airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
- airbyte_cdk/sources/declarative/models/__init__.py +1 -1
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +1319 -603
- airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +2 -2
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +26 -4
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +26 -15
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +1759 -225
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +24 -4
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +39 -9
- airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
- airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +15 -3
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +222 -39
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +19 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +19 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +4 -2
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +41 -9
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +29 -14
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +5 -13
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +32 -16
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +46 -56
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +6 -32
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +119 -41
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +228 -0
- airbyte_cdk/sources/declarative/requesters/http_requester.py +98 -344
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +105 -46
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +14 -8
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +19 -8
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +53 -21
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +42 -19
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +25 -12
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +13 -10
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +26 -13
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +15 -2
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +91 -0
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +31 -14
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +27 -15
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +63 -10
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +1 -1
- airbyte_cdk/sources/declarative/requesters/requester.py +9 -17
- airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
- airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
- airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
- airbyte_cdk/sources/declarative/retrievers/__init__.py +6 -2
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +100 -0
- airbyte_cdk/sources/declarative/retrievers/retriever.py +1 -3
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +229 -73
- airbyte_cdk/sources/declarative/schema/__init__.py +14 -1
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +5 -3
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +236 -0
- airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +8 -8
- airbyte_cdk/sources/declarative/spec/spec.py +12 -5
- airbyte_cdk/sources/declarative/stream_slicers/__init__.py +1 -2
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +88 -0
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +9 -14
- airbyte_cdk/sources/declarative/transformations/add_fields.py +19 -11
- airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
- airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +13 -10
- airbyte_cdk/sources/declarative/transformations/transformation.py +5 -5
- airbyte_cdk/sources/declarative/types.py +19 -110
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +31 -10
- airbyte_cdk/sources/embedded/base_integration.py +16 -5
- airbyte_cdk/sources/embedded/catalog.py +16 -4
- airbyte_cdk/sources/embedded/runner.py +19 -3
- airbyte_cdk/sources/embedded/tools.py +5 -2
- airbyte_cdk/sources/file_based/README.md +152 -0
- airbyte_cdk/sources/file_based/__init__.py +24 -0
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +22 -6
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +46 -10
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +47 -10
- airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
- airbyte_cdk/sources/file_based/config/csv_format.py +29 -10
- airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +16 -4
- airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
- airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
- airbyte_cdk/sources/file_based/config/unstructured_format.py +13 -5
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
- airbyte_cdk/sources/file_based/exceptions.py +18 -15
- airbyte_cdk/sources/file_based/file_based_source.py +140 -33
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +69 -5
- airbyte_cdk/sources/file_based/file_types/__init__.py +14 -1
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +75 -24
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +116 -34
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +37 -0
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +24 -8
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +60 -18
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +141 -41
- airbyte_cdk/sources/file_based/remote_file.py +1 -1
- airbyte_cdk/sources/file_based/schema_helpers.py +38 -10
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +50 -13
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +67 -27
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +14 -23
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +54 -18
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +21 -9
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +27 -10
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +147 -45
- airbyte_cdk/sources/http_logger.py +8 -3
- airbyte_cdk/sources/message/__init__.py +7 -1
- airbyte_cdk/sources/message/repository.py +18 -4
- airbyte_cdk/sources/source.py +42 -38
- airbyte_cdk/sources/streams/__init__.py +2 -2
- airbyte_cdk/sources/streams/availability_strategy.py +54 -3
- airbyte_cdk/sources/streams/call_rate.py +64 -21
- airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
- airbyte_cdk/sources/{declarative/incremental → streams/checkpoint}/cursor.py +17 -14
- airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
- airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
- airbyte_cdk/sources/streams/concurrent/README.md +7 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +7 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +84 -75
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +30 -2
- airbyte_cdk/sources/streams/concurrent/cursor.py +313 -48
- airbyte_cdk/sources/streams/concurrent/default_stream.py +12 -3
- airbyte_cdk/sources/streams/concurrent/exceptions.py +3 -0
- airbyte_cdk/sources/streams/concurrent/helpers.py +14 -3
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +12 -3
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +10 -3
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -16
- airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +15 -5
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +109 -17
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +90 -72
- airbyte_cdk/sources/streams/core.py +412 -87
- airbyte_cdk/sources/streams/http/__init__.py +2 -1
- airbyte_cdk/sources/streams/http/availability_strategy.py +12 -101
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
- airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
- airbyte_cdk/sources/streams/http/exceptions.py +27 -7
- airbyte_cdk/sources/streams/http/http.py +369 -246
- airbyte_cdk/sources/streams/http/http_client.py +531 -0
- airbyte_cdk/sources/streams/http/rate_limiting.py +76 -12
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +28 -9
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +90 -35
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
- airbyte_cdk/sources/types.py +154 -0
- airbyte_cdk/sources/utils/record_helper.py +36 -21
- airbyte_cdk/sources/utils/schema_helpers.py +13 -6
- airbyte_cdk/sources/utils/slice_logger.py +4 -1
- airbyte_cdk/sources/utils/transform.py +54 -20
- airbyte_cdk/sql/_util/hashing.py +34 -0
- airbyte_cdk/sql/_util/name_normalizers.py +92 -0
- airbyte_cdk/sql/constants.py +32 -0
- airbyte_cdk/sql/exceptions.py +235 -0
- airbyte_cdk/sql/secrets.py +123 -0
- airbyte_cdk/sql/shared/__init__.py +15 -0
- airbyte_cdk/sql/shared/catalog_providers.py +145 -0
- airbyte_cdk/sql/shared/sql_processor.py +786 -0
- airbyte_cdk/sql/types.py +160 -0
- airbyte_cdk/test/catalog_builder.py +70 -18
- airbyte_cdk/test/entrypoint_wrapper.py +117 -42
- airbyte_cdk/test/mock_http/__init__.py +1 -1
- airbyte_cdk/test/mock_http/matcher.py +6 -0
- airbyte_cdk/test/mock_http/mocker.py +57 -10
- airbyte_cdk/test/mock_http/request.py +19 -3
- airbyte_cdk/test/mock_http/response.py +3 -1
- airbyte_cdk/test/mock_http/response_builder.py +32 -16
- airbyte_cdk/test/state_builder.py +18 -10
- airbyte_cdk/test/utils/__init__.py +1 -0
- airbyte_cdk/test/utils/data.py +24 -0
- airbyte_cdk/test/utils/http_mocking.py +16 -0
- airbyte_cdk/test/utils/manifest_only_fixtures.py +60 -0
- airbyte_cdk/test/utils/reading.py +26 -0
- airbyte_cdk/utils/__init__.py +2 -1
- airbyte_cdk/utils/airbyte_secrets_utils.py +5 -3
- airbyte_cdk/utils/analytics_message.py +10 -2
- airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
- airbyte_cdk/utils/event_timing.py +10 -10
- airbyte_cdk/utils/mapping_helpers.py +3 -1
- airbyte_cdk/utils/message_utils.py +20 -11
- airbyte_cdk/utils/print_buffer.py +75 -0
- airbyte_cdk/utils/schema_inferrer.py +198 -28
- airbyte_cdk/utils/slice_hasher.py +30 -0
- airbyte_cdk/utils/spec_schema_transformations.py +6 -3
- airbyte_cdk/utils/stream_status_utils.py +8 -1
- airbyte_cdk/utils/traced_exception.py +61 -21
- airbyte_cdk-6.17.1.dev1.dist-info/METADATA +109 -0
- airbyte_cdk-6.17.1.dev1.dist-info/RECORD +350 -0
- {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.17.1.dev1.dist-info}/WHEEL +1 -2
- airbyte_cdk-6.17.1.dev1.dist-info/entry_points.txt +3 -0
- airbyte_cdk/sources/declarative/create_partial.py +0 -92
- airbyte_cdk/sources/declarative/parsers/class_types_registry.py +0 -102
- airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py +0 -64
- airbyte_cdk/sources/declarative/requesters/error_handlers/response_action.py +0 -16
- airbyte_cdk/sources/declarative/requesters/error_handlers/response_status.py +0 -68
- airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py +0 -114
- airbyte_cdk/sources/deprecated/base_source.py +0 -94
- airbyte_cdk/sources/deprecated/client.py +0 -99
- airbyte_cdk/sources/singer/__init__.py +0 -8
- airbyte_cdk/sources/singer/singer_helpers.py +0 -304
- airbyte_cdk/sources/singer/source.py +0 -186
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -23
- airbyte_cdk/sources/streams/http/auth/__init__.py +0 -17
- airbyte_cdk/sources/streams/http/auth/core.py +0 -29
- airbyte_cdk/sources/streams/http/auth/oauth.py +0 -113
- airbyte_cdk/sources/streams/http/auth/token.py +0 -47
- airbyte_cdk/sources/streams/utils/stream_helper.py +0 -40
- airbyte_cdk/sources/utils/catalog_helpers.py +0 -22
- airbyte_cdk/sources/utils/schema_models.py +0 -84
- airbyte_cdk-0.72.1.dist-info/METADATA +0 -243
- airbyte_cdk-0.72.1.dist-info/RECORD +0 -466
- airbyte_cdk-0.72.1.dist-info/top_level.txt +0 -3
- source_declarative_manifest/main.py +0 -29
- unit_tests/connector_builder/__init__.py +0 -3
- unit_tests/connector_builder/test_connector_builder_handler.py +0 -871
- unit_tests/connector_builder/test_message_grouper.py +0 -713
- unit_tests/connector_builder/utils.py +0 -27
- unit_tests/destinations/test_destination.py +0 -243
- unit_tests/singer/test_singer_helpers.py +0 -56
- unit_tests/singer/test_singer_source.py +0 -112
- unit_tests/sources/__init__.py +0 -0
- unit_tests/sources/concurrent_source/__init__.py +0 -3
- unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +0 -106
- unit_tests/sources/declarative/__init__.py +0 -3
- unit_tests/sources/declarative/auth/__init__.py +0 -3
- unit_tests/sources/declarative/auth/test_oauth.py +0 -331
- unit_tests/sources/declarative/auth/test_selective_authenticator.py +0 -39
- unit_tests/sources/declarative/auth/test_session_token_auth.py +0 -182
- unit_tests/sources/declarative/auth/test_token_auth.py +0 -200
- unit_tests/sources/declarative/auth/test_token_provider.py +0 -73
- unit_tests/sources/declarative/checks/__init__.py +0 -3
- unit_tests/sources/declarative/checks/test_check_stream.py +0 -146
- unit_tests/sources/declarative/decoders/__init__.py +0 -0
- unit_tests/sources/declarative/decoders/test_json_decoder.py +0 -16
- unit_tests/sources/declarative/external_component.py +0 -13
- unit_tests/sources/declarative/extractors/__init__.py +0 -3
- unit_tests/sources/declarative/extractors/test_dpath_extractor.py +0 -55
- unit_tests/sources/declarative/extractors/test_record_filter.py +0 -55
- unit_tests/sources/declarative/extractors/test_record_selector.py +0 -179
- unit_tests/sources/declarative/incremental/__init__.py +0 -0
- unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +0 -860
- unit_tests/sources/declarative/incremental/test_per_partition_cursor.py +0 -406
- unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +0 -332
- unit_tests/sources/declarative/interpolation/__init__.py +0 -3
- unit_tests/sources/declarative/interpolation/test_filters.py +0 -80
- unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py +0 -40
- unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py +0 -35
- unit_tests/sources/declarative/interpolation/test_interpolated_nested_mapping.py +0 -45
- unit_tests/sources/declarative/interpolation/test_interpolated_string.py +0 -25
- unit_tests/sources/declarative/interpolation/test_jinja.py +0 -240
- unit_tests/sources/declarative/interpolation/test_macros.py +0 -73
- unit_tests/sources/declarative/parsers/__init__.py +0 -3
- unit_tests/sources/declarative/parsers/test_manifest_component_transformer.py +0 -406
- unit_tests/sources/declarative/parsers/test_manifest_reference_resolver.py +0 -139
- unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +0 -1847
- unit_tests/sources/declarative/parsers/testing_components.py +0 -36
- unit_tests/sources/declarative/partition_routers/__init__.py +0 -3
- unit_tests/sources/declarative/partition_routers/test_list_partition_router.py +0 -155
- unit_tests/sources/declarative/partition_routers/test_single_partition_router.py +0 -14
- unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py +0 -404
- unit_tests/sources/declarative/requesters/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_constant_backoff.py +0 -34
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py +0 -36
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py +0 -38
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py +0 -35
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py +0 -64
- unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py +0 -213
- unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py +0 -178
- unit_tests/sources/declarative/requesters/error_handlers/test_http_response_filter.py +0 -121
- unit_tests/sources/declarative/requesters/error_handlers/test_response_status.py +0 -44
- unit_tests/sources/declarative/requesters/paginators/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py +0 -64
- unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +0 -313
- unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py +0 -12
- unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py +0 -58
- unit_tests/sources/declarative/requesters/paginators/test_page_increment.py +0 -70
- unit_tests/sources/declarative/requesters/paginators/test_request_option.py +0 -43
- unit_tests/sources/declarative/requesters/paginators/test_stop_condition.py +0 -105
- unit_tests/sources/declarative/requesters/request_options/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py +0 -101
- unit_tests/sources/declarative/requesters/test_http_requester.py +0 -974
- unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py +0 -32
- unit_tests/sources/declarative/retrievers/__init__.py +0 -3
- unit_tests/sources/declarative/retrievers/test_simple_retriever.py +0 -542
- unit_tests/sources/declarative/schema/__init__.py +0 -6
- unit_tests/sources/declarative/schema/source_test/SourceTest.py +0 -8
- unit_tests/sources/declarative/schema/source_test/__init__.py +0 -3
- unit_tests/sources/declarative/schema/test_default_schema_loader.py +0 -32
- unit_tests/sources/declarative/schema/test_inline_schema_loader.py +0 -19
- unit_tests/sources/declarative/schema/test_json_file_schema_loader.py +0 -26
- unit_tests/sources/declarative/states/__init__.py +0 -3
- unit_tests/sources/declarative/stream_slicers/__init__.py +0 -3
- unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py +0 -225
- unit_tests/sources/declarative/test_create_partial.py +0 -83
- unit_tests/sources/declarative/test_declarative_stream.py +0 -103
- unit_tests/sources/declarative/test_manifest_declarative_source.py +0 -1260
- unit_tests/sources/declarative/test_types.py +0 -39
- unit_tests/sources/declarative/test_yaml_declarative_source.py +0 -148
- unit_tests/sources/file_based/__init__.py +0 -0
- unit_tests/sources/file_based/availability_strategy/__init__.py +0 -0
- unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py +0 -100
- unit_tests/sources/file_based/config/__init__.py +0 -0
- unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +0 -28
- unit_tests/sources/file_based/config/test_csv_format.py +0 -34
- unit_tests/sources/file_based/config/test_file_based_stream_config.py +0 -84
- unit_tests/sources/file_based/discovery_policy/__init__.py +0 -0
- unit_tests/sources/file_based/discovery_policy/test_default_discovery_policy.py +0 -31
- unit_tests/sources/file_based/file_types/__init__.py +0 -0
- unit_tests/sources/file_based/file_types/test_avro_parser.py +0 -243
- unit_tests/sources/file_based/file_types/test_csv_parser.py +0 -546
- unit_tests/sources/file_based/file_types/test_jsonl_parser.py +0 -158
- unit_tests/sources/file_based/file_types/test_parquet_parser.py +0 -274
- unit_tests/sources/file_based/file_types/test_unstructured_parser.py +0 -593
- unit_tests/sources/file_based/helpers.py +0 -70
- unit_tests/sources/file_based/in_memory_files_source.py +0 -211
- unit_tests/sources/file_based/scenarios/__init__.py +0 -0
- unit_tests/sources/file_based/scenarios/avro_scenarios.py +0 -744
- unit_tests/sources/file_based/scenarios/check_scenarios.py +0 -220
- unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +0 -2844
- unit_tests/sources/file_based/scenarios/csv_scenarios.py +0 -3105
- unit_tests/sources/file_based/scenarios/file_based_source_builder.py +0 -91
- unit_tests/sources/file_based/scenarios/incremental_scenarios.py +0 -1926
- unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +0 -930
- unit_tests/sources/file_based/scenarios/parquet_scenarios.py +0 -754
- unit_tests/sources/file_based/scenarios/scenario_builder.py +0 -234
- unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +0 -608
- unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +0 -746
- unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +0 -726
- unit_tests/sources/file_based/stream/__init__.py +0 -0
- unit_tests/sources/file_based/stream/concurrent/__init__.py +0 -0
- unit_tests/sources/file_based/stream/concurrent/test_adapters.py +0 -362
- unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +0 -458
- unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +0 -310
- unit_tests/sources/file_based/stream/test_default_file_based_stream.py +0 -244
- unit_tests/sources/file_based/test_file_based_scenarios.py +0 -320
- unit_tests/sources/file_based/test_file_based_stream_reader.py +0 -272
- unit_tests/sources/file_based/test_scenarios.py +0 -253
- unit_tests/sources/file_based/test_schema_helpers.py +0 -346
- unit_tests/sources/fixtures/__init__.py +0 -3
- unit_tests/sources/fixtures/source_test_fixture.py +0 -153
- unit_tests/sources/message/__init__.py +0 -0
- unit_tests/sources/message/test_repository.py +0 -153
- unit_tests/sources/streams/__init__.py +0 -0
- unit_tests/sources/streams/concurrent/__init__.py +0 -3
- unit_tests/sources/streams/concurrent/scenarios/__init__.py +0 -3
- unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +0 -250
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +0 -140
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +0 -452
- unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +0 -76
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +0 -418
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +0 -142
- unit_tests/sources/streams/concurrent/scenarios/utils.py +0 -55
- unit_tests/sources/streams/concurrent/test_adapters.py +0 -380
- unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +0 -684
- unit_tests/sources/streams/concurrent/test_cursor.py +0 -139
- unit_tests/sources/streams/concurrent/test_datetime_state_converter.py +0 -369
- unit_tests/sources/streams/concurrent/test_default_stream.py +0 -197
- unit_tests/sources/streams/concurrent/test_partition_enqueuer.py +0 -90
- unit_tests/sources/streams/concurrent/test_partition_reader.py +0 -67
- unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +0 -106
- unit_tests/sources/streams/http/__init__.py +0 -0
- unit_tests/sources/streams/http/auth/__init__.py +0 -0
- unit_tests/sources/streams/http/auth/test_auth.py +0 -173
- unit_tests/sources/streams/http/requests_native_auth/__init__.py +0 -0
- unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +0 -423
- unit_tests/sources/streams/http/test_availability_strategy.py +0 -180
- unit_tests/sources/streams/http/test_http.py +0 -635
- unit_tests/sources/streams/test_availability_strategy.py +0 -70
- unit_tests/sources/streams/test_call_rate.py +0 -300
- unit_tests/sources/streams/test_stream_read.py +0 -405
- unit_tests/sources/streams/test_streams_core.py +0 -184
- unit_tests/sources/test_abstract_source.py +0 -1442
- unit_tests/sources/test_concurrent_source.py +0 -112
- unit_tests/sources/test_config.py +0 -92
- unit_tests/sources/test_connector_state_manager.py +0 -482
- unit_tests/sources/test_http_logger.py +0 -252
- unit_tests/sources/test_integration_source.py +0 -86
- unit_tests/sources/test_source.py +0 -684
- unit_tests/sources/test_source_read.py +0 -460
- unit_tests/test/__init__.py +0 -0
- unit_tests/test/mock_http/__init__.py +0 -0
- unit_tests/test/mock_http/test_matcher.py +0 -53
- unit_tests/test/mock_http/test_mocker.py +0 -214
- unit_tests/test/mock_http/test_request.py +0 -117
- unit_tests/test/mock_http/test_response_builder.py +0 -177
- unit_tests/test/test_entrypoint_wrapper.py +0 -240
- unit_tests/utils/__init__.py +0 -0
- unit_tests/utils/test_datetime_format_inferrer.py +0 -60
- unit_tests/utils/test_mapping_helpers.py +0 -54
- unit_tests/utils/test_message_utils.py +0 -91
- unit_tests/utils/test_rate_limiting.py +0 -26
- unit_tests/utils/test_schema_inferrer.py +0 -202
- unit_tests/utils/test_secret_utils.py +0 -135
- unit_tests/utils/test_stream_status_utils.py +0 -61
- unit_tests/utils/test_traced_exception.py +0 -107
- airbyte_cdk/sources/{deprecated → declarative/async_job}/__init__.py +0 -0
- {source_declarative_manifest → airbyte_cdk/sources/declarative/migrations}/__init__.py +0 -0
- {unit_tests/destinations → airbyte_cdk/sql}/__init__.py +0 -0
- {unit_tests/singer → airbyte_cdk/sql/_util}/__init__.py +0 -0
- {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.17.1.dev1.dist-info}/LICENSE.txt +0 -0
@@ -2,8 +2,28 @@
|
|
2
2
|
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
-
from airbyte_cdk.sources.declarative.partition_routers.
|
6
|
-
|
7
|
-
|
5
|
+
from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
|
6
|
+
AsyncJobPartitionRouter,
|
7
|
+
)
|
8
|
+
from airbyte_cdk.sources.declarative.partition_routers.cartesian_product_stream_slicer import (
|
9
|
+
CartesianProductStreamSlicer,
|
10
|
+
)
|
11
|
+
from airbyte_cdk.sources.declarative.partition_routers.list_partition_router import (
|
12
|
+
ListPartitionRouter,
|
13
|
+
)
|
14
|
+
from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
|
15
|
+
from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import (
|
16
|
+
SinglePartitionRouter,
|
17
|
+
)
|
18
|
+
from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
|
19
|
+
SubstreamPartitionRouter,
|
20
|
+
)
|
8
21
|
|
9
|
-
__all__ = [
|
22
|
+
__all__ = [
|
23
|
+
"AsyncJobPartitionRouter",
|
24
|
+
"CartesianProductStreamSlicer",
|
25
|
+
"ListPartitionRouter",
|
26
|
+
"SinglePartitionRouter",
|
27
|
+
"SubstreamPartitionRouter",
|
28
|
+
"PartitionRouter",
|
29
|
+
]
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from dataclasses import InitVar, dataclass, field
|
4
|
+
from typing import Any, Callable, Iterable, Mapping, Optional
|
5
|
+
|
6
|
+
from airbyte_cdk.models import FailureType
|
7
|
+
from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
|
8
|
+
AsyncJobOrchestrator,
|
9
|
+
AsyncPartition,
|
10
|
+
)
|
11
|
+
from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import (
|
12
|
+
SinglePartitionRouter,
|
13
|
+
)
|
14
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
|
15
|
+
from airbyte_cdk.sources.types import Config, StreamSlice
|
16
|
+
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
17
|
+
|
18
|
+
|
19
|
+
@dataclass
class AsyncJobPartitionRouter(StreamSlicer):
    """
    Stream slicer backed by an async job orchestrator.

    ``stream_slices`` hands the slices of the wrapped ``stream_slicer`` to an orchestrator built
    by ``job_orchestrator_factory`` and yields one stream slice per completed partition that the
    orchestrator reports. ``fetch_records`` reads the records of such a partition through the
    same orchestrator.
    """

    config: Config
    parameters: InitVar[Mapping[str, Any]]
    job_orchestrator_factory: Callable[[Iterable[StreamSlice]], AsyncJobOrchestrator]
    stream_slicer: StreamSlicer = field(
        default_factory=lambda: SinglePartitionRouter(parameters={})
    )

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        self._parameters = parameters
        self._job_orchestrator_factory = self.job_orchestrator_factory
        # Built by stream_slices(); stays None until that method has started running.
        self._job_orchestrator: Optional[AsyncJobOrchestrator] = None

    def stream_slices(self) -> Iterable[StreamSlice]:
        """
        Build the orchestrator from the wrapped slicer's slices and yield a slice per completed
        partition.

        Each yielded slice carries the original partition values plus a ``"partition"`` entry
        holding the completed partition object, and reuses the original cursor slice.
        """
        orchestrator = self._job_orchestrator_factory(self.stream_slicer.stream_slices())
        self._job_orchestrator = orchestrator

        for done in orchestrator.create_and_get_completed_partitions():
            source_slice = done.stream_slice
            merged_partition = dict(source_slice.partition)
            merged_partition["partition"] = done
            yield StreamSlice(
                partition=merged_partition,
                cursor_slice=source_slice.cursor_slice,
            )

    def fetch_records(self, partition: AsyncPartition) -> Iterable[Mapping[str, Any]]:
        """
        Return the records of ``partition`` as provided by the job orchestrator.

        Fetching records goes beyond what a PartitionRouter/StreamSlicer is normally responsible
        for. It lives here because the orchestrator is needed to retrieve records, and the
        alternatives are passing the orchestrator to the AsyncRetriever or storing it on
        multiple classes.

        Raises:
            AirbyteTracedException: if no orchestrator has been created yet, i.e. this is called
                before ``stream_slices``.
        """
        orchestrator = self._job_orchestrator
        if orchestrator:
            return orchestrator.fetch_records(partition=partition)

        raise AirbyteTracedException(
            message="Invalid state within AsyncJobRetriever. Please contact Airbyte Support",
            internal_message="AsyncPartitionRepository is expected to be accessed only after `stream_slices`",
            failure_type=FailureType.system_error,
        )
|
@@ -0,0 +1,176 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
import itertools
|
6
|
+
import logging
|
7
|
+
from collections import ChainMap
|
8
|
+
from collections.abc import Callable
|
9
|
+
from dataclasses import InitVar, dataclass
|
10
|
+
from typing import Any, Iterable, List, Mapping, Optional
|
11
|
+
|
12
|
+
from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
|
13
|
+
from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
|
14
|
+
SubstreamPartitionRouter,
|
15
|
+
)
|
16
|
+
from airbyte_cdk.sources.types import StreamSlice, StreamState
|
17
|
+
|
18
|
+
|
19
|
+
def check_for_substream_in_slicers(
    slicers: Iterable[PartitionRouter], log_warning: Callable[[str], None]
) -> None:
    """
    Warn when a SubstreamPartitionRouter is used inside a CartesianProductStreamSlicer.

    Scanning of the given level stops at the first SubstreamPartitionRouter found. Any
    CartesianProductStreamSlicer met before that point is scanned recursively through its
    own ``stream_slicers``.

    Args:
        slicers (Iterable[PartitionRouter]): The slicers to inspect.
        log_warning (Callable): Function called with the warning text.
    """
    for candidate in slicers:
        if isinstance(candidate, SubstreamPartitionRouter):
            log_warning("Parent state handling is not supported for CartesianProductStreamSlicer.")
            break
        if isinstance(candidate, CartesianProductStreamSlicer):
            # Descend into the nested product's own slicers.
            check_for_substream_in_slicers(candidate.stream_slicers, log_warning)
|
37
|
+
|
38
|
+
|
39
|
+
@dataclass
class CartesianProductStreamSlicer(PartitionRouter):
    """
    Partition router that yields the cartesian product of the slices of its input routers.

    Given two routers producing
        A: [{"i": 0}, {"i": 1}, {"i": 2}]
        B: [{"s": "hello"}, {"s": "world"}]
    the resulting stream slices are
        [
            {"i": 0, "s": "hello"},
            {"i": 0, "s": "world"},
            {"i": 1, "s": "hello"},
            {"i": 1, "s": "world"},
            {"i": 2, "s": "hello"},
            {"i": 2, "s": "world"},
        ]

    Attributes:
        stream_slicers (List[PartitionRouter]): Underlying routers. The request options
            (params, headers, body data, body JSON) returned by this router are the merge of
            the options of every input router. When two routers define the same key, the value
            from the router that comes first in this list is kept.
    """

    stream_slicers: List[PartitionRouter]
    parameters: InitVar[Mapping[str, Any]]

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        check_for_substream_in_slicers(self.stream_slicers, self.logger.warning)

    @staticmethod
    def _merge_first_wins(option_sets: Iterable[Mapping[str, Any]]) -> Mapping[str, Any]:
        """Merge mappings into one dict; on duplicate keys the earliest mapping's value wins."""
        return dict(ChainMap(*option_sets))  # type: ignore # ChainMap expects a MutableMapping[Never, Never] for reasons

    def get_request_params(
        self,
        *,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> Mapping[str, Any]:
        """Return the merged request params of all underlying routers."""
        return self._merge_first_wins(
            router.get_request_params(
                stream_state=stream_state,
                stream_slice=stream_slice,
                next_page_token=next_page_token,
            )
            for router in self.stream_slicers
        )

    def get_request_headers(
        self,
        *,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> Mapping[str, Any]:
        """Return the merged request headers of all underlying routers."""
        return self._merge_first_wins(
            router.get_request_headers(
                stream_state=stream_state,
                stream_slice=stream_slice,
                next_page_token=next_page_token,
            )
            for router in self.stream_slicers
        )

    def get_request_body_data(
        self,
        *,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> Mapping[str, Any]:
        """Return the merged request body data of all underlying routers."""
        return self._merge_first_wins(
            router.get_request_body_data(
                stream_state=stream_state,
                stream_slice=stream_slice,
                next_page_token=next_page_token,
            )
            for router in self.stream_slicers
        )

    def get_request_body_json(
        self,
        *,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> Mapping[str, Any]:
        """Return the merged request body JSON of all underlying routers."""
        return self._merge_first_wins(
            router.get_request_body_json(
                stream_state=stream_state,
                stream_slice=stream_slice,
                next_page_token=next_page_token,
            )
            for router in self.stream_slicers
        )

    def stream_slices(self) -> Iterable[StreamSlice]:
        """
        Yield one slice per combination of the underlying routers' slices.

        The partitions of a combination are merged (earliest router wins on duplicate keys).
        At most one member of a combination may carry a non-empty cursor slice.

        Raises:
            ValueError: if more than one slice in a combination has a non-empty cursor slice.
        """
        per_router_slices = (router.stream_slices() for router in self.stream_slicers)
        for combination in itertools.product(*per_router_slices):
            partition = dict(ChainMap(*[member.partition for member in combination]))  # type: ignore # ChainMap expects a MutableMapping[Never, Never] for reasons
            non_empty_cursors = [
                member.cursor_slice for member in combination if member.cursor_slice
            ]
            if len(non_empty_cursors) > 1:
                raise ValueError(
                    f"There should only be a single cursor slice. Found {non_empty_cursors}"
                )
            yield StreamSlice(
                partition=partition,
                cursor_slice=non_empty_cursors[0] if non_empty_cursors else {},
            )

    def set_initial_state(self, stream_state: StreamState) -> None:
        """
        No-op: parent stream states are not supported for the cartesian product stream slicer.
        """
        return None

    def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
        """
        Always returns None: parent stream states are not supported for the cartesian product
        stream slicer.
        """
        return None

    @property
    def logger(self) -> logging.Logger:
        """Logger used to report unsupported configurations."""
        return logging.getLogger("airbyte.CartesianProductStreamSlicer")
|
@@ -6,13 +6,16 @@ from dataclasses import InitVar, dataclass
|
|
6
6
|
from typing import Any, Iterable, List, Mapping, Optional, Union
|
7
7
|
|
8
8
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
9
|
-
from airbyte_cdk.sources.declarative.
|
10
|
-
from airbyte_cdk.sources.declarative.
|
11
|
-
|
9
|
+
from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
|
10
|
+
from airbyte_cdk.sources.declarative.requesters.request_option import (
|
11
|
+
RequestOption,
|
12
|
+
RequestOptionType,
|
13
|
+
)
|
14
|
+
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
12
15
|
|
13
16
|
|
14
17
|
@dataclass
|
15
|
-
class ListPartitionRouter(
|
18
|
+
class ListPartitionRouter(PartitionRouter):
|
16
19
|
"""
|
17
20
|
Partition router that iterates over the values of a list
|
18
21
|
If values is a string, then evaluate it as literal and assert the resulting literal is a list
|
@@ -32,9 +35,13 @@ class ListPartitionRouter(StreamSlicer):
|
|
32
35
|
|
33
36
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
34
37
|
if isinstance(self.values, str):
|
35
|
-
self.values = InterpolatedString.create(self.values, parameters=parameters).eval(
|
38
|
+
self.values = InterpolatedString.create(self.values, parameters=parameters).eval(
|
39
|
+
self.config
|
40
|
+
)
|
36
41
|
self._cursor_field = (
|
37
|
-
InterpolatedString(string=self.cursor_field, parameters=parameters)
|
42
|
+
InterpolatedString(string=self.cursor_field, parameters=parameters)
|
43
|
+
if isinstance(self.cursor_field, str)
|
44
|
+
else self.cursor_field
|
38
45
|
)
|
39
46
|
|
40
47
|
self._cursor = None
|
@@ -76,10 +83,21 @@ class ListPartitionRouter(StreamSlicer):
|
|
76
83
|
return self._get_request_option(RequestOptionType.body_json, stream_slice)
|
77
84
|
|
78
85
|
def stream_slices(self) -> Iterable[StreamSlice]:
    """
    Return one slice per entry of ``self.values``.

    Each slice's partition maps the evaluated cursor field name to the entry; the cursor
    slice is always empty.
    """
    slices = []
    for value in self.values:
        # Evaluated per entry so nothing is evaluated when there are no values.
        partition_key = self._cursor_field.eval(self.config)
        slices.append(StreamSlice(partition={partition_key: value}, cursor_slice={}))
    return slices
|
80
92
|
|
81
|
-
def _get_request_option(
|
82
|
-
|
93
|
+
def _get_request_option(
|
94
|
+
self, request_option_type: RequestOptionType, stream_slice: Optional[StreamSlice]
|
95
|
+
) -> Mapping[str, Any]:
|
96
|
+
if (
|
97
|
+
self.request_option
|
98
|
+
and self.request_option.inject_into == request_option_type
|
99
|
+
and stream_slice
|
100
|
+
):
|
83
101
|
slice_value = stream_slice.get(self._cursor_field.eval(self.config))
|
84
102
|
if slice_value:
|
85
103
|
return {self.request_option.field_name.eval(self.config): slice_value} # type: ignore # field_name is always casted to InterpolatedString
|
@@ -87,3 +105,15 @@ class ListPartitionRouter(StreamSlicer):
|
|
87
105
|
return {}
|
88
106
|
else:
|
89
107
|
return {}
|
108
|
+
|
109
|
+
def set_initial_state(self, stream_state: StreamState) -> None:
    """
    No-op: ListPartitionRouter doesn't have parent streams, so ``stream_state`` is ignored.
    """
    return None
|
114
|
+
|
115
|
+
def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
    """
    Always returns None: ListPartitionRouter doesn't have parent streams.
    """
    return None
|
@@ -0,0 +1,62 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from abc import abstractmethod
|
6
|
+
from dataclasses import dataclass
|
7
|
+
from typing import Mapping, Optional
|
8
|
+
|
9
|
+
from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer import StreamSlicer
|
10
|
+
from airbyte_cdk.sources.types import StreamState
|
11
|
+
|
12
|
+
|
13
|
+
@dataclass
class PartitionRouter(StreamSlicer):
    """
    Base class for partition routers.

    Methods:
        set_initial_state(stream_state): Set the state of the parent streams.
        get_stream_state(): Get the state of the parent streams.
    """

    @abstractmethod
    def set_initial_state(self, stream_state: StreamState) -> None:
        """
        Set the state of the parent streams.

        This method should only be implemented if the slicer is based on some parent stream and needs to read this stream
        incrementally using the state.

        Args:
            stream_state (StreamState): The state of the streams to be set. The expected format is a dictionary that includes
                                        'parent_state' which is a dictionary of parent state names to their corresponding state.
                Example:
                {
                    "parent_state": {
                        "parent_stream_name_1": { ... },
                        "parent_stream_name_2": { ... },
                        ...
                    }
                }
        """

    @abstractmethod
    def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
        """
        Get the state of the parent streams.

        This method should only be implemented if the slicer is based on some parent stream and needs to read this stream
        incrementally using the state.

        Returns:
            Optional[Mapping[str, StreamState]]: The current state of the parent streams in a dictionary format.
                 The returned format will be:
                 {
                     "parent_stream_name1": {
                         "last_updated": "2023-05-27T00:00:00Z"
                     },
                     "parent_stream_name2": {
                         "last_updated": "2023-05-27T00:00:00Z"
                     }
                 }
        """
|
@@ -5,12 +5,12 @@
|
|
5
5
|
from dataclasses import InitVar, dataclass
|
6
6
|
from typing import Any, Iterable, Mapping, Optional
|
7
7
|
|
8
|
-
from airbyte_cdk.sources.declarative.
|
9
|
-
from airbyte_cdk.sources.
|
8
|
+
from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
|
9
|
+
from airbyte_cdk.sources.types import StreamSlice, StreamState
|
10
10
|
|
11
11
|
|
12
12
|
@dataclass
|
13
|
-
class SinglePartitionRouter(
|
13
|
+
class SinglePartitionRouter(PartitionRouter):
|
14
14
|
"""Partition router returning only a stream slice"""
|
15
15
|
|
16
16
|
parameters: InitVar[Mapping[str, Any]]
|
@@ -49,3 +49,15 @@ class SinglePartitionRouter(StreamSlicer):
|
|
49
49
|
|
50
50
|
def stream_slices(self) -> Iterable[StreamSlice]:
    """Yield exactly one slice whose partition and cursor slice are both empty."""
    only_slice = StreamSlice(partition={}, cursor_slice={})
    yield only_slice
|
52
|
+
|
53
|
+
def set_initial_state(self, stream_state: StreamState) -> None:
    """
    No-op: SinglePartitionRouter doesn't have parent streams, so ``stream_state`` is ignored.
    """
    return None
|
58
|
+
|
59
|
+
def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
    """
    Always returns None: SinglePartitionRouter doesn't have parent streams.
    """
    return None
|