airbyte-cdk 0.72.1__py3-none-any.whl → 6.17.1.dev1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/__init__.py +355 -6
- airbyte_cdk/cli/__init__.py +1 -0
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
- airbyte_cdk/cli/source_declarative_manifest/_run.py +230 -0
- airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
- airbyte_cdk/config_observation.py +29 -10
- airbyte_cdk/connector.py +24 -24
- airbyte_cdk/connector_builder/README.md +53 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +37 -11
- airbyte_cdk/connector_builder/main.py +45 -13
- airbyte_cdk/connector_builder/message_grouper.py +189 -50
- airbyte_cdk/connector_builder/models.py +3 -2
- airbyte_cdk/destinations/__init__.py +4 -3
- airbyte_cdk/destinations/destination.py +54 -20
- airbyte_cdk/destinations/vector_db_based/README.md +37 -0
- airbyte_cdk/destinations/vector_db_based/config.py +40 -17
- airbyte_cdk/destinations/vector_db_based/document_processor.py +56 -17
- airbyte_cdk/destinations/vector_db_based/embedder.py +57 -15
- airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
- airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
- airbyte_cdk/destinations/vector_db_based/writer.py +24 -5
- airbyte_cdk/entrypoint.py +153 -44
- airbyte_cdk/exception_handler.py +21 -3
- airbyte_cdk/logger.py +30 -44
- airbyte_cdk/models/__init__.py +13 -2
- airbyte_cdk/models/airbyte_protocol.py +86 -1
- airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
- airbyte_cdk/models/file_transfer_record_message.py +13 -0
- airbyte_cdk/models/well_known_types.py +1 -1
- airbyte_cdk/sources/__init__.py +5 -1
- airbyte_cdk/sources/abstract_source.py +125 -79
- airbyte_cdk/sources/concurrent_source/__init__.py +7 -2
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +102 -36
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +29 -36
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +94 -10
- airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +20 -14
- airbyte_cdk/sources/config.py +3 -2
- airbyte_cdk/sources/connector_state_manager.py +49 -83
- airbyte_cdk/sources/declarative/async_job/job.py +52 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +497 -0
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +75 -0
- airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
- airbyte_cdk/sources/declarative/async_job/status.py +24 -0
- airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
- airbyte_cdk/sources/declarative/auth/__init__.py +2 -3
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
- airbyte_cdk/sources/declarative/auth/jwt.py +191 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +60 -20
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +10 -2
- airbyte_cdk/sources/declarative/auth/token.py +28 -10
- airbyte_cdk/sources/declarative/auth/token_provider.py +9 -8
- airbyte_cdk/sources/declarative/checks/check_stream.py +16 -8
- airbyte_cdk/sources/declarative/checks/connection_checker.py +4 -2
- airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +490 -0
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +4 -0
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +26 -6
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +1185 -85
- airbyte_cdk/sources/declarative/declarative_source.py +5 -2
- airbyte_cdk/sources/declarative/declarative_stream.py +95 -9
- airbyte_cdk/sources/declarative/decoders/__init__.py +23 -2
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
- airbyte_cdk/sources/declarative/decoders/decoder.py +11 -4
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +92 -5
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +12 -1
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +29 -24
- airbyte_cdk/sources/declarative/extractors/http_selector.py +4 -5
- airbyte_cdk/sources/declarative/extractors/record_extractor.py +2 -3
- airbyte_cdk/sources/declarative/extractors/record_filter.py +63 -8
- airbyte_cdk/sources/declarative/extractors/record_selector.py +85 -26
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +177 -0
- airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
- airbyte_cdk/sources/declarative/incremental/__init__.py +31 -3
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +340 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +156 -48
- airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +350 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +174 -74
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
- airbyte_cdk/sources/declarative/interpolation/filters.py +27 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +23 -5
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +12 -8
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +13 -6
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +21 -6
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +9 -3
- airbyte_cdk/sources/declarative/interpolation/jinja.py +72 -37
- airbyte_cdk/sources/declarative/interpolation/macros.py +72 -17
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +193 -52
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
- airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
- airbyte_cdk/sources/declarative/models/__init__.py +1 -1
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +1319 -603
- airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +2 -2
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +26 -4
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +26 -15
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +1759 -225
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +24 -4
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +39 -9
- airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
- airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +15 -3
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +222 -39
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +19 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +19 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +4 -2
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +41 -9
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +29 -14
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +5 -13
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +32 -16
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +46 -56
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +6 -32
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +119 -41
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +228 -0
- airbyte_cdk/sources/declarative/requesters/http_requester.py +98 -344
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +105 -46
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +14 -8
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +19 -8
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +53 -21
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +42 -19
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +25 -12
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +13 -10
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +26 -13
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +15 -2
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +91 -0
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +31 -14
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +27 -15
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +63 -10
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +1 -1
- airbyte_cdk/sources/declarative/requesters/requester.py +9 -17
- airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
- airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
- airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
- airbyte_cdk/sources/declarative/retrievers/__init__.py +6 -2
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +100 -0
- airbyte_cdk/sources/declarative/retrievers/retriever.py +1 -3
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +229 -73
- airbyte_cdk/sources/declarative/schema/__init__.py +14 -1
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +5 -3
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +236 -0
- airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +8 -8
- airbyte_cdk/sources/declarative/spec/spec.py +12 -5
- airbyte_cdk/sources/declarative/stream_slicers/__init__.py +1 -2
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +88 -0
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +9 -14
- airbyte_cdk/sources/declarative/transformations/add_fields.py +19 -11
- airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
- airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +13 -10
- airbyte_cdk/sources/declarative/transformations/transformation.py +5 -5
- airbyte_cdk/sources/declarative/types.py +19 -110
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +31 -10
- airbyte_cdk/sources/embedded/base_integration.py +16 -5
- airbyte_cdk/sources/embedded/catalog.py +16 -4
- airbyte_cdk/sources/embedded/runner.py +19 -3
- airbyte_cdk/sources/embedded/tools.py +5 -2
- airbyte_cdk/sources/file_based/README.md +152 -0
- airbyte_cdk/sources/file_based/__init__.py +24 -0
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +22 -6
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +46 -10
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +47 -10
- airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
- airbyte_cdk/sources/file_based/config/csv_format.py +29 -10
- airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +16 -4
- airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
- airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
- airbyte_cdk/sources/file_based/config/unstructured_format.py +13 -5
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
- airbyte_cdk/sources/file_based/exceptions.py +18 -15
- airbyte_cdk/sources/file_based/file_based_source.py +140 -33
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +69 -5
- airbyte_cdk/sources/file_based/file_types/__init__.py +14 -1
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +75 -24
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +116 -34
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +37 -0
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +24 -8
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +60 -18
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +141 -41
- airbyte_cdk/sources/file_based/remote_file.py +1 -1
- airbyte_cdk/sources/file_based/schema_helpers.py +38 -10
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +50 -13
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +67 -27
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +14 -23
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +54 -18
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +21 -9
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +27 -10
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +147 -45
- airbyte_cdk/sources/http_logger.py +8 -3
- airbyte_cdk/sources/message/__init__.py +7 -1
- airbyte_cdk/sources/message/repository.py +18 -4
- airbyte_cdk/sources/source.py +42 -38
- airbyte_cdk/sources/streams/__init__.py +2 -2
- airbyte_cdk/sources/streams/availability_strategy.py +54 -3
- airbyte_cdk/sources/streams/call_rate.py +64 -21
- airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
- airbyte_cdk/sources/{declarative/incremental → streams/checkpoint}/cursor.py +17 -14
- airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
- airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
- airbyte_cdk/sources/streams/concurrent/README.md +7 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +7 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +84 -75
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +30 -2
- airbyte_cdk/sources/streams/concurrent/cursor.py +313 -48
- airbyte_cdk/sources/streams/concurrent/default_stream.py +12 -3
- airbyte_cdk/sources/streams/concurrent/exceptions.py +3 -0
- airbyte_cdk/sources/streams/concurrent/helpers.py +14 -3
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +12 -3
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +10 -3
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -16
- airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +15 -5
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +109 -17
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +90 -72
- airbyte_cdk/sources/streams/core.py +412 -87
- airbyte_cdk/sources/streams/http/__init__.py +2 -1
- airbyte_cdk/sources/streams/http/availability_strategy.py +12 -101
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
- airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
- airbyte_cdk/sources/streams/http/exceptions.py +27 -7
- airbyte_cdk/sources/streams/http/http.py +369 -246
- airbyte_cdk/sources/streams/http/http_client.py +531 -0
- airbyte_cdk/sources/streams/http/rate_limiting.py +76 -12
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +28 -9
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +90 -35
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
- airbyte_cdk/sources/types.py +154 -0
- airbyte_cdk/sources/utils/record_helper.py +36 -21
- airbyte_cdk/sources/utils/schema_helpers.py +13 -6
- airbyte_cdk/sources/utils/slice_logger.py +4 -1
- airbyte_cdk/sources/utils/transform.py +54 -20
- airbyte_cdk/sql/_util/hashing.py +34 -0
- airbyte_cdk/sql/_util/name_normalizers.py +92 -0
- airbyte_cdk/sql/constants.py +32 -0
- airbyte_cdk/sql/exceptions.py +235 -0
- airbyte_cdk/sql/secrets.py +123 -0
- airbyte_cdk/sql/shared/__init__.py +15 -0
- airbyte_cdk/sql/shared/catalog_providers.py +145 -0
- airbyte_cdk/sql/shared/sql_processor.py +786 -0
- airbyte_cdk/sql/types.py +160 -0
- airbyte_cdk/test/catalog_builder.py +70 -18
- airbyte_cdk/test/entrypoint_wrapper.py +117 -42
- airbyte_cdk/test/mock_http/__init__.py +1 -1
- airbyte_cdk/test/mock_http/matcher.py +6 -0
- airbyte_cdk/test/mock_http/mocker.py +57 -10
- airbyte_cdk/test/mock_http/request.py +19 -3
- airbyte_cdk/test/mock_http/response.py +3 -1
- airbyte_cdk/test/mock_http/response_builder.py +32 -16
- airbyte_cdk/test/state_builder.py +18 -10
- airbyte_cdk/test/utils/__init__.py +1 -0
- airbyte_cdk/test/utils/data.py +24 -0
- airbyte_cdk/test/utils/http_mocking.py +16 -0
- airbyte_cdk/test/utils/manifest_only_fixtures.py +60 -0
- airbyte_cdk/test/utils/reading.py +26 -0
- airbyte_cdk/utils/__init__.py +2 -1
- airbyte_cdk/utils/airbyte_secrets_utils.py +5 -3
- airbyte_cdk/utils/analytics_message.py +10 -2
- airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
- airbyte_cdk/utils/event_timing.py +10 -10
- airbyte_cdk/utils/mapping_helpers.py +3 -1
- airbyte_cdk/utils/message_utils.py +20 -11
- airbyte_cdk/utils/print_buffer.py +75 -0
- airbyte_cdk/utils/schema_inferrer.py +198 -28
- airbyte_cdk/utils/slice_hasher.py +30 -0
- airbyte_cdk/utils/spec_schema_transformations.py +6 -3
- airbyte_cdk/utils/stream_status_utils.py +8 -1
- airbyte_cdk/utils/traced_exception.py +61 -21
- airbyte_cdk-6.17.1.dev1.dist-info/METADATA +109 -0
- airbyte_cdk-6.17.1.dev1.dist-info/RECORD +350 -0
- {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.17.1.dev1.dist-info}/WHEEL +1 -2
- airbyte_cdk-6.17.1.dev1.dist-info/entry_points.txt +3 -0
- airbyte_cdk/sources/declarative/create_partial.py +0 -92
- airbyte_cdk/sources/declarative/parsers/class_types_registry.py +0 -102
- airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py +0 -64
- airbyte_cdk/sources/declarative/requesters/error_handlers/response_action.py +0 -16
- airbyte_cdk/sources/declarative/requesters/error_handlers/response_status.py +0 -68
- airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py +0 -114
- airbyte_cdk/sources/deprecated/base_source.py +0 -94
- airbyte_cdk/sources/deprecated/client.py +0 -99
- airbyte_cdk/sources/singer/__init__.py +0 -8
- airbyte_cdk/sources/singer/singer_helpers.py +0 -304
- airbyte_cdk/sources/singer/source.py +0 -186
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -23
- airbyte_cdk/sources/streams/http/auth/__init__.py +0 -17
- airbyte_cdk/sources/streams/http/auth/core.py +0 -29
- airbyte_cdk/sources/streams/http/auth/oauth.py +0 -113
- airbyte_cdk/sources/streams/http/auth/token.py +0 -47
- airbyte_cdk/sources/streams/utils/stream_helper.py +0 -40
- airbyte_cdk/sources/utils/catalog_helpers.py +0 -22
- airbyte_cdk/sources/utils/schema_models.py +0 -84
- airbyte_cdk-0.72.1.dist-info/METADATA +0 -243
- airbyte_cdk-0.72.1.dist-info/RECORD +0 -466
- airbyte_cdk-0.72.1.dist-info/top_level.txt +0 -3
- source_declarative_manifest/main.py +0 -29
- unit_tests/connector_builder/__init__.py +0 -3
- unit_tests/connector_builder/test_connector_builder_handler.py +0 -871
- unit_tests/connector_builder/test_message_grouper.py +0 -713
- unit_tests/connector_builder/utils.py +0 -27
- unit_tests/destinations/test_destination.py +0 -243
- unit_tests/singer/test_singer_helpers.py +0 -56
- unit_tests/singer/test_singer_source.py +0 -112
- unit_tests/sources/__init__.py +0 -0
- unit_tests/sources/concurrent_source/__init__.py +0 -3
- unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +0 -106
- unit_tests/sources/declarative/__init__.py +0 -3
- unit_tests/sources/declarative/auth/__init__.py +0 -3
- unit_tests/sources/declarative/auth/test_oauth.py +0 -331
- unit_tests/sources/declarative/auth/test_selective_authenticator.py +0 -39
- unit_tests/sources/declarative/auth/test_session_token_auth.py +0 -182
- unit_tests/sources/declarative/auth/test_token_auth.py +0 -200
- unit_tests/sources/declarative/auth/test_token_provider.py +0 -73
- unit_tests/sources/declarative/checks/__init__.py +0 -3
- unit_tests/sources/declarative/checks/test_check_stream.py +0 -146
- unit_tests/sources/declarative/decoders/__init__.py +0 -0
- unit_tests/sources/declarative/decoders/test_json_decoder.py +0 -16
- unit_tests/sources/declarative/external_component.py +0 -13
- unit_tests/sources/declarative/extractors/__init__.py +0 -3
- unit_tests/sources/declarative/extractors/test_dpath_extractor.py +0 -55
- unit_tests/sources/declarative/extractors/test_record_filter.py +0 -55
- unit_tests/sources/declarative/extractors/test_record_selector.py +0 -179
- unit_tests/sources/declarative/incremental/__init__.py +0 -0
- unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +0 -860
- unit_tests/sources/declarative/incremental/test_per_partition_cursor.py +0 -406
- unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +0 -332
- unit_tests/sources/declarative/interpolation/__init__.py +0 -3
- unit_tests/sources/declarative/interpolation/test_filters.py +0 -80
- unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py +0 -40
- unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py +0 -35
- unit_tests/sources/declarative/interpolation/test_interpolated_nested_mapping.py +0 -45
- unit_tests/sources/declarative/interpolation/test_interpolated_string.py +0 -25
- unit_tests/sources/declarative/interpolation/test_jinja.py +0 -240
- unit_tests/sources/declarative/interpolation/test_macros.py +0 -73
- unit_tests/sources/declarative/parsers/__init__.py +0 -3
- unit_tests/sources/declarative/parsers/test_manifest_component_transformer.py +0 -406
- unit_tests/sources/declarative/parsers/test_manifest_reference_resolver.py +0 -139
- unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +0 -1847
- unit_tests/sources/declarative/parsers/testing_components.py +0 -36
- unit_tests/sources/declarative/partition_routers/__init__.py +0 -3
- unit_tests/sources/declarative/partition_routers/test_list_partition_router.py +0 -155
- unit_tests/sources/declarative/partition_routers/test_single_partition_router.py +0 -14
- unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py +0 -404
- unit_tests/sources/declarative/requesters/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_constant_backoff.py +0 -34
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py +0 -36
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py +0 -38
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py +0 -35
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py +0 -64
- unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py +0 -213
- unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py +0 -178
- unit_tests/sources/declarative/requesters/error_handlers/test_http_response_filter.py +0 -121
- unit_tests/sources/declarative/requesters/error_handlers/test_response_status.py +0 -44
- unit_tests/sources/declarative/requesters/paginators/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py +0 -64
- unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +0 -313
- unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py +0 -12
- unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py +0 -58
- unit_tests/sources/declarative/requesters/paginators/test_page_increment.py +0 -70
- unit_tests/sources/declarative/requesters/paginators/test_request_option.py +0 -43
- unit_tests/sources/declarative/requesters/paginators/test_stop_condition.py +0 -105
- unit_tests/sources/declarative/requesters/request_options/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py +0 -101
- unit_tests/sources/declarative/requesters/test_http_requester.py +0 -974
- unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py +0 -32
- unit_tests/sources/declarative/retrievers/__init__.py +0 -3
- unit_tests/sources/declarative/retrievers/test_simple_retriever.py +0 -542
- unit_tests/sources/declarative/schema/__init__.py +0 -6
- unit_tests/sources/declarative/schema/source_test/SourceTest.py +0 -8
- unit_tests/sources/declarative/schema/source_test/__init__.py +0 -3
- unit_tests/sources/declarative/schema/test_default_schema_loader.py +0 -32
- unit_tests/sources/declarative/schema/test_inline_schema_loader.py +0 -19
- unit_tests/sources/declarative/schema/test_json_file_schema_loader.py +0 -26
- unit_tests/sources/declarative/states/__init__.py +0 -3
- unit_tests/sources/declarative/stream_slicers/__init__.py +0 -3
- unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py +0 -225
- unit_tests/sources/declarative/test_create_partial.py +0 -83
- unit_tests/sources/declarative/test_declarative_stream.py +0 -103
- unit_tests/sources/declarative/test_manifest_declarative_source.py +0 -1260
- unit_tests/sources/declarative/test_types.py +0 -39
- unit_tests/sources/declarative/test_yaml_declarative_source.py +0 -148
- unit_tests/sources/file_based/__init__.py +0 -0
- unit_tests/sources/file_based/availability_strategy/__init__.py +0 -0
- unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py +0 -100
- unit_tests/sources/file_based/config/__init__.py +0 -0
- unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +0 -28
- unit_tests/sources/file_based/config/test_csv_format.py +0 -34
- unit_tests/sources/file_based/config/test_file_based_stream_config.py +0 -84
- unit_tests/sources/file_based/discovery_policy/__init__.py +0 -0
- unit_tests/sources/file_based/discovery_policy/test_default_discovery_policy.py +0 -31
- unit_tests/sources/file_based/file_types/__init__.py +0 -0
- unit_tests/sources/file_based/file_types/test_avro_parser.py +0 -243
- unit_tests/sources/file_based/file_types/test_csv_parser.py +0 -546
- unit_tests/sources/file_based/file_types/test_jsonl_parser.py +0 -158
- unit_tests/sources/file_based/file_types/test_parquet_parser.py +0 -274
- unit_tests/sources/file_based/file_types/test_unstructured_parser.py +0 -593
- unit_tests/sources/file_based/helpers.py +0 -70
- unit_tests/sources/file_based/in_memory_files_source.py +0 -211
- unit_tests/sources/file_based/scenarios/__init__.py +0 -0
- unit_tests/sources/file_based/scenarios/avro_scenarios.py +0 -744
- unit_tests/sources/file_based/scenarios/check_scenarios.py +0 -220
- unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +0 -2844
- unit_tests/sources/file_based/scenarios/csv_scenarios.py +0 -3105
- unit_tests/sources/file_based/scenarios/file_based_source_builder.py +0 -91
- unit_tests/sources/file_based/scenarios/incremental_scenarios.py +0 -1926
- unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +0 -930
- unit_tests/sources/file_based/scenarios/parquet_scenarios.py +0 -754
- unit_tests/sources/file_based/scenarios/scenario_builder.py +0 -234
- unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +0 -608
- unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +0 -746
- unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +0 -726
- unit_tests/sources/file_based/stream/__init__.py +0 -0
- unit_tests/sources/file_based/stream/concurrent/__init__.py +0 -0
- unit_tests/sources/file_based/stream/concurrent/test_adapters.py +0 -362
- unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +0 -458
- unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +0 -310
- unit_tests/sources/file_based/stream/test_default_file_based_stream.py +0 -244
- unit_tests/sources/file_based/test_file_based_scenarios.py +0 -320
- unit_tests/sources/file_based/test_file_based_stream_reader.py +0 -272
- unit_tests/sources/file_based/test_scenarios.py +0 -253
- unit_tests/sources/file_based/test_schema_helpers.py +0 -346
- unit_tests/sources/fixtures/__init__.py +0 -3
- unit_tests/sources/fixtures/source_test_fixture.py +0 -153
- unit_tests/sources/message/__init__.py +0 -0
- unit_tests/sources/message/test_repository.py +0 -153
- unit_tests/sources/streams/__init__.py +0 -0
- unit_tests/sources/streams/concurrent/__init__.py +0 -3
- unit_tests/sources/streams/concurrent/scenarios/__init__.py +0 -3
- unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +0 -250
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +0 -140
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +0 -452
- unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +0 -76
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +0 -418
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +0 -142
- unit_tests/sources/streams/concurrent/scenarios/utils.py +0 -55
- unit_tests/sources/streams/concurrent/test_adapters.py +0 -380
- unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +0 -684
- unit_tests/sources/streams/concurrent/test_cursor.py +0 -139
- unit_tests/sources/streams/concurrent/test_datetime_state_converter.py +0 -369
- unit_tests/sources/streams/concurrent/test_default_stream.py +0 -197
- unit_tests/sources/streams/concurrent/test_partition_enqueuer.py +0 -90
- unit_tests/sources/streams/concurrent/test_partition_reader.py +0 -67
- unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +0 -106
- unit_tests/sources/streams/http/__init__.py +0 -0
- unit_tests/sources/streams/http/auth/__init__.py +0 -0
- unit_tests/sources/streams/http/auth/test_auth.py +0 -173
- unit_tests/sources/streams/http/requests_native_auth/__init__.py +0 -0
- unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +0 -423
- unit_tests/sources/streams/http/test_availability_strategy.py +0 -180
- unit_tests/sources/streams/http/test_http.py +0 -635
- unit_tests/sources/streams/test_availability_strategy.py +0 -70
- unit_tests/sources/streams/test_call_rate.py +0 -300
- unit_tests/sources/streams/test_stream_read.py +0 -405
- unit_tests/sources/streams/test_streams_core.py +0 -184
- unit_tests/sources/test_abstract_source.py +0 -1442
- unit_tests/sources/test_concurrent_source.py +0 -112
- unit_tests/sources/test_config.py +0 -92
- unit_tests/sources/test_connector_state_manager.py +0 -482
- unit_tests/sources/test_http_logger.py +0 -252
- unit_tests/sources/test_integration_source.py +0 -86
- unit_tests/sources/test_source.py +0 -684
- unit_tests/sources/test_source_read.py +0 -460
- unit_tests/test/__init__.py +0 -0
- unit_tests/test/mock_http/__init__.py +0 -0
- unit_tests/test/mock_http/test_matcher.py +0 -53
- unit_tests/test/mock_http/test_mocker.py +0 -214
- unit_tests/test/mock_http/test_request.py +0 -117
- unit_tests/test/mock_http/test_response_builder.py +0 -177
- unit_tests/test/test_entrypoint_wrapper.py +0 -240
- unit_tests/utils/__init__.py +0 -0
- unit_tests/utils/test_datetime_format_inferrer.py +0 -60
- unit_tests/utils/test_mapping_helpers.py +0 -54
- unit_tests/utils/test_message_utils.py +0 -91
- unit_tests/utils/test_rate_limiting.py +0 -26
- unit_tests/utils/test_schema_inferrer.py +0 -202
- unit_tests/utils/test_secret_utils.py +0 -135
- unit_tests/utils/test_stream_status_utils.py +0 -61
- unit_tests/utils/test_traced_exception.py +0 -107
- /airbyte_cdk/sources/{deprecated → declarative/async_job}/__init__.py +0 -0
- {source_declarative_manifest → airbyte_cdk/sources/declarative/migrations}/__init__.py +0 -0
- {unit_tests/destinations → airbyte_cdk/sql}/__init__.py +0 -0
- {unit_tests/singer → airbyte_cdk/sql/_util}/__init__.py +0 -0
- {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.17.1.dev1.dist-info}/LICENSE.txt +0 -0
@@ -1,39 +0,0 @@
|
|
1
|
-
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
2
|
-
|
3
|
-
import pytest
|
4
|
-
from airbyte_cdk.sources.declarative.types import StreamSlice
|
5
|
-
|
6
|
-
|
7
|
-
@pytest.mark.parametrize(
|
8
|
-
"stream_slice, expected_partition",
|
9
|
-
[
|
10
|
-
pytest.param(StreamSlice(partition={},cursor_slice={}), {}, id="test_partition_with_empty_partition"),
|
11
|
-
pytest.param(StreamSlice(partition=StreamSlice(partition={}, cursor_slice={}), cursor_slice={}), {}, id="test_partition_nested_empty"),
|
12
|
-
pytest.param(StreamSlice(partition={"key": "value"}, cursor_slice={}), {"key": "value"}, id="test_partition_with_mapping_partition"),
|
13
|
-
pytest.param(StreamSlice(partition={},cursor_slice={"cursor": "value"}), {}, id="test_partition_with_only_cursor"),
|
14
|
-
pytest.param(StreamSlice(partition=StreamSlice(partition={}, cursor_slice={}), cursor_slice={"cursor": "value"}), {}, id="test_partition_nested_empty_and_cursor_value_mapping"),
|
15
|
-
pytest.param(StreamSlice(partition=StreamSlice(partition={}, cursor_slice={"cursor": "value"}), cursor_slice={}), {}, id="test_partition_nested_empty_and_cursor_value"),
|
16
|
-
]
|
17
|
-
)
|
18
|
-
def test_partition(stream_slice, expected_partition):
|
19
|
-
partition = stream_slice.partition
|
20
|
-
|
21
|
-
assert partition == expected_partition
|
22
|
-
|
23
|
-
|
24
|
-
@pytest.mark.parametrize(
|
25
|
-
"stream_slice, expected_cursor_slice",
|
26
|
-
[
|
27
|
-
pytest.param(StreamSlice(partition={},cursor_slice={}), {}, id="test_cursor_slice_with_empty_cursor"),
|
28
|
-
pytest.param(StreamSlice(partition={}, cursor_slice=StreamSlice(partition={}, cursor_slice={})), {}, id="test_cursor_slice_nested_empty"),
|
29
|
-
|
30
|
-
pytest.param(StreamSlice(partition={}, cursor_slice={"key": "value"}), {"key": "value"}, id="test_cursor_slice_with_mapping_cursor_slice"),
|
31
|
-
pytest.param(StreamSlice(partition={"partition": "value"}, cursor_slice={}), {}, id="test_cursor_slice_with_only_partition"),
|
32
|
-
pytest.param(StreamSlice(partition={"partition": "value"}, cursor_slice=StreamSlice(partition={}, cursor_slice={})), {}, id="test_cursor_slice_nested_empty_and_partition_mapping"),
|
33
|
-
pytest.param(StreamSlice(partition=StreamSlice(partition={"partition": "value"}, cursor_slice={}), cursor_slice={}), {}, id="test_cursor_slice_nested_empty_and_partition"),
|
34
|
-
]
|
35
|
-
)
|
36
|
-
def test_cursor_slice(stream_slice, expected_cursor_slice):
|
37
|
-
cursor_slice = stream_slice.cursor_slice
|
38
|
-
|
39
|
-
assert cursor_slice == expected_cursor_slice
|
@@ -1,148 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
-
#
|
4
|
-
|
5
|
-
import logging
|
6
|
-
import os
|
7
|
-
import tempfile
|
8
|
-
|
9
|
-
import pytest
|
10
|
-
from airbyte_cdk.sources.declarative.parsers.custom_exceptions import UndefinedReferenceException
|
11
|
-
from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource
|
12
|
-
from yaml.parser import ParserError
|
13
|
-
|
14
|
-
logger = logging.getLogger("airbyte")
|
15
|
-
|
16
|
-
|
17
|
-
EXTERNAL_CONNECTION_SPECIFICATION = {
|
18
|
-
"type": "object",
|
19
|
-
"required": ["api_token"],
|
20
|
-
"additionalProperties": False,
|
21
|
-
"properties": {"api_token": {"type": "string"}},
|
22
|
-
}
|
23
|
-
|
24
|
-
|
25
|
-
class MockYamlDeclarativeSource(YamlDeclarativeSource):
|
26
|
-
"""
|
27
|
-
Mock test class that is needed to monkey patch how we read from various files that make up a declarative source because of how our
|
28
|
-
tests write configuration files during testing. It is also used to properly namespace where files get written in specific
|
29
|
-
cases like when we temporarily write files like spec.yaml to the package unit_tests, which is the directory where it will
|
30
|
-
be read in during the tests.
|
31
|
-
"""
|
32
|
-
|
33
|
-
def _read_and_parse_yaml_file(self, path_to_yaml_file):
|
34
|
-
"""
|
35
|
-
We override the default behavior because we use tempfile to write the yaml manifest to a temporary directory which is
|
36
|
-
not mounted during runtime which prevents pkgutil.get_data() from being able to find the yaml file needed to generate
|
37
|
-
# the declarative source. For tests we use open() which supports using an absolute path.
|
38
|
-
"""
|
39
|
-
with open(path_to_yaml_file, "r") as f:
|
40
|
-
config_content = f.read()
|
41
|
-
parsed_config = YamlDeclarativeSource._parse(config_content)
|
42
|
-
return parsed_config
|
43
|
-
|
44
|
-
|
45
|
-
class TestYamlDeclarativeSource:
|
46
|
-
def test_source_is_created_if_toplevel_fields_are_known(self):
|
47
|
-
content = """
|
48
|
-
version: "0.29.3"
|
49
|
-
definitions:
|
50
|
-
schema_loader:
|
51
|
-
name: "{{ parameters.stream_name }}"
|
52
|
-
file_path: "./source_sendgrid/schemas/{{ parameters.name }}.yaml"
|
53
|
-
retriever:
|
54
|
-
paginator:
|
55
|
-
type: "DefaultPaginator"
|
56
|
-
page_size: 10
|
57
|
-
page_size_option:
|
58
|
-
inject_into: request_parameter
|
59
|
-
field_name: page_size
|
60
|
-
page_token_option:
|
61
|
-
type: RequestPath
|
62
|
-
pagination_strategy:
|
63
|
-
type: "CursorPagination"
|
64
|
-
cursor_value: "{{ response._metadata.next }}"
|
65
|
-
requester:
|
66
|
-
url_base: "https://api.sendgrid.com"
|
67
|
-
path: "/v3/marketing/lists"
|
68
|
-
authenticator:
|
69
|
-
type: "BearerAuthenticator"
|
70
|
-
api_token: "{{ config.apikey }}"
|
71
|
-
request_parameters:
|
72
|
-
page_size: "{{ 10 }}"
|
73
|
-
record_selector:
|
74
|
-
extractor:
|
75
|
-
field_path: ["result"]
|
76
|
-
streams:
|
77
|
-
- type: DeclarativeStream
|
78
|
-
$parameters:
|
79
|
-
name: "lists"
|
80
|
-
primary_key: id
|
81
|
-
schema_loader: "#/definitions/schema_loader"
|
82
|
-
retriever: "#/definitions/retriever"
|
83
|
-
check:
|
84
|
-
type: CheckStream
|
85
|
-
stream_names: ["lists"]
|
86
|
-
"""
|
87
|
-
temporary_file = TestFileContent(content)
|
88
|
-
MockYamlDeclarativeSource(temporary_file.filename)
|
89
|
-
|
90
|
-
def test_source_fails_for_invalid_yaml(self):
|
91
|
-
content = """
|
92
|
-
version: "version"
|
93
|
-
definitions:
|
94
|
-
this is not parsable yaml: " at all
|
95
|
-
streams:
|
96
|
-
- type: DeclarativeStream
|
97
|
-
$parameters:
|
98
|
-
name: "lists"
|
99
|
-
primary_key: id
|
100
|
-
url_base: "https://api.sendgrid.com"
|
101
|
-
check:
|
102
|
-
type: CheckStream
|
103
|
-
stream_names: ["lists"]
|
104
|
-
"""
|
105
|
-
temporary_file = TestFileContent(content)
|
106
|
-
with pytest.raises(ParserError):
|
107
|
-
MockYamlDeclarativeSource(temporary_file.filename)
|
108
|
-
|
109
|
-
def test_source_with_missing_reference_fails(self):
|
110
|
-
content = """
|
111
|
-
version: "version"
|
112
|
-
definitions:
|
113
|
-
schema_loader:
|
114
|
-
name: "{{ parameters.stream_name }}"
|
115
|
-
file_path: "./source_sendgrid/schemas/{{ parameters.name }}.yaml"
|
116
|
-
streams:
|
117
|
-
- type: DeclarativeStream
|
118
|
-
$parameters:
|
119
|
-
name: "lists"
|
120
|
-
primary_key: id
|
121
|
-
url_base: "https://api.sendgrid.com"
|
122
|
-
schema_loader: "#/definitions/schema_loader"
|
123
|
-
retriever: "#/definitions/retriever"
|
124
|
-
check:
|
125
|
-
type: CheckStream
|
126
|
-
stream_names: ["lists"]
|
127
|
-
"""
|
128
|
-
temporary_file = TestFileContent(content)
|
129
|
-
with pytest.raises(UndefinedReferenceException):
|
130
|
-
MockYamlDeclarativeSource(temporary_file.filename)
|
131
|
-
|
132
|
-
|
133
|
-
class TestFileContent:
|
134
|
-
def __init__(self, content):
|
135
|
-
self.file = tempfile.NamedTemporaryFile(mode="w", delete=False)
|
136
|
-
|
137
|
-
with self.file as f:
|
138
|
-
f.write(content)
|
139
|
-
|
140
|
-
@property
|
141
|
-
def filename(self):
|
142
|
-
return self.file.name
|
143
|
-
|
144
|
-
def __enter__(self):
|
145
|
-
return self
|
146
|
-
|
147
|
-
def __exit__(self, type, value, traceback):
|
148
|
-
os.unlink(self.filename)
|
File without changes
|
File without changes
|
unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py
DELETED
@@ -1,100 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
-
#
|
4
|
-
|
5
|
-
import unittest
|
6
|
-
from datetime import datetime
|
7
|
-
from unittest.mock import Mock, PropertyMock
|
8
|
-
|
9
|
-
from airbyte_cdk.sources.file_based.availability_strategy.default_file_based_availability_strategy import (
|
10
|
-
DefaultFileBasedAvailabilityStrategy,
|
11
|
-
)
|
12
|
-
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
|
13
|
-
from airbyte_cdk.sources.file_based.config.jsonl_format import JsonlFormat
|
14
|
-
from airbyte_cdk.sources.file_based.exceptions import CustomFileBasedException
|
15
|
-
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
|
16
|
-
from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
|
17
|
-
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
18
|
-
from airbyte_cdk.sources.file_based.stream import AbstractFileBasedStream
|
19
|
-
|
20
|
-
_FILE_WITH_UNKNOWN_EXTENSION = RemoteFile(uri="a.unknown_extension", last_modified=datetime.now(), file_type="csv")
|
21
|
-
_ANY_CONFIG = FileBasedStreamConfig(
|
22
|
-
name="config.name",
|
23
|
-
file_type="parquet",
|
24
|
-
format=JsonlFormat(),
|
25
|
-
)
|
26
|
-
_ANY_SCHEMA = {"key": "value"}
|
27
|
-
|
28
|
-
|
29
|
-
class DefaultFileBasedAvailabilityStrategyTest(unittest.TestCase):
|
30
|
-
def setUp(self) -> None:
|
31
|
-
self._stream_reader = Mock(spec=AbstractFileBasedStreamReader)
|
32
|
-
self._strategy = DefaultFileBasedAvailabilityStrategy(self._stream_reader)
|
33
|
-
|
34
|
-
self._parser = Mock(spec=FileTypeParser)
|
35
|
-
self._parser.check_config.return_value = (True, None)
|
36
|
-
self._stream = Mock(spec=AbstractFileBasedStream)
|
37
|
-
self._stream.get_parser.return_value = self._parser
|
38
|
-
self._stream.catalog_schema = _ANY_SCHEMA
|
39
|
-
self._stream.config = _ANY_CONFIG
|
40
|
-
self._stream.validation_policy = PropertyMock(validate_schema_before_sync=False)
|
41
|
-
self._stream.stream_reader = self._stream_reader
|
42
|
-
|
43
|
-
def test_given_file_extension_does_not_match_when_check_availability_and_parsability_then_stream_is_still_available(self) -> None:
|
44
|
-
"""
|
45
|
-
Before, we had a validation on the file extension but it turns out that in production, users sometimes have mismatch there. The
|
46
|
-
example we've seen was for JSONL parser but the file extension was just `.json`. Note that there we more than one record extracted
|
47
|
-
from this stream so it's not just that the file is one JSON object
|
48
|
-
"""
|
49
|
-
self._stream.get_files.return_value = [_FILE_WITH_UNKNOWN_EXTENSION]
|
50
|
-
self._parser.parse_records.return_value = [{"a record": 1}]
|
51
|
-
|
52
|
-
is_available, reason = self._strategy.check_availability_and_parsability(self._stream, Mock(), Mock())
|
53
|
-
|
54
|
-
assert is_available
|
55
|
-
|
56
|
-
def test_not_available_given_no_files(self) -> None:
|
57
|
-
"""
|
58
|
-
If no files are returned, then the stream is not available.
|
59
|
-
"""
|
60
|
-
self._stream.get_files.return_value = []
|
61
|
-
|
62
|
-
is_available, reason = self._strategy.check_availability_and_parsability(self._stream, Mock(), Mock())
|
63
|
-
|
64
|
-
assert not is_available
|
65
|
-
assert "No files were identified in the stream" in reason
|
66
|
-
|
67
|
-
def test_parse_records_is_not_called_with_parser_max_n_files_for_parsability_set(self) -> None:
|
68
|
-
"""
|
69
|
-
If the stream parser sets parser_max_n_files_for_parsability to 0, then we should not call parse_records on it
|
70
|
-
"""
|
71
|
-
self._parser.parser_max_n_files_for_parsability = 0
|
72
|
-
self._stream.get_files.return_value = [_FILE_WITH_UNKNOWN_EXTENSION]
|
73
|
-
|
74
|
-
is_available, reason = self._strategy.check_availability_and_parsability(self._stream, Mock(), Mock())
|
75
|
-
|
76
|
-
assert is_available
|
77
|
-
assert not self._parser.parse_records.called
|
78
|
-
assert self._stream_reader.open_file.called
|
79
|
-
|
80
|
-
def test_passing_config_check(self) -> None:
|
81
|
-
"""
|
82
|
-
Test if the DefaultFileBasedAvailabilityStrategy correctly handles the check_config method defined on the parser.
|
83
|
-
"""
|
84
|
-
self._parser.check_config.return_value = (False, "Ran into error")
|
85
|
-
is_available, error_message = self._strategy.check_availability_and_parsability(self._stream, Mock(), Mock())
|
86
|
-
assert not is_available
|
87
|
-
assert "Ran into error" in error_message
|
88
|
-
|
89
|
-
def test_catching_and_raising_custom_file_based_exception(self) -> None:
|
90
|
-
"""
|
91
|
-
Test if the DefaultFileBasedAvailabilityStrategy correctly handles the CustomFileBasedException
|
92
|
-
by raising a CheckAvailabilityError when the get_files method is called.
|
93
|
-
"""
|
94
|
-
# Mock the get_files method to raise CustomFileBasedException when called
|
95
|
-
self._stream.get_files.side_effect = CustomFileBasedException("Custom exception for testing.")
|
96
|
-
|
97
|
-
# Invoke the check_availability_and_parsability method and check if it correctly handles the exception
|
98
|
-
is_available, error_message = self._strategy.check_availability_and_parsability(self._stream, Mock(), Mock())
|
99
|
-
assert not is_available
|
100
|
-
assert "Custom exception for testing." in error_message
|
File without changes
|
@@ -1,28 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
-
#
|
4
|
-
|
5
|
-
from typing import Type
|
6
|
-
|
7
|
-
import pytest
|
8
|
-
from airbyte_cdk.sources.file_based.config.file_based_stream_config import AvroFormat, CsvFormat, ParquetFormat
|
9
|
-
from jsonschema import ValidationError, validate
|
10
|
-
from pydantic import BaseModel
|
11
|
-
|
12
|
-
|
13
|
-
@pytest.mark.parametrize(
|
14
|
-
"file_format, file_type, expected_error",
|
15
|
-
[
|
16
|
-
pytest.param(ParquetFormat, "parquet", None, id="test_parquet_format_is_a_valid_parquet_file_type"),
|
17
|
-
pytest.param(AvroFormat, "avro", None, id="test_avro_format_is_a_valid_avro_file_type"),
|
18
|
-
pytest.param(CsvFormat, "parquet", ValidationError, id="test_csv_format_is_not_a_valid_parquet_file_type"),
|
19
|
-
],
|
20
|
-
)
|
21
|
-
def test_parquet_file_type_is_not_a_valid_csv_file_type(file_format: BaseModel, file_type: str, expected_error: Type[Exception]) -> None:
|
22
|
-
format_config = {file_type: {"filetype": file_type, "decimal_as_float": True}}
|
23
|
-
|
24
|
-
if expected_error:
|
25
|
-
with pytest.raises(expected_error):
|
26
|
-
validate(instance=format_config[file_type], schema=file_format.schema())
|
27
|
-
else:
|
28
|
-
validate(instance=format_config[file_type], schema=file_format.schema())
|
@@ -1,34 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
-
#
|
4
|
-
|
5
|
-
import unittest
|
6
|
-
|
7
|
-
import pytest
|
8
|
-
from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat, CsvHeaderAutogenerated, CsvHeaderFromCsv, CsvHeaderUserProvided
|
9
|
-
from pydantic import ValidationError
|
10
|
-
|
11
|
-
|
12
|
-
class CsvHeaderDefinitionTest(unittest.TestCase):
|
13
|
-
def test_given_user_provided_and_not_column_names_provided_then_raise_exception(self) -> None:
|
14
|
-
with pytest.raises(ValidationError):
|
15
|
-
CsvHeaderUserProvided(column_names=[])
|
16
|
-
|
17
|
-
def test_given_user_provided_and_column_names_then_config_is_valid(self) -> None:
|
18
|
-
# no error means that this test succeeds
|
19
|
-
CsvHeaderUserProvided(column_names=["1", "2", "3"])
|
20
|
-
|
21
|
-
def test_given_user_provided_then_csv_does_not_have_header_row(self) -> None:
|
22
|
-
assert not CsvHeaderUserProvided(column_names=["1", "2", "3"]).has_header_row()
|
23
|
-
|
24
|
-
def test_given_autogenerated_then_csv_does_not_have_header_row(self) -> None:
|
25
|
-
assert not CsvHeaderAutogenerated().has_header_row()
|
26
|
-
|
27
|
-
def test_given_from_csv_then_csv_has_header_row(self) -> None:
|
28
|
-
assert CsvHeaderFromCsv().has_header_row()
|
29
|
-
|
30
|
-
|
31
|
-
class CsvDelimiterTest(unittest.TestCase):
|
32
|
-
def test_tab_delimter(self):
|
33
|
-
assert CsvFormat(delimiter=r"\t").delimiter == '\t'
|
34
|
-
assert len(CsvFormat(delimiter=r"\t").delimiter) == 1
|
@@ -1,84 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
-
#
|
4
|
-
|
5
|
-
from typing import Any, Mapping, Type
|
6
|
-
|
7
|
-
import pytest as pytest
|
8
|
-
from airbyte_cdk.sources.file_based.config.file_based_stream_config import CsvFormat, FileBasedStreamConfig
|
9
|
-
from pydantic import ValidationError
|
10
|
-
|
11
|
-
|
12
|
-
@pytest.mark.parametrize(
|
13
|
-
"file_type, input_format, expected_format, expected_error",
|
14
|
-
[
|
15
|
-
pytest.param(
|
16
|
-
"csv",
|
17
|
-
{"filetype": "csv", "delimiter": "d", "quote_char": "q", "escape_char": "e", "encoding": "ascii", "double_quote": True},
|
18
|
-
{"filetype": "csv", "delimiter": "d", "quote_char": "q", "escape_char": "e", "encoding": "ascii", "double_quote": True},
|
19
|
-
None,
|
20
|
-
id="test_valid_format",
|
21
|
-
),
|
22
|
-
pytest.param(
|
23
|
-
"csv",
|
24
|
-
{"filetype": "csv", "double_quote": False},
|
25
|
-
{"delimiter": ",", "quote_char": '"', "encoding": "utf8", "double_quote": False},
|
26
|
-
None,
|
27
|
-
id="test_default_format_values",
|
28
|
-
),
|
29
|
-
pytest.param(
|
30
|
-
"csv", {"filetype": "csv", "delimiter": "nope", "double_quote": True}, None, ValidationError, id="test_invalid_delimiter"
|
31
|
-
),
|
32
|
-
pytest.param(
|
33
|
-
"csv", {"filetype": "csv", "quote_char": "nope", "double_quote": True}, None, ValidationError, id="test_invalid_quote_char"
|
34
|
-
),
|
35
|
-
pytest.param(
|
36
|
-
"csv", {"filetype": "csv", "escape_char": "nope", "double_quote": True}, None, ValidationError, id="test_invalid_escape_char"
|
37
|
-
),
|
38
|
-
pytest.param(
|
39
|
-
"csv",
|
40
|
-
{"filetype": "csv", "delimiter": ",", "quote_char": '"', "encoding": "not_a_format", "double_quote": True},
|
41
|
-
{},
|
42
|
-
ValidationError,
|
43
|
-
id="test_invalid_encoding_type",
|
44
|
-
),
|
45
|
-
pytest.param(
|
46
|
-
"invalid", {"filetype": "invalid", "double_quote": False}, {}, ValidationError, id="test_config_format_file_type_mismatch"
|
47
|
-
),
|
48
|
-
],
|
49
|
-
)
|
50
|
-
def test_csv_config(
|
51
|
-
file_type: str, input_format: Mapping[str, Any], expected_format: Mapping[str, Any], expected_error: Type[Exception]
|
52
|
-
) -> None:
|
53
|
-
stream_config = {"name": "stream1", "file_type": file_type, "globs": ["*"], "validation_policy": "Emit Record", "format": input_format}
|
54
|
-
|
55
|
-
if expected_error:
|
56
|
-
with pytest.raises(expected_error):
|
57
|
-
FileBasedStreamConfig(**stream_config)
|
58
|
-
else:
|
59
|
-
actual_config = FileBasedStreamConfig(**stream_config)
|
60
|
-
if actual_config.format is not None:
|
61
|
-
for expected_format_field, expected_format_value in expected_format.items():
|
62
|
-
assert isinstance(actual_config.format, CsvFormat)
|
63
|
-
assert getattr(actual_config.format, expected_format_field) == expected_format_value
|
64
|
-
else:
|
65
|
-
assert False, "Expected format to be set"
|
66
|
-
|
67
|
-
|
68
|
-
def test_invalid_validation_policy() -> None:
|
69
|
-
stream_config = {
|
70
|
-
"name": "stream1",
|
71
|
-
"file_type": "csv",
|
72
|
-
"globs": ["*"],
|
73
|
-
"validation_policy": "Not Valid Policy",
|
74
|
-
"format": {
|
75
|
-
"filetype": "csv",
|
76
|
-
"delimiter": "d",
|
77
|
-
"quote_char": "q",
|
78
|
-
"escape_char": "e",
|
79
|
-
"encoding": "ascii",
|
80
|
-
"double_quote": True,
|
81
|
-
},
|
82
|
-
}
|
83
|
-
with pytest.raises(ValidationError):
|
84
|
-
FileBasedStreamConfig(**stream_config)
|
File without changes
|
@@ -1,31 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
-
#
|
4
|
-
|
5
|
-
import unittest
|
6
|
-
from unittest.mock import Mock
|
7
|
-
|
8
|
-
from airbyte_cdk.sources.file_based.discovery_policy.default_discovery_policy import DefaultDiscoveryPolicy
|
9
|
-
from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
|
10
|
-
|
11
|
-
|
12
|
-
class DefaultDiscoveryPolicyTest(unittest.TestCase):
|
13
|
-
def setUp(self) -> None:
|
14
|
-
self._policy = DefaultDiscoveryPolicy()
|
15
|
-
|
16
|
-
self._parser = Mock(spec=FileTypeParser)
|
17
|
-
self._parser.parser_max_n_files_for_schema_inference = None
|
18
|
-
|
19
|
-
def test_hardcoded_schema_inference_file_limit_is_returned(self) -> None:
|
20
|
-
"""
|
21
|
-
If the parser is not providing a limit, then we should use the hardcoded limit
|
22
|
-
"""
|
23
|
-
assert self._policy.get_max_n_files_for_schema_inference(self._parser) == 10
|
24
|
-
|
25
|
-
def test_parser_limit_is_respected(self) -> None:
|
26
|
-
"""
|
27
|
-
If the parser is providing a limit, then we should use that limit
|
28
|
-
"""
|
29
|
-
self._parser.parser_max_n_files_for_schema_inference = 1
|
30
|
-
|
31
|
-
assert self._policy.get_max_n_files_for_schema_inference(self._parser) == 1
|
File without changes
|