airbyte-cdk 0.53.2__tar.gz → 0.53.4__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/PKG-INFO +1 -1
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/destinations/vector_db_based/config.py +32 -32
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/config/avro_format.py +3 -1
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/config/csv_format.py +10 -5
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/config/jsonl_format.py +3 -1
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/config/parquet_format.py +3 -1
- airbyte-cdk-0.53.4/airbyte_cdk/sources/file_based/config/unstructured_format.py +27 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +27 -9
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py +1 -1
- airbyte-cdk-0.53.4/airbyte_cdk/utils/oneof_option_config.py +33 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk.egg-info/PKG-INFO +1 -1
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk.egg-info/SOURCES.txt +1 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/setup.py +1 -1
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/file_types/test_unstructured_parser.py +38 -8
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/scenarios/csv_scenarios.py +16 -2
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +67 -3
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/test_file_based_scenarios.py +4 -2
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/concurrent/test_thread_based_concurrent_stream.py +24 -4
- airbyte-cdk-0.53.2/airbyte_cdk/sources/file_based/config/unstructured_format.py +0 -16
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/LICENSE.txt +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/README.md +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/config_observation.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/connector.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/connector_builder/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/connector_builder/connector_builder_handler.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/connector_builder/main.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/connector_builder/message_grouper.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/connector_builder/models.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/destinations/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/destinations/destination.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/destinations/vector_db_based/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/destinations/vector_db_based/document_processor.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/destinations/vector_db_based/embedder.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/destinations/vector_db_based/indexer.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/destinations/vector_db_based/test_utils.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/destinations/vector_db_based/utils.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/destinations/vector_db_based/writer.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/entrypoint.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/exception_handler.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/logger.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/models/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/models/airbyte_protocol.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/models/well_known_types.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/py.typed +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/abstract_source.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/config.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/connector_state_manager.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/auth/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/auth/oauth.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/auth/token.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/auth/token_provider.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/checks/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/checks/check_stream.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/checks/connection_checker.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/create_partial.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/datetime/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/datetime/datetime_parser.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/declarative_source.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/declarative_stream.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/decoders/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/decoders/decoder.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/decoders/json_decoder.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/exceptions.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/extractors/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/extractors/http_selector.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/extractors/record_extractor.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/extractors/record_filter.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/extractors/record_selector.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/incremental/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/incremental/cursor.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/interpolation/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/interpolation/filters.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/interpolation/interpolation.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/interpolation/jinja.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/interpolation/macros.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/manifest_declarative_source.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/models/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/models/declarative_component_schema.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/parsers/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/parsers/class_types_registry.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/partition_routers/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/error_handlers/response_action.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/error_handlers/response_status.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/http_requester.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/request_option.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/request_path.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/requesters/requester.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/retrievers/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/retrievers/retriever.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/schema/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/schema/default_schema_loader.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/schema/schema_loader.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/spec/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/spec/spec.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/stream_slicers/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/transformations/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/transformations/add_fields.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/transformations/remove_fields.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/transformations/transformation.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/types.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/declarative/yaml_declarative_source.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/deprecated/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/deprecated/base_source.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/deprecated/client.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/embedded/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/embedded/base_integration.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/embedded/catalog.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/embedded/runner.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/embedded/tools.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/availability_strategy/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/config/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/config/file_based_stream_config.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/discovery_policy/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/exceptions.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/file_based_source.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/file_based_stream_reader.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/file_types/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/file_types/avro_parser.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/file_types/csv_parser.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/file_types/file_type_parser.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/file_types/parquet_parser.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/remote_file.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/schema_helpers.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/stream/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/stream/cursor/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/types.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/http_config.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/http_logger.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/message/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/message/repository.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/singer/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/singer/singer_helpers.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/singer/source.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/source.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/availability_strategy.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/call_rate.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/concurrent/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/concurrent/abstract_stream.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/concurrent/adapters.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/concurrent/availability_strategy.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/concurrent/cursor.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/concurrent/exceptions.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/concurrent/partition_reader.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/concurrent/partitions/partition.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/concurrent/partitions/types.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/concurrent/state_converter.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/core.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/http/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/http/auth/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/http/auth/core.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/http/auth/oauth.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/http/auth/token.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/http/availability_strategy.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/http/exceptions.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/http/http.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/http/rate_limiting.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/http/requests_native_auth/token.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/utils/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/streams/utils/stream_helper.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/utils/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/utils/casing.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/utils/catalog_helpers.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/utils/record_helper.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/utils/schema_helpers.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/utils/schema_models.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/utils/slice_logger.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/utils/transform.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/utils/types.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/utils/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/utils/airbyte_secrets_utils.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/utils/analytics_message.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/utils/constants.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/utils/datetime_format_inferrer.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/utils/event_timing.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/utils/is_cloud_environment.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/utils/mapping_helpers.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/utils/schema_inferrer.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/utils/spec_schema_transformations.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/utils/stream_status_utils.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/utils/traced_exception.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk.egg-info/dependency_links.txt +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk.egg-info/requires.txt +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk.egg-info/top_level.txt +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/pyproject.toml +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/setup.cfg +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/source_declarative_manifest/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/source_declarative_manifest/main.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/connector_builder/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/connector_builder/test_connector_builder_handler.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/connector_builder/test_message_grouper.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/connector_builder/utils.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/destinations/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/destinations/test_destination.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/singer/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/singer/test_singer_helpers.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/singer/test_singer_source.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/auth/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/auth/test_oauth.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/auth/test_session_token_auth.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/auth/test_token_auth.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/auth/test_token_provider.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/checks/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/checks/test_check_stream.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/decoders/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/decoders/test_json_decoder.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/external_component.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/extractors/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/extractors/test_dpath_extractor.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/extractors/test_record_filter.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/extractors/test_record_selector.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/incremental/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/incremental/test_per_partition_cursor.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/interpolation/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/interpolation/test_filters.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/interpolation/test_interpolated_nested_mapping.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/interpolation/test_interpolated_string.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/interpolation/test_jinja.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/interpolation/test_macros.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/parsers/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/parsers/test_manifest_component_transformer.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/parsers/test_manifest_reference_resolver.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/parsers/testing_components.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/partition_routers/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/partition_routers/test_list_partition_router.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/partition_routers/test_single_partition_router.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/requesters/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/requesters/error_handlers/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_constant_backoff.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/requesters/error_handlers/test_http_response_filter.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/requesters/error_handlers/test_response_status.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/requesters/paginators/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/requesters/paginators/test_page_increment.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/requesters/paginators/test_request_option.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/requesters/paginators/test_stop_condition.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/requesters/request_options/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/requesters/test_http_requester.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/retrievers/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/retrievers/test_simple_retriever.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/schema/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/schema/source_test/SourceTest.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/schema/source_test/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/schema/test_default_schema_loader.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/schema/test_inline_schema_loader.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/schema/test_json_file_schema_loader.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/states/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/stream_slicers/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/test_create_partial.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/test_declarative_stream.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/test_manifest_declarative_source.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/declarative/test_yaml_declarative_source.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/availability_strategy/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/config/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/config/test_csv_format.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/config/test_file_based_stream_config.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/discovery_policy/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/discovery_policy/test_default_discovery_policy.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/file_types/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/file_types/test_avro_parser.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/file_types/test_csv_parser.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/file_types/test_jsonl_parser.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/file_types/test_parquet_parser.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/helpers.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/in_memory_files_source.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/scenarios/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/scenarios/avro_scenarios.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/scenarios/check_scenarios.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/scenarios/file_based_source_builder.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/scenarios/incremental_scenarios.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/scenarios/parquet_scenarios.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/scenarios/scenario_builder.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/stream/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/stream/test_default_file_based_stream.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/test_file_based_stream_reader.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/test_scenarios.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/test_schema_helpers.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/fixtures/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/fixtures/source_test_fixture.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/message/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/message/test_repository.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/concurrent/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/concurrent/scenarios/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/concurrent/scenarios/utils.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/concurrent/test_adapters.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/concurrent/test_cursor.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/concurrent/test_partition_reader.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/concurrent/test_state_converter.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/http/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/http/auth/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/http/auth/test_auth.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/http/requests_native_auth/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/http/test_availability_strategy.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/http/test_http.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/test_availability_strategy.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/test_call_rate.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/test_stream_read.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/streams/test_streams_core.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/test_abstract_source.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/test_config.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/test_connector_state_manager.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/test_http_logger.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/test_integration_source.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/test_source.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/utils/__init__.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/utils/test_datetime_format_inferrer.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/utils/test_mapping_helpers.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/utils/test_rate_limiting.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/utils/test_schema_inferrer.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/utils/test_secret_utils.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/utils/test_stream_status_utils.py +0 -0
- {airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/utils/test_traced_exception.py +0 -0
{airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/destinations/vector_db_based/config.py
RENAMED
@@ -4,6 +4,7 @@
|
|
4
4
|
|
5
5
|
from typing import List, Literal, Optional, Union
|
6
6
|
|
7
|
+
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
7
8
|
from pydantic import BaseModel, Field
|
8
9
|
|
9
10
|
|
@@ -16,11 +17,10 @@ class SeparatorSplitterConfigModel(BaseModel):
|
|
16
17
|
)
|
17
18
|
keep_separator: bool = Field(default=False, title="Keep separator", description="Whether to keep the separator in the resulting chunks")
|
18
19
|
|
19
|
-
class Config:
|
20
|
+
class Config(OneOfOptionConfig):
|
20
21
|
title = "By Separator"
|
21
|
-
|
22
|
-
|
23
|
-
}
|
22
|
+
description = "Split the text by the list of separators until the chunk size is reached, using the earlier mentioned separators where possible. This is useful for splitting text fields by paragraphs, sentences, words, etc."
|
23
|
+
discriminator = "mode"
|
24
24
|
|
25
25
|
|
26
26
|
class MarkdownHeaderSplitterConfigModel(BaseModel):
|
@@ -33,11 +33,10 @@ class MarkdownHeaderSplitterConfigModel(BaseModel):
|
|
33
33
|
ge=1,
|
34
34
|
)
|
35
35
|
|
36
|
-
class Config:
|
36
|
+
class Config(OneOfOptionConfig):
|
37
37
|
title = "By Markdown header"
|
38
|
-
|
39
|
-
|
40
|
-
}
|
38
|
+
description = "Split the text by Markdown headers down to the specified header level. If the chunk size fits multiple sections, they will be combined into a single chunk."
|
39
|
+
discriminator = "mode"
|
41
40
|
|
42
41
|
|
43
42
|
class CodeSplitterConfigModel(BaseModel):
|
@@ -65,11 +64,12 @@ class CodeSplitterConfigModel(BaseModel):
|
|
65
64
|
],
|
66
65
|
)
|
67
66
|
|
68
|
-
class Config:
|
67
|
+
class Config(OneOfOptionConfig):
|
69
68
|
title = "By Programming Language"
|
70
|
-
|
71
|
-
"
|
72
|
-
|
69
|
+
description = (
|
70
|
+
"Split the text by suitable delimiters based on the programming language. This is useful for splitting code into chunks."
|
71
|
+
)
|
72
|
+
discriminator = "mode"
|
73
73
|
|
74
74
|
|
75
75
|
TextSplitterConfigModel = Union[SeparatorSplitterConfigModel, MarkdownHeaderSplitterConfigModel, CodeSplitterConfigModel]
|
@@ -128,11 +128,12 @@ class OpenAIEmbeddingConfigModel(BaseModel):
|
|
128
128
|
mode: Literal["openai"] = Field("openai", const=True)
|
129
129
|
openai_key: str = Field(..., title="OpenAI API key", airbyte_secret=True)
|
130
130
|
|
131
|
-
class Config:
|
131
|
+
class Config(OneOfOptionConfig):
|
132
132
|
title = "OpenAI"
|
133
|
-
|
134
|
-
"
|
135
|
-
|
133
|
+
description = (
|
134
|
+
"Use the OpenAI API to embed text. This option is using the text-embedding-ada-002 model with 1536 embedding dimensions."
|
135
|
+
)
|
136
|
+
discriminator = "mode"
|
136
137
|
|
137
138
|
|
138
139
|
class OpenAICompatibleEmbeddingConfigModel(BaseModel):
|
@@ -151,9 +152,10 @@ class OpenAICompatibleEmbeddingConfigModel(BaseModel):
|
|
151
152
|
title="Embedding dimensions", description="The number of dimensions the embedding model is generating", examples=[1536, 384]
|
152
153
|
)
|
153
154
|
|
154
|
-
class Config:
|
155
|
+
class Config(OneOfOptionConfig):
|
155
156
|
title = "OpenAI-compatible"
|
156
|
-
|
157
|
+
description = "Use a service that's compatible with the OpenAI API to embed text."
|
158
|
+
discriminator = "mode"
|
157
159
|
|
158
160
|
|
159
161
|
class AzureOpenAIEmbeddingConfigModel(BaseModel):
|
@@ -177,21 +179,19 @@ class AzureOpenAIEmbeddingConfigModel(BaseModel):
|
|
177
179
|
examples=["your-resource-name"],
|
178
180
|
)
|
179
181
|
|
180
|
-
class Config:
|
182
|
+
class Config(OneOfOptionConfig):
|
181
183
|
title = "Azure OpenAI"
|
182
|
-
|
183
|
-
|
184
|
-
}
|
184
|
+
description = "Use the Azure-hosted OpenAI API to embed text. This option is using the text-embedding-ada-002 model with 1536 embedding dimensions."
|
185
|
+
discriminator = "mode"
|
185
186
|
|
186
187
|
|
187
188
|
class FakeEmbeddingConfigModel(BaseModel):
|
188
189
|
mode: Literal["fake"] = Field("fake", const=True)
|
189
190
|
|
190
|
-
class Config:
|
191
|
+
class Config(OneOfOptionConfig):
|
191
192
|
title = "Fake"
|
192
|
-
|
193
|
-
|
194
|
-
}
|
193
|
+
description = "Use a fake embedding made out of random vectors with 1536 embedding dimensions. This is useful for testing the data pipeline without incurring any costs."
|
194
|
+
discriminator = "mode"
|
195
195
|
|
196
196
|
|
197
197
|
class FromFieldEmbeddingConfigModel(BaseModel):
|
@@ -203,17 +203,17 @@ class FromFieldEmbeddingConfigModel(BaseModel):
|
|
203
203
|
..., title="Embedding dimensions", description="The number of dimensions the embedding model is generating", examples=[1536, 384]
|
204
204
|
)
|
205
205
|
|
206
|
-
class Config:
|
206
|
+
class Config(OneOfOptionConfig):
|
207
207
|
title = "From Field"
|
208
|
-
|
209
|
-
|
210
|
-
}
|
208
|
+
description = "Use a field in the record as the embedding. This is useful if you already have an embedding for your data and want to store it in the vector store."
|
209
|
+
discriminator = "mode"
|
211
210
|
|
212
211
|
|
213
212
|
class CohereEmbeddingConfigModel(BaseModel):
|
214
213
|
mode: Literal["cohere"] = Field("cohere", const=True)
|
215
214
|
cohere_key: str = Field(..., title="Cohere API key", airbyte_secret=True)
|
216
215
|
|
217
|
-
class Config:
|
216
|
+
class Config(OneOfOptionConfig):
|
218
217
|
title = "Cohere"
|
219
|
-
|
218
|
+
description = "Use the Cohere API to embed text."
|
219
|
+
discriminator = "mode"
|
{airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/config/avro_format.py
RENAMED
@@ -2,12 +2,14 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
+
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
5
6
|
from pydantic import BaseModel, Field
|
6
7
|
|
7
8
|
|
8
9
|
class AvroFormat(BaseModel):
|
9
|
-
class Config:
|
10
|
+
class Config(OneOfOptionConfig):
|
10
11
|
title = "Avro Format"
|
12
|
+
discriminator = "filetype"
|
11
13
|
|
12
14
|
filetype: str = Field(
|
13
15
|
"avro",
|
{airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/config/csv_format.py
RENAMED
@@ -6,6 +6,7 @@ import codecs
|
|
6
6
|
from enum import Enum
|
7
7
|
from typing import Any, Dict, List, Optional, Set, Union
|
8
8
|
|
9
|
+
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
9
10
|
from pydantic import BaseModel, Field, ValidationError, root_validator, validator
|
10
11
|
|
11
12
|
|
@@ -21,8 +22,9 @@ class CsvHeaderDefinitionType(Enum):
|
|
21
22
|
|
22
23
|
|
23
24
|
class CsvHeaderFromCsv(BaseModel):
|
24
|
-
class Config:
|
25
|
+
class Config(OneOfOptionConfig):
|
25
26
|
title = "From CSV"
|
27
|
+
discriminator = "header_definition_type"
|
26
28
|
|
27
29
|
header_definition_type: str = Field(
|
28
30
|
CsvHeaderDefinitionType.FROM_CSV.value,
|
@@ -34,8 +36,9 @@ class CsvHeaderFromCsv(BaseModel):
|
|
34
36
|
|
35
37
|
|
36
38
|
class CsvHeaderAutogenerated(BaseModel):
|
37
|
-
class Config:
|
39
|
+
class Config(OneOfOptionConfig):
|
38
40
|
title = "Autogenerated"
|
41
|
+
discriminator = "header_definition_type"
|
39
42
|
|
40
43
|
header_definition_type: str = Field(
|
41
44
|
CsvHeaderDefinitionType.AUTOGENERATED.value,
|
@@ -47,8 +50,9 @@ class CsvHeaderAutogenerated(BaseModel):
|
|
47
50
|
|
48
51
|
|
49
52
|
class CsvHeaderUserProvided(BaseModel):
|
50
|
-
class Config:
|
53
|
+
class Config(OneOfOptionConfig):
|
51
54
|
title = "User Provided"
|
55
|
+
discriminator = "header_definition_type"
|
52
56
|
|
53
57
|
header_definition_type: str = Field(
|
54
58
|
CsvHeaderDefinitionType.USER_PROVIDED.value,
|
@@ -74,8 +78,9 @@ DEFAULT_FALSE_VALUES = ["n", "no", "f", "false", "off", "0"]
|
|
74
78
|
|
75
79
|
|
76
80
|
class CsvFormat(BaseModel):
|
77
|
-
class Config:
|
81
|
+
class Config(OneOfOptionConfig):
|
78
82
|
title = "CSV Format"
|
83
|
+
discriminator = "filetype"
|
79
84
|
|
80
85
|
filetype: str = Field(
|
81
86
|
"csv",
|
@@ -123,7 +128,7 @@ class CsvFormat(BaseModel):
|
|
123
128
|
)
|
124
129
|
header_definition: Union[CsvHeaderFromCsv, CsvHeaderAutogenerated, CsvHeaderUserProvided] = Field(
|
125
130
|
title="CSV Header Definition",
|
126
|
-
default=CsvHeaderFromCsv(),
|
131
|
+
default=CsvHeaderFromCsv(header_definition_type=CsvHeaderDefinitionType.FROM_CSV.value),
|
127
132
|
description="How headers will be defined. `User Provided` assumes the CSV does not have a header row and uses the headers provided and `Autogenerated` assumes the CSV does not have a header row and the CDK will generate headers using for `f{i}` where `i` is the index starting from 0. Else, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows.",
|
128
133
|
)
|
129
134
|
true_values: Set[str] = Field(
|
{airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/config/jsonl_format.py
RENAMED
@@ -2,12 +2,14 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
+
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
5
6
|
from pydantic import BaseModel, Field
|
6
7
|
|
7
8
|
|
8
9
|
class JsonlFormat(BaseModel):
|
9
|
-
class Config:
|
10
|
+
class Config(OneOfOptionConfig):
|
10
11
|
title = "Jsonl Format"
|
12
|
+
discriminator = "filetype"
|
11
13
|
|
12
14
|
filetype: str = Field(
|
13
15
|
"jsonl",
|
{airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/airbyte_cdk/sources/file_based/config/parquet_format.py
RENAMED
@@ -2,12 +2,14 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
+
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
5
6
|
from pydantic import BaseModel, Field
|
6
7
|
|
7
8
|
|
8
9
|
class ParquetFormat(BaseModel):
|
9
|
-
class Config:
|
10
|
+
class Config(OneOfOptionConfig):
|
10
11
|
title = "Parquet Format"
|
12
|
+
discriminator = "filetype"
|
11
13
|
|
12
14
|
filetype: str = Field(
|
13
15
|
"parquet",
|
@@ -0,0 +1,27 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from typing import Optional
|
6
|
+
|
7
|
+
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
8
|
+
from pydantic import BaseModel, Field
|
9
|
+
|
10
|
+
|
11
|
+
class UnstructuredFormat(BaseModel):
|
12
|
+
class Config(OneOfOptionConfig):
|
13
|
+
title = "Document File Type Format (Experimental)"
|
14
|
+
description = "Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file."
|
15
|
+
discriminator = "filetype"
|
16
|
+
|
17
|
+
filetype: str = Field(
|
18
|
+
"unstructured",
|
19
|
+
const=True,
|
20
|
+
)
|
21
|
+
|
22
|
+
skip_unprocessable_file_types: Optional[bool] = Field(
|
23
|
+
default=True,
|
24
|
+
title="Skip Unprocessable File Types",
|
25
|
+
description="If true, skip files that cannot be parsed because of their file type and log a warning. If false, fail the sync. Corrupted files with valid file types will still result in a failed sync.",
|
26
|
+
always_show=True,
|
27
|
+
)
|
@@ -6,6 +6,7 @@ from io import BytesIO, IOBase
|
|
6
6
|
from typing import Any, Dict, Iterable, List, Mapping, Optional
|
7
7
|
|
8
8
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
|
9
|
+
from airbyte_cdk.sources.file_based.config.unstructured_format import UnstructuredFormat
|
9
10
|
from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError
|
10
11
|
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
|
11
12
|
from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
|
@@ -60,11 +61,12 @@ class UnstructuredParser(FileTypeParser):
|
|
60
61
|
stream_reader: AbstractFileBasedStreamReader,
|
61
62
|
logger: logging.Logger,
|
62
63
|
) -> SchemaType:
|
64
|
+
format = _extract_format(config)
|
63
65
|
with stream_reader.open_file(file, self.file_read_mode, None, logger) as file_handle:
|
64
66
|
filetype = self._get_filetype(file_handle, file)
|
65
67
|
|
66
68
|
if filetype not in self._supported_file_types():
|
67
|
-
|
69
|
+
self._handle_unprocessable_file(file, format, logger)
|
68
70
|
|
69
71
|
return {
|
70
72
|
"content": {"type": "string"},
|
@@ -79,14 +81,16 @@ class UnstructuredParser(FileTypeParser):
|
|
79
81
|
logger: logging.Logger,
|
80
82
|
discovered_schema: Optional[Mapping[str, SchemaType]],
|
81
83
|
) -> Iterable[Dict[str, Any]]:
|
84
|
+
format = _extract_format(config)
|
82
85
|
with stream_reader.open_file(file, self.file_read_mode, None, logger) as file_handle:
|
83
|
-
markdown = self._read_file(file_handle, file)
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
86
|
+
markdown = self._read_file(file_handle, file, format, logger)
|
87
|
+
if markdown is not None:
|
88
|
+
yield {
|
89
|
+
"content": markdown,
|
90
|
+
"document_key": file.uri,
|
91
|
+
}
|
92
|
+
|
93
|
+
def _read_file(self, file_handle: IOBase, remote_file: RemoteFile, format: UnstructuredFormat, logger: logging.Logger) -> Optional[str]:
|
90
94
|
_import_unstructured()
|
91
95
|
if (
|
92
96
|
(not unstructured_partition_pdf)
|
@@ -104,7 +108,8 @@ class UnstructuredParser(FileTypeParser):
|
|
104
108
|
decoded_content: str = unstructured_optional_decode(file_content)
|
105
109
|
return decoded_content
|
106
110
|
if filetype not in self._supported_file_types():
|
107
|
-
|
111
|
+
self._handle_unprocessable_file(remote_file, format, logger)
|
112
|
+
return None
|
108
113
|
|
109
114
|
file: Any = file_handle
|
110
115
|
if filetype == FileType.PDF:
|
@@ -120,6 +125,12 @@ class UnstructuredParser(FileTypeParser):
|
|
120
125
|
|
121
126
|
return self._render_markdown(elements)
|
122
127
|
|
128
|
+
def _handle_unprocessable_file(self, remote_file: RemoteFile, format: UnstructuredFormat, logger: logging.Logger) -> None:
|
129
|
+
if format.skip_unprocessable_file_types:
|
130
|
+
logger.warn(f"File {remote_file.uri} cannot be parsed. Skipping it.")
|
131
|
+
else:
|
132
|
+
raise RecordParseError(FileBasedSourceError.ERROR_PARSING_RECORD, filename=remote_file.uri)
|
133
|
+
|
123
134
|
def _get_filetype(self, file: IOBase, remote_file: RemoteFile) -> Optional[FileType]:
|
124
135
|
"""
|
125
136
|
Detect the file type based on the file name and the file content.
|
@@ -172,3 +183,10 @@ class UnstructuredParser(FileTypeParser):
|
|
172
183
|
@property
|
173
184
|
def file_read_mode(self) -> FileReadMode:
|
174
185
|
return FileReadMode.READ_BINARY
|
186
|
+
|
187
|
+
|
188
|
+
def _extract_format(config: FileBasedStreamConfig) -> UnstructuredFormat:
|
189
|
+
config_format = config.format
|
190
|
+
if not isinstance(config_format, UnstructuredFormat):
|
191
|
+
raise ValueError(f"Invalid format config: {config_format}")
|
192
|
+
return config_format
|
@@ -154,7 +154,7 @@ class ThreadBasedConcurrentStream(AbstractStream):
|
|
154
154
|
if len(futures) < self._max_concurrent_tasks:
|
155
155
|
return
|
156
156
|
|
157
|
-
for index in range(len(futures)):
|
157
|
+
for index in reversed(range(len(futures))):
|
158
158
|
future = futures[index]
|
159
159
|
optional_exception = future.exception()
|
160
160
|
if optional_exception:
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from typing import Any, Dict
|
6
|
+
|
7
|
+
|
8
|
+
class OneOfOptionConfig:
|
9
|
+
"""
|
10
|
+
Base class to configure a Pydantic model that's used as a oneOf option in a parent model in a way that's compatible with all Airbyte consumers.
|
11
|
+
|
12
|
+
Inherit from this class in the nested Config class in a model and set title and description (these show up in the UI) and discriminator (this is making sure it's marked as required in the schema).
|
13
|
+
|
14
|
+
Usage:
|
15
|
+
|
16
|
+
```python
|
17
|
+
class OptionModel(BaseModel):
|
18
|
+
mode: Literal["option_a"] = Field("option_a", const=True)
|
19
|
+
option_a_field: str = Field(...)
|
20
|
+
|
21
|
+
class Config(OneOfOptionConfig):
|
22
|
+
title = "Option A"
|
23
|
+
description = "Option A description"
|
24
|
+
discriminator = "mode"
|
25
|
+
```
|
26
|
+
"""
|
27
|
+
|
28
|
+
@staticmethod
|
29
|
+
def schema_extra(schema: Dict[str, Any], model: Any) -> None:
|
30
|
+
if hasattr(model.Config, "description"):
|
31
|
+
schema["description"] = model.Config.description
|
32
|
+
if hasattr(model.Config, "discriminator"):
|
33
|
+
schema.setdefault("required", []).append(model.Config.discriminator)
|
@@ -247,6 +247,7 @@ airbyte_cdk/utils/datetime_format_inferrer.py
|
|
247
247
|
airbyte_cdk/utils/event_timing.py
|
248
248
|
airbyte_cdk/utils/is_cloud_environment.py
|
249
249
|
airbyte_cdk/utils/mapping_helpers.py
|
250
|
+
airbyte_cdk/utils/oneof_option_config.py
|
250
251
|
airbyte_cdk/utils/schema_inferrer.py
|
251
252
|
airbyte_cdk/utils/spec_schema_transformations.py
|
252
253
|
airbyte_cdk/utils/stream_status_utils.py
|
@@ -36,7 +36,7 @@ setup(
|
|
36
36
|
name="airbyte-cdk",
|
37
37
|
# The version of the airbyte-cdk package is used at runtime to validate manifests. That validation must be
|
38
38
|
# updated if our semver format changes such as using release candidate versions.
|
39
|
-
version="0.53.2",
|
39
|
+
version="0.53.4",
|
40
40
|
description="A framework for writing Airbyte Connectors.",
|
41
41
|
long_description=README,
|
42
42
|
long_description_content_type="text/markdown",
|
@@ -7,6 +7,7 @@ from datetime import datetime
|
|
7
7
|
from unittest.mock import MagicMock, mock_open, patch
|
8
8
|
|
9
9
|
import pytest
|
10
|
+
from airbyte_cdk.sources.file_based.config.unstructured_format import UnstructuredFormat
|
10
11
|
from airbyte_cdk.sources.file_based.exceptions import RecordParseError
|
11
12
|
from airbyte_cdk.sources.file_based.file_types import UnstructuredParser
|
12
13
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
@@ -17,37 +18,48 @@ FILE_URI = "path/to/file.xyz"
|
|
17
18
|
|
18
19
|
|
19
20
|
@pytest.mark.parametrize(
|
20
|
-
"filetype, raises",
|
21
|
+
"filetype, format_config, raises",
|
21
22
|
[
|
22
23
|
pytest.param(
|
23
24
|
FileType.MD,
|
25
|
+
UnstructuredFormat(skip_unprocessable_file_types=False),
|
24
26
|
False,
|
25
27
|
id="markdown file",
|
26
28
|
),
|
27
29
|
pytest.param(
|
28
30
|
FileType.CSV,
|
31
|
+
UnstructuredFormat(skip_unprocessable_file_types=False),
|
29
32
|
True,
|
30
33
|
id="wrong file format",
|
31
34
|
),
|
35
|
+
pytest.param(
|
36
|
+
FileType.CSV,
|
37
|
+
UnstructuredFormat(skip_unprocessable_file_types=True),
|
38
|
+
False,
|
39
|
+
id="wrong file format skipping",
|
40
|
+
),
|
32
41
|
pytest.param(
|
33
42
|
FileType.PDF,
|
43
|
+
UnstructuredFormat(skip_unprocessable_file_types=False),
|
34
44
|
False,
|
35
45
|
id="pdf file",
|
36
46
|
),
|
37
47
|
pytest.param(
|
38
48
|
FileType.DOCX,
|
49
|
+
UnstructuredFormat(skip_unprocessable_file_types=False),
|
39
50
|
False,
|
40
51
|
id="docx file",
|
41
52
|
),
|
42
53
|
pytest.param(
|
43
54
|
FileType.PPTX,
|
55
|
+
UnstructuredFormat(skip_unprocessable_file_types=False),
|
44
56
|
False,
|
45
57
|
id="pptx file",
|
46
58
|
),
|
47
59
|
],
|
48
60
|
)
|
49
61
|
@patch("airbyte_cdk.sources.file_based.file_types.unstructured_parser.detect_filetype")
|
50
|
-
def test_infer_schema(mock_detect_filetype, filetype, raises):
|
62
|
+
def test_infer_schema(mock_detect_filetype, filetype, format_config, raises):
|
51
63
|
# use a fresh event loop to avoid leaking into other tests
|
52
64
|
main_loop = asyncio.get_event_loop()
|
53
65
|
loop = asyncio.new_event_loop()
|
@@ -59,11 +71,13 @@ def test_infer_schema(mock_detect_filetype, filetype, raises):
|
|
59
71
|
fake_file.uri = FILE_URI
|
60
72
|
logger = MagicMock()
|
61
73
|
mock_detect_filetype.return_value = filetype
|
74
|
+
config = MagicMock()
|
75
|
+
config.format = format_config
|
62
76
|
if raises:
|
63
77
|
with pytest.raises(RecordParseError):
|
64
|
-
loop.run_until_complete(UnstructuredParser().infer_schema(
|
78
|
+
loop.run_until_complete(UnstructuredParser().infer_schema(config, fake_file, stream_reader, logger))
|
65
79
|
else:
|
66
|
-
schema = loop.run_until_complete(UnstructuredParser().infer_schema(
|
80
|
+
schema = loop.run_until_complete(UnstructuredParser().infer_schema(config, MagicMock(), MagicMock(), MagicMock()))
|
67
81
|
assert schema == {
|
68
82
|
"content": {"type": "string"},
|
69
83
|
"document_key": {"type": "string"},
|
@@ -73,10 +87,11 @@ def test_infer_schema(mock_detect_filetype, filetype, raises):
|
|
73
87
|
|
74
88
|
|
75
89
|
@pytest.mark.parametrize(
|
76
|
-
"filetype, parse_result, raises, expected_records",
|
90
|
+
"filetype, format_config, parse_result, raises, expected_records",
|
77
91
|
[
|
78
92
|
pytest.param(
|
79
93
|
FileType.MD,
|
94
|
+
UnstructuredFormat(skip_unprocessable_file_types=False),
|
80
95
|
"test",
|
81
96
|
False,
|
82
97
|
[
|
@@ -89,13 +104,23 @@ def test_infer_schema(mock_detect_filetype, filetype, raises):
|
|
89
104
|
),
|
90
105
|
pytest.param(
|
91
106
|
FileType.CSV,
|
92
|
-
|
107
|
+
UnstructuredFormat(skip_unprocessable_file_types=False),
|
108
|
+
None,
|
93
109
|
True,
|
94
110
|
None,
|
95
111
|
id="wrong file format",
|
96
112
|
),
|
113
|
+
pytest.param(
|
114
|
+
FileType.CSV,
|
115
|
+
UnstructuredFormat(skip_unprocessable_file_types=True),
|
116
|
+
None,
|
117
|
+
False,
|
118
|
+
[],
|
119
|
+
id="skip_unprocessable_file_types",
|
120
|
+
),
|
97
121
|
pytest.param(
|
98
122
|
FileType.PDF,
|
123
|
+
UnstructuredFormat(skip_unprocessable_file_types=False),
|
99
124
|
[
|
100
125
|
Title("heading"),
|
101
126
|
Text("This is the text"),
|
@@ -113,6 +138,7 @@ def test_infer_schema(mock_detect_filetype, filetype, raises):
|
|
113
138
|
),
|
114
139
|
pytest.param(
|
115
140
|
FileType.PDF,
|
141
|
+
UnstructuredFormat(skip_unprocessable_file_types=False),
|
116
142
|
[
|
117
143
|
Title("first level heading", metadata=ElementMetadata(category_depth=1)),
|
118
144
|
Title("second level heading", metadata=ElementMetadata(category_depth=2)),
|
@@ -128,6 +154,7 @@ def test_infer_schema(mock_detect_filetype, filetype, raises):
|
|
128
154
|
),
|
129
155
|
pytest.param(
|
130
156
|
FileType.DOCX,
|
157
|
+
UnstructuredFormat(skip_unprocessable_file_types=False),
|
131
158
|
[
|
132
159
|
Title("heading"),
|
133
160
|
Text("This is the text"),
|
@@ -157,6 +184,7 @@ def test_parse_records(
|
|
157
184
|
mock_partition_pptx,
|
158
185
|
mock_partition_pdf,
|
159
186
|
filetype,
|
187
|
+
format_config,
|
160
188
|
parse_result,
|
161
189
|
raises,
|
162
190
|
expected_records,
|
@@ -166,6 +194,8 @@ def test_parse_records(
|
|
166
194
|
fake_file = RemoteFile(uri=FILE_URI, last_modified=datetime.now())
|
167
195
|
fake_file.uri = FILE_URI
|
168
196
|
logger = MagicMock()
|
197
|
+
config = MagicMock()
|
198
|
+
config.format = format_config
|
169
199
|
mock_detect_filetype.return_value = filetype
|
170
200
|
mock_partition_docx.return_value = parse_result
|
171
201
|
mock_partition_pptx.return_value = parse_result
|
@@ -173,6 +203,6 @@ def test_parse_records(
|
|
173
203
|
mock_optional_decode.side_effect = lambda x: x.decode("utf-8")
|
174
204
|
if raises:
|
175
205
|
with pytest.raises(RecordParseError):
|
176
|
-
list(UnstructuredParser().parse_records(
|
206
|
+
list(UnstructuredParser().parse_records(config, fake_file, stream_reader, logger, MagicMock()))
|
177
207
|
else:
|
178
|
-
assert list(UnstructuredParser().parse_records(
|
208
|
+
assert list(UnstructuredParser().parse_records(config, fake_file, stream_reader, logger, MagicMock())) == expected_records
|
{airbyte-cdk-0.53.2 → airbyte-cdk-0.53.4}/unit_tests/sources/file_based/scenarios/csv_scenarios.py
RENAMED
@@ -122,6 +122,7 @@ single_csv_scenario: TestScenario[InMemoryFilesSource] = (
|
|
122
122
|
"type": "boolean",
|
123
123
|
},
|
124
124
|
},
|
125
|
+
"required": ["filetype"],
|
125
126
|
},
|
126
127
|
{
|
127
128
|
"title": "CSV Format",
|
@@ -200,6 +201,7 @@ single_csv_scenario: TestScenario[InMemoryFilesSource] = (
|
|
200
201
|
"type": "string",
|
201
202
|
},
|
202
203
|
},
|
204
|
+
"required": ["header_definition_type"],
|
203
205
|
},
|
204
206
|
{
|
205
207
|
"title": "Autogenerated",
|
@@ -212,6 +214,7 @@ single_csv_scenario: TestScenario[InMemoryFilesSource] = (
|
|
212
214
|
"type": "string",
|
213
215
|
},
|
214
216
|
},
|
217
|
+
"required": ["header_definition_type"],
|
215
218
|
},
|
216
219
|
{
|
217
220
|
"title": "User Provided",
|
@@ -230,7 +233,7 @@ single_csv_scenario: TestScenario[InMemoryFilesSource] = (
|
|
230
233
|
"items": {"type": "string"},
|
231
234
|
},
|
232
235
|
},
|
233
|
-
"required": ["column_names"],
|
236
|
+
"required": ["column_names", "header_definition_type"],
|
234
237
|
},
|
235
238
|
],
|
236
239
|
},
|
@@ -258,6 +261,7 @@ single_csv_scenario: TestScenario[InMemoryFilesSource] = (
|
|
258
261
|
"enum": ["None", "Primitive Types Only"],
|
259
262
|
},
|
260
263
|
},
|
264
|
+
"required": ["filetype"],
|
261
265
|
},
|
262
266
|
{
|
263
267
|
"title": "Jsonl Format",
|
@@ -265,6 +269,7 @@ single_csv_scenario: TestScenario[InMemoryFilesSource] = (
|
|
265
269
|
"properties": {
|
266
270
|
"filetype": {"title": "Filetype", "default": "jsonl", "const": "jsonl", "type": "string"}
|
267
271
|
},
|
272
|
+
"required": ["filetype"],
|
268
273
|
},
|
269
274
|
{
|
270
275
|
"title": "Parquet Format",
|
@@ -283,6 +288,7 @@ single_csv_scenario: TestScenario[InMemoryFilesSource] = (
|
|
283
288
|
"type": "boolean",
|
284
289
|
},
|
285
290
|
},
|
291
|
+
"required": ["filetype"],
|
286
292
|
},
|
287
293
|
{
|
288
294
|
"title": "Document File Type Format (Experimental)",
|
@@ -293,9 +299,17 @@ single_csv_scenario: TestScenario[InMemoryFilesSource] = (
|
|
293
299
|
"default": "unstructured",
|
294
300
|
"const": "unstructured",
|
295
301
|
"type": "string",
|
296
|
-
}
|
302
|
+
},
|
303
|
+
"skip_unprocessable_file_types": {
|
304
|
+
"type": "boolean",
|
305
|
+
"default": True,
|
306
|
+
"title": "Skip Unprocessable File Types",
|
307
|
+
"description": "If true, skip files that cannot be parsed because of their file type and log a warning. If false, fail the sync. Corrupted files with valid file types will still result in a failed sync.",
|
308
|
+
"always_show": True,
|
309
|
+
},
|
297
310
|
},
|
298
311
|
"description": "Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file.",
|
312
|
+
"required": ["filetype"],
|
299
313
|
},
|
300
314
|
],
|
301
315
|
},
|