airbyte-cdk 0.72.1__py3-none-any.whl → 6.17.1.dev1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/__init__.py +355 -6
- airbyte_cdk/cli/__init__.py +1 -0
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
- airbyte_cdk/cli/source_declarative_manifest/_run.py +230 -0
- airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
- airbyte_cdk/config_observation.py +29 -10
- airbyte_cdk/connector.py +24 -24
- airbyte_cdk/connector_builder/README.md +53 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +37 -11
- airbyte_cdk/connector_builder/main.py +45 -13
- airbyte_cdk/connector_builder/message_grouper.py +189 -50
- airbyte_cdk/connector_builder/models.py +3 -2
- airbyte_cdk/destinations/__init__.py +4 -3
- airbyte_cdk/destinations/destination.py +54 -20
- airbyte_cdk/destinations/vector_db_based/README.md +37 -0
- airbyte_cdk/destinations/vector_db_based/config.py +40 -17
- airbyte_cdk/destinations/vector_db_based/document_processor.py +56 -17
- airbyte_cdk/destinations/vector_db_based/embedder.py +57 -15
- airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
- airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
- airbyte_cdk/destinations/vector_db_based/writer.py +24 -5
- airbyte_cdk/entrypoint.py +153 -44
- airbyte_cdk/exception_handler.py +21 -3
- airbyte_cdk/logger.py +30 -44
- airbyte_cdk/models/__init__.py +13 -2
- airbyte_cdk/models/airbyte_protocol.py +86 -1
- airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
- airbyte_cdk/models/file_transfer_record_message.py +13 -0
- airbyte_cdk/models/well_known_types.py +1 -1
- airbyte_cdk/sources/__init__.py +5 -1
- airbyte_cdk/sources/abstract_source.py +125 -79
- airbyte_cdk/sources/concurrent_source/__init__.py +7 -2
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +102 -36
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +29 -36
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +94 -10
- airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +20 -14
- airbyte_cdk/sources/config.py +3 -2
- airbyte_cdk/sources/connector_state_manager.py +49 -83
- airbyte_cdk/sources/declarative/async_job/job.py +52 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +497 -0
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +75 -0
- airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
- airbyte_cdk/sources/declarative/async_job/status.py +24 -0
- airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
- airbyte_cdk/sources/declarative/auth/__init__.py +2 -3
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
- airbyte_cdk/sources/declarative/auth/jwt.py +191 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +60 -20
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +10 -2
- airbyte_cdk/sources/declarative/auth/token.py +28 -10
- airbyte_cdk/sources/declarative/auth/token_provider.py +9 -8
- airbyte_cdk/sources/declarative/checks/check_stream.py +16 -8
- airbyte_cdk/sources/declarative/checks/connection_checker.py +4 -2
- airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +490 -0
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +4 -0
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +26 -6
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +1185 -85
- airbyte_cdk/sources/declarative/declarative_source.py +5 -2
- airbyte_cdk/sources/declarative/declarative_stream.py +95 -9
- airbyte_cdk/sources/declarative/decoders/__init__.py +23 -2
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
- airbyte_cdk/sources/declarative/decoders/decoder.py +11 -4
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +92 -5
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +12 -1
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +29 -24
- airbyte_cdk/sources/declarative/extractors/http_selector.py +4 -5
- airbyte_cdk/sources/declarative/extractors/record_extractor.py +2 -3
- airbyte_cdk/sources/declarative/extractors/record_filter.py +63 -8
- airbyte_cdk/sources/declarative/extractors/record_selector.py +85 -26
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +177 -0
- airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
- airbyte_cdk/sources/declarative/incremental/__init__.py +31 -3
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +340 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +156 -48
- airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +350 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +174 -74
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
- airbyte_cdk/sources/declarative/interpolation/filters.py +27 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +23 -5
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +12 -8
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +13 -6
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +21 -6
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +9 -3
- airbyte_cdk/sources/declarative/interpolation/jinja.py +72 -37
- airbyte_cdk/sources/declarative/interpolation/macros.py +72 -17
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +193 -52
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
- airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
- airbyte_cdk/sources/declarative/models/__init__.py +1 -1
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +1319 -603
- airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +2 -2
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +26 -4
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +26 -15
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +1759 -225
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +24 -4
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +39 -9
- airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
- airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +15 -3
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +222 -39
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +19 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +19 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +4 -2
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +41 -9
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +29 -14
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +5 -13
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +32 -16
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +46 -56
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +6 -32
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +119 -41
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +228 -0
- airbyte_cdk/sources/declarative/requesters/http_requester.py +98 -344
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +105 -46
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +14 -8
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +19 -8
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +53 -21
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +42 -19
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +25 -12
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +13 -10
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +26 -13
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +15 -2
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +91 -0
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +31 -14
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +27 -15
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +63 -10
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +1 -1
- airbyte_cdk/sources/declarative/requesters/requester.py +9 -17
- airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
- airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
- airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
- airbyte_cdk/sources/declarative/retrievers/__init__.py +6 -2
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +100 -0
- airbyte_cdk/sources/declarative/retrievers/retriever.py +1 -3
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +229 -73
- airbyte_cdk/sources/declarative/schema/__init__.py +14 -1
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +5 -3
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +236 -0
- airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +8 -8
- airbyte_cdk/sources/declarative/spec/spec.py +12 -5
- airbyte_cdk/sources/declarative/stream_slicers/__init__.py +1 -2
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +88 -0
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +9 -14
- airbyte_cdk/sources/declarative/transformations/add_fields.py +19 -11
- airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
- airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +13 -10
- airbyte_cdk/sources/declarative/transformations/transformation.py +5 -5
- airbyte_cdk/sources/declarative/types.py +19 -110
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +31 -10
- airbyte_cdk/sources/embedded/base_integration.py +16 -5
- airbyte_cdk/sources/embedded/catalog.py +16 -4
- airbyte_cdk/sources/embedded/runner.py +19 -3
- airbyte_cdk/sources/embedded/tools.py +5 -2
- airbyte_cdk/sources/file_based/README.md +152 -0
- airbyte_cdk/sources/file_based/__init__.py +24 -0
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +22 -6
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +46 -10
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +47 -10
- airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
- airbyte_cdk/sources/file_based/config/csv_format.py +29 -10
- airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +16 -4
- airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
- airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
- airbyte_cdk/sources/file_based/config/unstructured_format.py +13 -5
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
- airbyte_cdk/sources/file_based/exceptions.py +18 -15
- airbyte_cdk/sources/file_based/file_based_source.py +140 -33
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +69 -5
- airbyte_cdk/sources/file_based/file_types/__init__.py +14 -1
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +75 -24
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +116 -34
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +37 -0
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +24 -8
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +60 -18
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +141 -41
- airbyte_cdk/sources/file_based/remote_file.py +1 -1
- airbyte_cdk/sources/file_based/schema_helpers.py +38 -10
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +50 -13
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +67 -27
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +14 -23
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +54 -18
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +21 -9
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +27 -10
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +147 -45
- airbyte_cdk/sources/http_logger.py +8 -3
- airbyte_cdk/sources/message/__init__.py +7 -1
- airbyte_cdk/sources/message/repository.py +18 -4
- airbyte_cdk/sources/source.py +42 -38
- airbyte_cdk/sources/streams/__init__.py +2 -2
- airbyte_cdk/sources/streams/availability_strategy.py +54 -3
- airbyte_cdk/sources/streams/call_rate.py +64 -21
- airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
- airbyte_cdk/sources/{declarative/incremental → streams/checkpoint}/cursor.py +17 -14
- airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
- airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
- airbyte_cdk/sources/streams/concurrent/README.md +7 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +7 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +84 -75
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +30 -2
- airbyte_cdk/sources/streams/concurrent/cursor.py +313 -48
- airbyte_cdk/sources/streams/concurrent/default_stream.py +12 -3
- airbyte_cdk/sources/streams/concurrent/exceptions.py +3 -0
- airbyte_cdk/sources/streams/concurrent/helpers.py +14 -3
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +12 -3
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +10 -3
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -16
- airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +15 -5
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +109 -17
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +90 -72
- airbyte_cdk/sources/streams/core.py +412 -87
- airbyte_cdk/sources/streams/http/__init__.py +2 -1
- airbyte_cdk/sources/streams/http/availability_strategy.py +12 -101
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
- airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
- airbyte_cdk/sources/streams/http/exceptions.py +27 -7
- airbyte_cdk/sources/streams/http/http.py +369 -246
- airbyte_cdk/sources/streams/http/http_client.py +531 -0
- airbyte_cdk/sources/streams/http/rate_limiting.py +76 -12
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +28 -9
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +90 -35
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
- airbyte_cdk/sources/types.py +154 -0
- airbyte_cdk/sources/utils/record_helper.py +36 -21
- airbyte_cdk/sources/utils/schema_helpers.py +13 -6
- airbyte_cdk/sources/utils/slice_logger.py +4 -1
- airbyte_cdk/sources/utils/transform.py +54 -20
- airbyte_cdk/sql/_util/hashing.py +34 -0
- airbyte_cdk/sql/_util/name_normalizers.py +92 -0
- airbyte_cdk/sql/constants.py +32 -0
- airbyte_cdk/sql/exceptions.py +235 -0
- airbyte_cdk/sql/secrets.py +123 -0
- airbyte_cdk/sql/shared/__init__.py +15 -0
- airbyte_cdk/sql/shared/catalog_providers.py +145 -0
- airbyte_cdk/sql/shared/sql_processor.py +786 -0
- airbyte_cdk/sql/types.py +160 -0
- airbyte_cdk/test/catalog_builder.py +70 -18
- airbyte_cdk/test/entrypoint_wrapper.py +117 -42
- airbyte_cdk/test/mock_http/__init__.py +1 -1
- airbyte_cdk/test/mock_http/matcher.py +6 -0
- airbyte_cdk/test/mock_http/mocker.py +57 -10
- airbyte_cdk/test/mock_http/request.py +19 -3
- airbyte_cdk/test/mock_http/response.py +3 -1
- airbyte_cdk/test/mock_http/response_builder.py +32 -16
- airbyte_cdk/test/state_builder.py +18 -10
- airbyte_cdk/test/utils/__init__.py +1 -0
- airbyte_cdk/test/utils/data.py +24 -0
- airbyte_cdk/test/utils/http_mocking.py +16 -0
- airbyte_cdk/test/utils/manifest_only_fixtures.py +60 -0
- airbyte_cdk/test/utils/reading.py +26 -0
- airbyte_cdk/utils/__init__.py +2 -1
- airbyte_cdk/utils/airbyte_secrets_utils.py +5 -3
- airbyte_cdk/utils/analytics_message.py +10 -2
- airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
- airbyte_cdk/utils/event_timing.py +10 -10
- airbyte_cdk/utils/mapping_helpers.py +3 -1
- airbyte_cdk/utils/message_utils.py +20 -11
- airbyte_cdk/utils/print_buffer.py +75 -0
- airbyte_cdk/utils/schema_inferrer.py +198 -28
- airbyte_cdk/utils/slice_hasher.py +30 -0
- airbyte_cdk/utils/spec_schema_transformations.py +6 -3
- airbyte_cdk/utils/stream_status_utils.py +8 -1
- airbyte_cdk/utils/traced_exception.py +61 -21
- airbyte_cdk-6.17.1.dev1.dist-info/METADATA +109 -0
- airbyte_cdk-6.17.1.dev1.dist-info/RECORD +350 -0
- {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.17.1.dev1.dist-info}/WHEEL +1 -2
- airbyte_cdk-6.17.1.dev1.dist-info/entry_points.txt +3 -0
- airbyte_cdk/sources/declarative/create_partial.py +0 -92
- airbyte_cdk/sources/declarative/parsers/class_types_registry.py +0 -102
- airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py +0 -64
- airbyte_cdk/sources/declarative/requesters/error_handlers/response_action.py +0 -16
- airbyte_cdk/sources/declarative/requesters/error_handlers/response_status.py +0 -68
- airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py +0 -114
- airbyte_cdk/sources/deprecated/base_source.py +0 -94
- airbyte_cdk/sources/deprecated/client.py +0 -99
- airbyte_cdk/sources/singer/__init__.py +0 -8
- airbyte_cdk/sources/singer/singer_helpers.py +0 -304
- airbyte_cdk/sources/singer/source.py +0 -186
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -23
- airbyte_cdk/sources/streams/http/auth/__init__.py +0 -17
- airbyte_cdk/sources/streams/http/auth/core.py +0 -29
- airbyte_cdk/sources/streams/http/auth/oauth.py +0 -113
- airbyte_cdk/sources/streams/http/auth/token.py +0 -47
- airbyte_cdk/sources/streams/utils/stream_helper.py +0 -40
- airbyte_cdk/sources/utils/catalog_helpers.py +0 -22
- airbyte_cdk/sources/utils/schema_models.py +0 -84
- airbyte_cdk-0.72.1.dist-info/METADATA +0 -243
- airbyte_cdk-0.72.1.dist-info/RECORD +0 -466
- airbyte_cdk-0.72.1.dist-info/top_level.txt +0 -3
- source_declarative_manifest/main.py +0 -29
- unit_tests/connector_builder/__init__.py +0 -3
- unit_tests/connector_builder/test_connector_builder_handler.py +0 -871
- unit_tests/connector_builder/test_message_grouper.py +0 -713
- unit_tests/connector_builder/utils.py +0 -27
- unit_tests/destinations/test_destination.py +0 -243
- unit_tests/singer/test_singer_helpers.py +0 -56
- unit_tests/singer/test_singer_source.py +0 -112
- unit_tests/sources/__init__.py +0 -0
- unit_tests/sources/concurrent_source/__init__.py +0 -3
- unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +0 -106
- unit_tests/sources/declarative/__init__.py +0 -3
- unit_tests/sources/declarative/auth/__init__.py +0 -3
- unit_tests/sources/declarative/auth/test_oauth.py +0 -331
- unit_tests/sources/declarative/auth/test_selective_authenticator.py +0 -39
- unit_tests/sources/declarative/auth/test_session_token_auth.py +0 -182
- unit_tests/sources/declarative/auth/test_token_auth.py +0 -200
- unit_tests/sources/declarative/auth/test_token_provider.py +0 -73
- unit_tests/sources/declarative/checks/__init__.py +0 -3
- unit_tests/sources/declarative/checks/test_check_stream.py +0 -146
- unit_tests/sources/declarative/decoders/__init__.py +0 -0
- unit_tests/sources/declarative/decoders/test_json_decoder.py +0 -16
- unit_tests/sources/declarative/external_component.py +0 -13
- unit_tests/sources/declarative/extractors/__init__.py +0 -3
- unit_tests/sources/declarative/extractors/test_dpath_extractor.py +0 -55
- unit_tests/sources/declarative/extractors/test_record_filter.py +0 -55
- unit_tests/sources/declarative/extractors/test_record_selector.py +0 -179
- unit_tests/sources/declarative/incremental/__init__.py +0 -0
- unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +0 -860
- unit_tests/sources/declarative/incremental/test_per_partition_cursor.py +0 -406
- unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +0 -332
- unit_tests/sources/declarative/interpolation/__init__.py +0 -3
- unit_tests/sources/declarative/interpolation/test_filters.py +0 -80
- unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py +0 -40
- unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py +0 -35
- unit_tests/sources/declarative/interpolation/test_interpolated_nested_mapping.py +0 -45
- unit_tests/sources/declarative/interpolation/test_interpolated_string.py +0 -25
- unit_tests/sources/declarative/interpolation/test_jinja.py +0 -240
- unit_tests/sources/declarative/interpolation/test_macros.py +0 -73
- unit_tests/sources/declarative/parsers/__init__.py +0 -3
- unit_tests/sources/declarative/parsers/test_manifest_component_transformer.py +0 -406
- unit_tests/sources/declarative/parsers/test_manifest_reference_resolver.py +0 -139
- unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +0 -1847
- unit_tests/sources/declarative/parsers/testing_components.py +0 -36
- unit_tests/sources/declarative/partition_routers/__init__.py +0 -3
- unit_tests/sources/declarative/partition_routers/test_list_partition_router.py +0 -155
- unit_tests/sources/declarative/partition_routers/test_single_partition_router.py +0 -14
- unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py +0 -404
- unit_tests/sources/declarative/requesters/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_constant_backoff.py +0 -34
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py +0 -36
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py +0 -38
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py +0 -35
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py +0 -64
- unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py +0 -213
- unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py +0 -178
- unit_tests/sources/declarative/requesters/error_handlers/test_http_response_filter.py +0 -121
- unit_tests/sources/declarative/requesters/error_handlers/test_response_status.py +0 -44
- unit_tests/sources/declarative/requesters/paginators/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py +0 -64
- unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +0 -313
- unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py +0 -12
- unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py +0 -58
- unit_tests/sources/declarative/requesters/paginators/test_page_increment.py +0 -70
- unit_tests/sources/declarative/requesters/paginators/test_request_option.py +0 -43
- unit_tests/sources/declarative/requesters/paginators/test_stop_condition.py +0 -105
- unit_tests/sources/declarative/requesters/request_options/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py +0 -101
- unit_tests/sources/declarative/requesters/test_http_requester.py +0 -974
- unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py +0 -32
- unit_tests/sources/declarative/retrievers/__init__.py +0 -3
- unit_tests/sources/declarative/retrievers/test_simple_retriever.py +0 -542
- unit_tests/sources/declarative/schema/__init__.py +0 -6
- unit_tests/sources/declarative/schema/source_test/SourceTest.py +0 -8
- unit_tests/sources/declarative/schema/source_test/__init__.py +0 -3
- unit_tests/sources/declarative/schema/test_default_schema_loader.py +0 -32
- unit_tests/sources/declarative/schema/test_inline_schema_loader.py +0 -19
- unit_tests/sources/declarative/schema/test_json_file_schema_loader.py +0 -26
- unit_tests/sources/declarative/states/__init__.py +0 -3
- unit_tests/sources/declarative/stream_slicers/__init__.py +0 -3
- unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py +0 -225
- unit_tests/sources/declarative/test_create_partial.py +0 -83
- unit_tests/sources/declarative/test_declarative_stream.py +0 -103
- unit_tests/sources/declarative/test_manifest_declarative_source.py +0 -1260
- unit_tests/sources/declarative/test_types.py +0 -39
- unit_tests/sources/declarative/test_yaml_declarative_source.py +0 -148
- unit_tests/sources/file_based/__init__.py +0 -0
- unit_tests/sources/file_based/availability_strategy/__init__.py +0 -0
- unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py +0 -100
- unit_tests/sources/file_based/config/__init__.py +0 -0
- unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +0 -28
- unit_tests/sources/file_based/config/test_csv_format.py +0 -34
- unit_tests/sources/file_based/config/test_file_based_stream_config.py +0 -84
- unit_tests/sources/file_based/discovery_policy/__init__.py +0 -0
- unit_tests/sources/file_based/discovery_policy/test_default_discovery_policy.py +0 -31
- unit_tests/sources/file_based/file_types/__init__.py +0 -0
- unit_tests/sources/file_based/file_types/test_avro_parser.py +0 -243
- unit_tests/sources/file_based/file_types/test_csv_parser.py +0 -546
- unit_tests/sources/file_based/file_types/test_jsonl_parser.py +0 -158
- unit_tests/sources/file_based/file_types/test_parquet_parser.py +0 -274
- unit_tests/sources/file_based/file_types/test_unstructured_parser.py +0 -593
- unit_tests/sources/file_based/helpers.py +0 -70
- unit_tests/sources/file_based/in_memory_files_source.py +0 -211
- unit_tests/sources/file_based/scenarios/__init__.py +0 -0
- unit_tests/sources/file_based/scenarios/avro_scenarios.py +0 -744
- unit_tests/sources/file_based/scenarios/check_scenarios.py +0 -220
- unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +0 -2844
- unit_tests/sources/file_based/scenarios/csv_scenarios.py +0 -3105
- unit_tests/sources/file_based/scenarios/file_based_source_builder.py +0 -91
- unit_tests/sources/file_based/scenarios/incremental_scenarios.py +0 -1926
- unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +0 -930
- unit_tests/sources/file_based/scenarios/parquet_scenarios.py +0 -754
- unit_tests/sources/file_based/scenarios/scenario_builder.py +0 -234
- unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +0 -608
- unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +0 -746
- unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +0 -726
- unit_tests/sources/file_based/stream/__init__.py +0 -0
- unit_tests/sources/file_based/stream/concurrent/__init__.py +0 -0
- unit_tests/sources/file_based/stream/concurrent/test_adapters.py +0 -362
- unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +0 -458
- unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +0 -310
- unit_tests/sources/file_based/stream/test_default_file_based_stream.py +0 -244
- unit_tests/sources/file_based/test_file_based_scenarios.py +0 -320
- unit_tests/sources/file_based/test_file_based_stream_reader.py +0 -272
- unit_tests/sources/file_based/test_scenarios.py +0 -253
- unit_tests/sources/file_based/test_schema_helpers.py +0 -346
- unit_tests/sources/fixtures/__init__.py +0 -3
- unit_tests/sources/fixtures/source_test_fixture.py +0 -153
- unit_tests/sources/message/__init__.py +0 -0
- unit_tests/sources/message/test_repository.py +0 -153
- unit_tests/sources/streams/__init__.py +0 -0
- unit_tests/sources/streams/concurrent/__init__.py +0 -3
- unit_tests/sources/streams/concurrent/scenarios/__init__.py +0 -3
- unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +0 -250
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +0 -140
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +0 -452
- unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +0 -76
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +0 -418
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +0 -142
- unit_tests/sources/streams/concurrent/scenarios/utils.py +0 -55
- unit_tests/sources/streams/concurrent/test_adapters.py +0 -380
- unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +0 -684
- unit_tests/sources/streams/concurrent/test_cursor.py +0 -139
- unit_tests/sources/streams/concurrent/test_datetime_state_converter.py +0 -369
- unit_tests/sources/streams/concurrent/test_default_stream.py +0 -197
- unit_tests/sources/streams/concurrent/test_partition_enqueuer.py +0 -90
- unit_tests/sources/streams/concurrent/test_partition_reader.py +0 -67
- unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +0 -106
- unit_tests/sources/streams/http/__init__.py +0 -0
- unit_tests/sources/streams/http/auth/__init__.py +0 -0
- unit_tests/sources/streams/http/auth/test_auth.py +0 -173
- unit_tests/sources/streams/http/requests_native_auth/__init__.py +0 -0
- unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +0 -423
- unit_tests/sources/streams/http/test_availability_strategy.py +0 -180
- unit_tests/sources/streams/http/test_http.py +0 -635
- unit_tests/sources/streams/test_availability_strategy.py +0 -70
- unit_tests/sources/streams/test_call_rate.py +0 -300
- unit_tests/sources/streams/test_stream_read.py +0 -405
- unit_tests/sources/streams/test_streams_core.py +0 -184
- unit_tests/sources/test_abstract_source.py +0 -1442
- unit_tests/sources/test_concurrent_source.py +0 -112
- unit_tests/sources/test_config.py +0 -92
- unit_tests/sources/test_connector_state_manager.py +0 -482
- unit_tests/sources/test_http_logger.py +0 -252
- unit_tests/sources/test_integration_source.py +0 -86
- unit_tests/sources/test_source.py +0 -684
- unit_tests/sources/test_source_read.py +0 -460
- unit_tests/test/__init__.py +0 -0
- unit_tests/test/mock_http/__init__.py +0 -0
- unit_tests/test/mock_http/test_matcher.py +0 -53
- unit_tests/test/mock_http/test_mocker.py +0 -214
- unit_tests/test/mock_http/test_request.py +0 -117
- unit_tests/test/mock_http/test_response_builder.py +0 -177
- unit_tests/test/test_entrypoint_wrapper.py +0 -240
- unit_tests/utils/__init__.py +0 -0
- unit_tests/utils/test_datetime_format_inferrer.py +0 -60
- unit_tests/utils/test_mapping_helpers.py +0 -54
- unit_tests/utils/test_message_utils.py +0 -91
- unit_tests/utils/test_rate_limiting.py +0 -26
- unit_tests/utils/test_schema_inferrer.py +0 -202
- unit_tests/utils/test_secret_utils.py +0 -135
- unit_tests/utils/test_stream_status_utils.py +0 -61
- unit_tests/utils/test_traced_exception.py +0 -107
- /airbyte_cdk/sources/{deprecated → declarative/async_job}/__init__.py +0 -0
- {source_declarative_manifest → airbyte_cdk/sources/declarative/migrations}/__init__.py +0 -0
- {unit_tests/destinations → airbyte_cdk/sql}/__init__.py +0 -0
- {unit_tests/singer → airbyte_cdk/sql/_util}/__init__.py +0 -0
- {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.17.1.dev1.dist-info}/LICENSE.txt +0 -0
@@ -0,0 +1,24 @@
|
|
1
|
+
from .config.abstract_file_based_spec import AbstractFileBasedSpec
|
2
|
+
from .config.csv_format import CsvFormat
|
3
|
+
from .config.file_based_stream_config import FileBasedStreamConfig
|
4
|
+
from .config.jsonl_format import JsonlFormat
|
5
|
+
from .exceptions import CustomFileBasedException, ErrorListingFiles, FileBasedSourceError
|
6
|
+
from .file_based_source import DEFAULT_CONCURRENCY, FileBasedSource
|
7
|
+
from .file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
|
8
|
+
from .remote_file import RemoteFile
|
9
|
+
from .stream.cursor import DefaultFileBasedCursor
|
10
|
+
|
11
|
+
__all__ = [
|
12
|
+
"AbstractFileBasedSpec",
|
13
|
+
"AbstractFileBasedStreamReader",
|
14
|
+
"CsvFormat",
|
15
|
+
"CustomFileBasedException",
|
16
|
+
"DefaultFileBasedCursor",
|
17
|
+
"ErrorListingFiles",
|
18
|
+
"FileBasedSource",
|
19
|
+
"FileBasedSourceError",
|
20
|
+
"FileBasedStreamConfig",
|
21
|
+
"FileReadMode",
|
22
|
+
"JsonlFormat",
|
23
|
+
"RemoteFile",
|
24
|
+
]
|
@@ -1,4 +1,11 @@
|
|
1
|
-
from .abstract_file_based_availability_strategy import
|
1
|
+
from .abstract_file_based_availability_strategy import (
|
2
|
+
AbstractFileBasedAvailabilityStrategy,
|
3
|
+
AbstractFileBasedAvailabilityStrategyWrapper,
|
4
|
+
)
|
2
5
|
from .default_file_based_availability_strategy import DefaultFileBasedAvailabilityStrategy
|
3
6
|
|
4
|
-
__all__ = [
|
7
|
+
__all__ = [
|
8
|
+
"AbstractFileBasedAvailabilityStrategy",
|
9
|
+
"AbstractFileBasedAvailabilityStrategyWrapper",
|
10
|
+
"DefaultFileBasedAvailabilityStrategy",
|
11
|
+
]
|
airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py
CHANGED
@@ -2,6 +2,8 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
+
from __future__ import annotations
|
6
|
+
|
5
7
|
import logging
|
6
8
|
from abc import abstractmethod
|
7
9
|
from typing import TYPE_CHECKING, Optional, Tuple
|
@@ -22,7 +24,12 @@ if TYPE_CHECKING:
|
|
22
24
|
|
23
25
|
class AbstractFileBasedAvailabilityStrategy(AvailabilityStrategy):
|
24
26
|
@abstractmethod
|
25
|
-
def check_availability(
|
27
|
+
def check_availability( # type: ignore[override] # Signature doesn't match base class
|
28
|
+
self,
|
29
|
+
stream: Stream,
|
30
|
+
logger: logging.Logger,
|
31
|
+
_: Optional[Source],
|
32
|
+
) -> Tuple[bool, Optional[str]]:
|
26
33
|
"""
|
27
34
|
Perform a connection check for the stream.
|
28
35
|
|
@@ -32,7 +39,10 @@ class AbstractFileBasedAvailabilityStrategy(AvailabilityStrategy):
|
|
32
39
|
|
33
40
|
@abstractmethod
|
34
41
|
def check_availability_and_parsability(
|
35
|
-
self,
|
42
|
+
self,
|
43
|
+
stream: AbstractFileBasedStream,
|
44
|
+
logger: logging.Logger,
|
45
|
+
_: Optional[Source],
|
36
46
|
) -> Tuple[bool, Optional[str]]:
|
37
47
|
"""
|
38
48
|
Performs a connection check for the stream, as well as additional checks that
|
@@ -44,14 +54,20 @@ class AbstractFileBasedAvailabilityStrategy(AvailabilityStrategy):
|
|
44
54
|
|
45
55
|
|
46
56
|
class AbstractFileBasedAvailabilityStrategyWrapper(AbstractAvailabilityStrategy):
|
47
|
-
def __init__(self, stream:
|
57
|
+
def __init__(self, stream: AbstractFileBasedStream) -> None:
|
48
58
|
self.stream = stream
|
49
59
|
|
50
60
|
def check_availability(self, logger: logging.Logger) -> StreamAvailability:
|
51
|
-
is_available, reason = self.stream.availability_strategy.check_availability(
|
61
|
+
is_available, reason = self.stream.availability_strategy.check_availability(
|
62
|
+
self.stream, logger, None
|
63
|
+
)
|
52
64
|
if is_available:
|
53
65
|
return StreamAvailable()
|
54
66
|
return StreamUnavailable(reason or "")
|
55
67
|
|
56
|
-
def check_availability_and_parsability(
|
57
|
-
|
68
|
+
def check_availability_and_parsability(
|
69
|
+
self, logger: logging.Logger
|
70
|
+
) -> Tuple[bool, Optional[str]]:
|
71
|
+
return self.stream.availability_strategy.check_availability_and_parsability(
|
72
|
+
self.stream, logger, None
|
73
|
+
)
|
airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py
CHANGED
@@ -2,13 +2,22 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
+
from __future__ import annotations
|
6
|
+
|
5
7
|
import logging
|
6
8
|
import traceback
|
7
9
|
from typing import TYPE_CHECKING, Optional, Tuple
|
8
10
|
|
11
|
+
from airbyte_cdk import AirbyteTracedException
|
9
12
|
from airbyte_cdk.sources import Source
|
10
|
-
from airbyte_cdk.sources.file_based.availability_strategy import
|
11
|
-
|
13
|
+
from airbyte_cdk.sources.file_based.availability_strategy import (
|
14
|
+
AbstractFileBasedAvailabilityStrategy,
|
15
|
+
)
|
16
|
+
from airbyte_cdk.sources.file_based.exceptions import (
|
17
|
+
CheckAvailabilityError,
|
18
|
+
CustomFileBasedException,
|
19
|
+
FileBasedSourceError,
|
20
|
+
)
|
12
21
|
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
|
13
22
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
14
23
|
from airbyte_cdk.sources.file_based.schema_helpers import conforms_to_schema
|
@@ -18,10 +27,15 @@ if TYPE_CHECKING:
|
|
18
27
|
|
19
28
|
|
20
29
|
class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy):
|
21
|
-
def __init__(self, stream_reader: AbstractFileBasedStreamReader):
|
30
|
+
def __init__(self, stream_reader: AbstractFileBasedStreamReader) -> None:
|
22
31
|
self.stream_reader = stream_reader
|
23
32
|
|
24
|
-
def check_availability(
|
33
|
+
def check_availability( # type: ignore[override] # Signature doesn't match base class
|
34
|
+
self,
|
35
|
+
stream: AbstractFileBasedStream,
|
36
|
+
logger: logging.Logger,
|
37
|
+
_: Optional[Source],
|
38
|
+
) -> Tuple[bool, Optional[str]]:
|
25
39
|
"""
|
26
40
|
Perform a connection check for the stream (verify that we can list files from the stream).
|
27
41
|
|
@@ -35,7 +49,10 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
|
|
35
49
|
return True, None
|
36
50
|
|
37
51
|
def check_availability_and_parsability(
|
38
|
-
self,
|
52
|
+
self,
|
53
|
+
stream: AbstractFileBasedStream,
|
54
|
+
logger: logging.Logger,
|
55
|
+
_: Optional[Source],
|
39
56
|
) -> Tuple[bool, Optional[str]]:
|
40
57
|
"""
|
41
58
|
Perform a connection check for the stream.
|
@@ -66,12 +83,14 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
|
|
66
83
|
# If the parser is set to not check parsability, we still want to check that we can open the file.
|
67
84
|
handle = stream.stream_reader.open_file(file, parser.file_read_mode, None, logger)
|
68
85
|
handle.close()
|
86
|
+
except AirbyteTracedException as ate:
|
87
|
+
raise ate
|
69
88
|
except CheckAvailabilityError:
|
70
89
|
return False, "".join(traceback.format_exc())
|
71
90
|
|
72
91
|
return True, None
|
73
92
|
|
74
|
-
def _check_list_files(self, stream:
|
93
|
+
def _check_list_files(self, stream: AbstractFileBasedStream) -> RemoteFile:
|
75
94
|
"""
|
76
95
|
Check that we can list files from the stream.
|
77
96
|
|
@@ -84,22 +103,39 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
|
|
84
103
|
except CustomFileBasedException as exc:
|
85
104
|
raise CheckAvailabilityError(str(exc), stream=stream.name) from exc
|
86
105
|
except Exception as exc:
|
87
|
-
raise CheckAvailabilityError(
|
106
|
+
raise CheckAvailabilityError(
|
107
|
+
FileBasedSourceError.ERROR_LISTING_FILES, stream=stream.name
|
108
|
+
) from exc
|
88
109
|
|
89
110
|
return file
|
90
111
|
|
91
|
-
def _check_parse_record(
|
112
|
+
def _check_parse_record(
|
113
|
+
self,
|
114
|
+
stream: AbstractFileBasedStream,
|
115
|
+
file: RemoteFile,
|
116
|
+
logger: logging.Logger,
|
117
|
+
) -> None:
|
92
118
|
parser = stream.get_parser()
|
93
119
|
|
94
120
|
try:
|
95
|
-
record = next(
|
121
|
+
record = next(
|
122
|
+
iter(
|
123
|
+
parser.parse_records(
|
124
|
+
stream.config, file, self.stream_reader, logger, discovered_schema=None
|
125
|
+
)
|
126
|
+
)
|
127
|
+
)
|
96
128
|
except StopIteration:
|
97
129
|
# The file is empty. We've verified that we can open it, so will
|
98
130
|
# consider the connection check successful even though it means
|
99
131
|
# we skip the schema validation check.
|
100
132
|
return
|
133
|
+
except AirbyteTracedException as ate:
|
134
|
+
raise ate
|
101
135
|
except Exception as exc:
|
102
|
-
raise CheckAvailabilityError(
|
136
|
+
raise CheckAvailabilityError(
|
137
|
+
FileBasedSourceError.ERROR_READING_FILE, stream=stream.name, file=file.uri
|
138
|
+
) from exc
|
103
139
|
|
104
140
|
schema = stream.catalog_schema or stream.config.input_schema
|
105
141
|
if schema and stream.validation_policy.validate_schema_before_sync:
|
@@ -4,12 +4,32 @@
|
|
4
4
|
|
5
5
|
import copy
|
6
6
|
from abc import abstractmethod
|
7
|
-
from typing import Any, Dict, List, Optional
|
7
|
+
from typing import Any, Dict, List, Literal, Optional, Union
|
8
8
|
|
9
|
-
import dpath
|
9
|
+
import dpath
|
10
|
+
from pydantic.v1 import AnyUrl, BaseModel, Field
|
11
|
+
|
12
|
+
from airbyte_cdk import OneOfOptionConfig
|
10
13
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
|
11
14
|
from airbyte_cdk.sources.utils import schema_helpers
|
12
|
-
|
15
|
+
|
16
|
+
|
17
|
+
class DeliverRecords(BaseModel):
|
18
|
+
class Config(OneOfOptionConfig):
|
19
|
+
title = "Replicate Records"
|
20
|
+
description = "Recommended - Extract and load structured records into your destination of choice. This is the classic method of moving data in Airbyte. It allows for blocking and hashing individual fields or files from a structured schema. Data can be flattened, typed and deduped depending on the destination."
|
21
|
+
discriminator = "delivery_type"
|
22
|
+
|
23
|
+
delivery_type: Literal["use_records_transfer"] = Field("use_records_transfer", const=True)
|
24
|
+
|
25
|
+
|
26
|
+
class DeliverRawFiles(BaseModel):
|
27
|
+
class Config(OneOfOptionConfig):
|
28
|
+
title = "Copy Raw Files"
|
29
|
+
description = "Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files."
|
30
|
+
discriminator = "delivery_type"
|
31
|
+
|
32
|
+
delivery_type: Literal["use_file_transfer"] = Field("use_file_transfer", const=True)
|
13
33
|
|
14
34
|
|
15
35
|
class AbstractFileBasedSpec(BaseModel):
|
@@ -34,6 +54,17 @@ class AbstractFileBasedSpec(BaseModel):
|
|
34
54
|
order=10,
|
35
55
|
)
|
36
56
|
|
57
|
+
delivery_method: Union[DeliverRecords, DeliverRawFiles] = Field(
|
58
|
+
title="Delivery Method",
|
59
|
+
discriminator="delivery_type",
|
60
|
+
type="object",
|
61
|
+
order=7,
|
62
|
+
display_type="radio",
|
63
|
+
group="advanced",
|
64
|
+
default="use_records_transfer",
|
65
|
+
airbyte_hidden=True,
|
66
|
+
)
|
67
|
+
|
37
68
|
@classmethod
|
38
69
|
@abstractmethod
|
39
70
|
def documentation_url(cls) -> AnyUrl:
|
@@ -57,7 +88,7 @@ class AbstractFileBasedSpec(BaseModel):
|
|
57
88
|
@staticmethod
|
58
89
|
def remove_discriminator(schema: Dict[str, Any]) -> None:
|
59
90
|
"""pydantic adds "discriminator" to the schema for oneOfs, which is not treated right by the platform as we inline all references"""
|
60
|
-
dpath.
|
91
|
+
dpath.delete(schema, "properties/**/discriminator")
|
61
92
|
|
62
93
|
@staticmethod
|
63
94
|
def replace_enum_allOf_and_anyOf(schema: Dict[str, Any]) -> Dict[str, Any]:
|
@@ -77,10 +108,16 @@ class AbstractFileBasedSpec(BaseModel):
|
|
77
108
|
|
78
109
|
properties_to_change = ["validation_policy"]
|
79
110
|
for property_to_change in properties_to_change:
|
80
|
-
property_object = schema["properties"]["streams"]["items"]["properties"][
|
111
|
+
property_object = schema["properties"]["streams"]["items"]["properties"][
|
112
|
+
property_to_change
|
113
|
+
]
|
81
114
|
if "anyOf" in property_object:
|
82
|
-
schema["properties"]["streams"]["items"]["properties"][property_to_change][
|
83
|
-
|
115
|
+
schema["properties"]["streams"]["items"]["properties"][property_to_change][
|
116
|
+
"type"
|
117
|
+
] = "object"
|
118
|
+
schema["properties"]["streams"]["items"]["properties"][property_to_change][
|
119
|
+
"oneOf"
|
120
|
+
] = property_object.pop("anyOf")
|
84
121
|
AbstractFileBasedSpec.move_enum_to_root(property_object)
|
85
122
|
|
86
123
|
csv_format_schemas = list(
|
@@ -91,9 +128,9 @@ class AbstractFileBasedSpec(BaseModel):
|
|
91
128
|
)
|
92
129
|
if len(csv_format_schemas) != 1:
|
93
130
|
raise ValueError(f"Expecting only one CSV format but got {csv_format_schemas}")
|
94
|
-
csv_format_schemas[0]["properties"]["header_definition"]["oneOf"] = csv_format_schemas[0][
|
95
|
-
"
|
96
|
-
)
|
131
|
+
csv_format_schemas[0]["properties"]["header_definition"]["oneOf"] = csv_format_schemas[0][
|
132
|
+
"properties"
|
133
|
+
]["header_definition"].pop("anyOf", [])
|
97
134
|
csv_format_schemas[0]["properties"]["header_definition"]["type"] = "object"
|
98
135
|
return schema
|
99
136
|
|
@@ -6,8 +6,10 @@ import codecs
|
|
6
6
|
from enum import Enum
|
7
7
|
from typing import Any, Dict, List, Optional, Set, Union
|
8
8
|
|
9
|
+
from pydantic.v1 import BaseModel, Field, root_validator, validator
|
10
|
+
from pydantic.v1.error_wrappers import ValidationError
|
11
|
+
|
9
12
|
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
10
|
-
from pydantic import BaseModel, Field, ValidationError, root_validator, validator
|
11
13
|
|
12
14
|
|
13
15
|
class InferenceType(Enum):
|
@@ -69,7 +71,9 @@ class CsvHeaderUserProvided(BaseModel):
|
|
69
71
|
@validator("column_names")
|
70
72
|
def validate_column_names(cls, v: List[str]) -> List[str]:
|
71
73
|
if not v:
|
72
|
-
raise ValueError(
|
74
|
+
raise ValueError(
|
75
|
+
"At least one column name needs to be provided when using user provided headers"
|
76
|
+
)
|
73
77
|
return v
|
74
78
|
|
75
79
|
|
@@ -106,7 +110,9 @@ class CsvFormat(BaseModel):
|
|
106
110
|
description='The character encoding of the CSV data. Leave blank to default to <strong>UTF8</strong>. See <a href="https://docs.python.org/3/library/codecs.html#standard-encodings" target="_blank">list of python encodings</a> for allowable options.',
|
107
111
|
)
|
108
112
|
double_quote: bool = Field(
|
109
|
-
title="Double Quote",
|
113
|
+
title="Double Quote",
|
114
|
+
default=True,
|
115
|
+
description="Whether two quotes in a quoted CSV value denote a single quote in the data.",
|
110
116
|
)
|
111
117
|
null_values: Set[str] = Field(
|
112
118
|
title="Null Values",
|
@@ -124,12 +130,16 @@ class CsvFormat(BaseModel):
|
|
124
130
|
description="The number of rows to skip before the header row. For example, if the header row is on the 3rd row, enter 2 in this field.",
|
125
131
|
)
|
126
132
|
skip_rows_after_header: int = Field(
|
127
|
-
title="Skip Rows After Header",
|
133
|
+
title="Skip Rows After Header",
|
134
|
+
default=0,
|
135
|
+
description="The number of rows to skip after the header row.",
|
128
136
|
)
|
129
|
-
header_definition: Union[CsvHeaderFromCsv, CsvHeaderAutogenerated, CsvHeaderUserProvided] =
|
130
|
-
|
131
|
-
|
132
|
-
|
137
|
+
header_definition: Union[CsvHeaderFromCsv, CsvHeaderAutogenerated, CsvHeaderUserProvided] = (
|
138
|
+
Field(
|
139
|
+
title="CSV Header Definition",
|
140
|
+
default=CsvHeaderFromCsv(header_definition_type=CsvHeaderDefinitionType.FROM_CSV.value),
|
141
|
+
description="How headers will be defined. `User Provided` assumes the CSV does not have a header row and uses the headers provided and `Autogenerated` assumes the CSV does not have a header row and the CDK will generate headers using for `f{i}` where `i` is the index starting from 0. Else, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows.",
|
142
|
+
)
|
133
143
|
)
|
134
144
|
true_values: Set[str] = Field(
|
135
145
|
title="True Values",
|
@@ -147,6 +157,11 @@ class CsvFormat(BaseModel):
|
|
147
157
|
description="How to infer the types of the columns. If none, inference default to strings.",
|
148
158
|
airbyte_hidden=True,
|
149
159
|
)
|
160
|
+
ignore_errors_on_fields_mismatch: bool = Field(
|
161
|
+
title="Ignore errors on field mismatch",
|
162
|
+
default=False,
|
163
|
+
description="Whether to ignore errors that occur when the number of fields in the CSV does not match the number of columns in the schema.",
|
164
|
+
)
|
150
165
|
|
151
166
|
@validator("delimiter")
|
152
167
|
def validate_delimiter(cls, v: str) -> str:
|
@@ -183,9 +198,13 @@ class CsvFormat(BaseModel):
|
|
183
198
|
definition_type = values.get("header_definition_type")
|
184
199
|
column_names = values.get("user_provided_column_names")
|
185
200
|
if definition_type == CsvHeaderDefinitionType.USER_PROVIDED and not column_names:
|
186
|
-
raise ValidationError(
|
201
|
+
raise ValidationError(
|
202
|
+
"`user_provided_column_names` should be defined if the definition 'User Provided'.",
|
203
|
+
model=CsvFormat,
|
204
|
+
)
|
187
205
|
if definition_type != CsvHeaderDefinitionType.USER_PROVIDED and column_names:
|
188
206
|
raise ValidationError(
|
189
|
-
"`user_provided_column_names` should not be defined if the definition is not 'User Provided'.",
|
207
|
+
"`user_provided_column_names` should not be defined if the definition is not 'User Provided'.",
|
208
|
+
model=CsvFormat,
|
190
209
|
)
|
191
210
|
return values
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from pydantic.v1 import BaseModel, Field
|
6
|
+
|
7
|
+
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
8
|
+
|
9
|
+
|
10
|
+
class ExcelFormat(BaseModel):
|
11
|
+
class Config(OneOfOptionConfig):
|
12
|
+
title = "Excel Format"
|
13
|
+
discriminator = "filetype"
|
14
|
+
|
15
|
+
filetype: str = Field(
|
16
|
+
"excel",
|
17
|
+
const=True,
|
18
|
+
)
|
@@ -1,18 +1,20 @@
|
|
1
1
|
#
|
2
|
-
# Copyright (c)
|
2
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
5
|
from enum import Enum
|
6
6
|
from typing import Any, List, Mapping, Optional, Union
|
7
7
|
|
8
|
+
from pydantic.v1 import BaseModel, Field, validator
|
9
|
+
|
8
10
|
from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat
|
9
11
|
from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
|
12
|
+
from airbyte_cdk.sources.file_based.config.excel_format import ExcelFormat
|
10
13
|
from airbyte_cdk.sources.file_based.config.jsonl_format import JsonlFormat
|
11
14
|
from airbyte_cdk.sources.file_based.config.parquet_format import ParquetFormat
|
12
15
|
from airbyte_cdk.sources.file_based.config.unstructured_format import UnstructuredFormat
|
13
16
|
from airbyte_cdk.sources.file_based.exceptions import ConfigValidationError, FileBasedSourceError
|
14
17
|
from airbyte_cdk.sources.file_based.schema_helpers import type_mapping_to_jsonschema
|
15
|
-
from pydantic import BaseModel, Field, validator
|
16
18
|
|
17
19
|
PrimaryKeyType = Optional[Union[str, List[str]]]
|
18
20
|
|
@@ -55,7 +57,9 @@ class FileBasedStreamConfig(BaseModel):
|
|
55
57
|
description="When the state history of the file store is full, syncs will only read files that were last modified in the provided day range.",
|
56
58
|
default=3,
|
57
59
|
)
|
58
|
-
format: Union[
|
60
|
+
format: Union[
|
61
|
+
AvroFormat, CsvFormat, JsonlFormat, ParquetFormat, UnstructuredFormat, ExcelFormat
|
62
|
+
] = Field(
|
59
63
|
title="Format",
|
60
64
|
description="The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.",
|
61
65
|
)
|
@@ -64,6 +68,12 @@ class FileBasedStreamConfig(BaseModel):
|
|
64
68
|
description="When enabled, syncs will not validate or structure records against the stream's schema.",
|
65
69
|
default=False,
|
66
70
|
)
|
71
|
+
recent_n_files_to_read_for_schema_discovery: Optional[int] = Field(
|
72
|
+
title="Files To Read For Schema Discover",
|
73
|
+
description="The number of resent files which will be used to discover the schema for this stream.",
|
74
|
+
default=None,
|
75
|
+
gt=0,
|
76
|
+
)
|
67
77
|
|
68
78
|
@validator("input_schema", pre=True)
|
69
79
|
def validate_input_schema(cls, v: Optional[str]) -> Optional[str]:
|
@@ -82,6 +92,8 @@ class FileBasedStreamConfig(BaseModel):
|
|
82
92
|
if self.input_schema:
|
83
93
|
schema = type_mapping_to_jsonschema(self.input_schema)
|
84
94
|
if not schema:
|
85
|
-
raise ValueError(
|
95
|
+
raise ValueError(
|
96
|
+
f"Unable to create JSON schema from input schema {self.input_schema}"
|
97
|
+
)
|
86
98
|
return schema
|
87
99
|
return None
|
@@ -4,8 +4,9 @@
|
|
4
4
|
|
5
5
|
from typing import List, Literal, Optional, Union
|
6
6
|
|
7
|
+
from pydantic.v1 import BaseModel, Field
|
8
|
+
|
7
9
|
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
8
|
-
from pydantic import BaseModel, Field
|
9
10
|
|
10
11
|
|
11
12
|
class LocalProcessingConfigModel(BaseModel):
|
@@ -13,7 +14,9 @@ class LocalProcessingConfigModel(BaseModel):
|
|
13
14
|
|
14
15
|
class Config(OneOfOptionConfig):
|
15
16
|
title = "Local"
|
16
|
-
description =
|
17
|
+
description = (
|
18
|
+
"Process files locally, supporting `fast` and `ocr` modes. This is the default option."
|
19
|
+
)
|
17
20
|
discriminator = "mode"
|
18
21
|
|
19
22
|
|
@@ -23,7 +26,9 @@ class APIParameterConfigModel(BaseModel):
|
|
23
26
|
description="The name of the unstructured API parameter to use",
|
24
27
|
examples=["combine_under_n_chars", "languages"],
|
25
28
|
)
|
26
|
-
value: str = Field(
|
29
|
+
value: str = Field(
|
30
|
+
title="Value", description="The value of the parameter", examples=["true", "hi_res"]
|
31
|
+
)
|
27
32
|
|
28
33
|
|
29
34
|
class APIProcessingConfigModel(BaseModel):
|
@@ -60,7 +65,7 @@ class APIProcessingConfigModel(BaseModel):
|
|
60
65
|
|
61
66
|
class UnstructuredFormat(BaseModel):
|
62
67
|
class Config(OneOfOptionConfig):
|
63
|
-
title = "Document
|
68
|
+
title = "Unstructured Document Format"
|
64
69
|
description = "Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file."
|
65
70
|
discriminator = "filetype"
|
66
71
|
|
@@ -85,7 +90,10 @@ class UnstructuredFormat(BaseModel):
|
|
85
90
|
description="The strategy used to parse documents. `fast` extracts text directly from the document which doesn't work for all files. `ocr_only` is more reliable, but slower. `hi_res` is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf",
|
86
91
|
)
|
87
92
|
|
88
|
-
processing: Union[
|
93
|
+
processing: Union[
|
94
|
+
LocalProcessingConfigModel,
|
95
|
+
APIProcessingConfigModel,
|
96
|
+
] = Field(
|
89
97
|
default=LocalProcessingConfigModel(mode="local"),
|
90
98
|
title="Processing",
|
91
99
|
description="Processing configuration",
|
@@ -1,4 +1,8 @@
|
|
1
|
-
from airbyte_cdk.sources.file_based.discovery_policy.abstract_discovery_policy import
|
2
|
-
|
1
|
+
from airbyte_cdk.sources.file_based.discovery_policy.abstract_discovery_policy import (
|
2
|
+
AbstractDiscoveryPolicy,
|
3
|
+
)
|
4
|
+
from airbyte_cdk.sources.file_based.discovery_policy.default_discovery_policy import (
|
5
|
+
DefaultDiscoveryPolicy,
|
6
|
+
)
|
3
7
|
|
4
8
|
__all__ = ["AbstractDiscoveryPolicy", "DefaultDiscoveryPolicy"]
|
@@ -15,9 +15,7 @@ class AbstractDiscoveryPolicy(ABC):
|
|
15
15
|
|
16
16
|
@property
|
17
17
|
@abstractmethod
|
18
|
-
def n_concurrent_requests(self) -> int:
|
19
|
-
...
|
18
|
+
def n_concurrent_requests(self) -> int: ...
|
20
19
|
|
21
20
|
@abstractmethod
|
22
|
-
def get_max_n_files_for_schema_inference(self, parser: FileTypeParser) -> int:
|
23
|
-
...
|
21
|
+
def get_max_n_files_for_schema_inference(self, parser: FileTypeParser) -> int: ...
|
@@ -2,7 +2,9 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
-
from airbyte_cdk.sources.file_based.discovery_policy.abstract_discovery_policy import
|
5
|
+
from airbyte_cdk.sources.file_based.discovery_policy.abstract_discovery_policy import (
|
6
|
+
AbstractDiscoveryPolicy,
|
7
|
+
)
|
6
8
|
from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
|
7
9
|
|
8
10
|
DEFAULT_N_CONCURRENT_REQUESTS = 10
|
@@ -23,6 +25,9 @@ class DefaultDiscoveryPolicy(AbstractDiscoveryPolicy):
|
|
23
25
|
return min(
|
24
26
|
filter(
|
25
27
|
None,
|
26
|
-
(
|
28
|
+
(
|
29
|
+
DEFAULT_MAX_N_FILES_FOR_STREAM_SCHEMA_INFERENCE,
|
30
|
+
parser.parser_max_n_files_for_schema_inference,
|
31
|
+
),
|
27
32
|
)
|
28
33
|
)
|