airbyte-cdk 0.0.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/__init__.py +358 -0
- airbyte_cdk/cli/__init__.py +1 -0
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
- airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
- airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
- airbyte_cdk/config_observation.py +104 -0
- airbyte_cdk/connector.py +123 -0
- airbyte_cdk/connector_builder/README.md +53 -0
- airbyte_cdk/connector_builder/__init__.py +3 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
- airbyte_cdk/connector_builder/main.py +107 -0
- airbyte_cdk/connector_builder/models.py +73 -0
- airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
- airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
- airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
- airbyte_cdk/connector_builder/test_reader/types.py +83 -0
- airbyte_cdk/destinations/__init__.py +8 -0
- airbyte_cdk/destinations/destination.py +154 -0
- airbyte_cdk/destinations/vector_db_based/README.md +37 -0
- airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
- airbyte_cdk/destinations/vector_db_based/config.py +298 -0
- airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
- airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
- airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
- airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
- airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
- airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
- airbyte_cdk/entrypoint.py +414 -0
- airbyte_cdk/exception_handler.py +56 -0
- airbyte_cdk/logger.py +109 -0
- airbyte_cdk/models/__init__.py +72 -0
- airbyte_cdk/models/airbyte_protocol.py +88 -0
- airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
- airbyte_cdk/models/well_known_types.py +5 -0
- airbyte_cdk/py.typed +0 -0
- airbyte_cdk/sources/__init__.py +26 -0
- airbyte_cdk/sources/abstract_source.py +326 -0
- airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
- airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
- airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
- airbyte_cdk/sources/config.py +27 -0
- airbyte_cdk/sources/connector_state_manager.py +161 -0
- airbyte_cdk/sources/declarative/__init__.py +3 -0
- airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
- airbyte_cdk/sources/declarative/async_job/job.py +52 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
- airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
- airbyte_cdk/sources/declarative/async_job/status.py +24 -0
- airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
- airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
- airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
- airbyte_cdk/sources/declarative/auth/token.py +267 -0
- airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
- airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
- airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
- airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
- airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
- airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
- airbyte_cdk/sources/declarative/declarative_source.py +36 -0
- airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
- airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
- airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
- airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
- airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
- airbyte_cdk/sources/declarative/exceptions.py +9 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
- airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
- airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
- airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
- airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
- airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
- airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
- airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
- airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
- airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
- airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
- airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
- airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
- airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
- airbyte_cdk/sources/declarative/models/__init__.py +2 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
- airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
- airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
- airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
- airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
- airbyte_cdk/sources/declarative/requesters/README.md +56 -0
- airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
- airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
- airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
- airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
- airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
- airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
- airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
- airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
- airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
- airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
- airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
- airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
- airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
- airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
- airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
- airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
- airbyte_cdk/sources/declarative/spec/spec.py +48 -0
- airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
- airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
- airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
- airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
- airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
- airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
- airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
- airbyte_cdk/sources/declarative/types.py +25 -0
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
- airbyte_cdk/sources/file_based/README.md +152 -0
- airbyte_cdk/sources/file_based/__init__.py +24 -0
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
- airbyte_cdk/sources/file_based/config/__init__.py +0 -0
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
- airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
- airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
- airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
- airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
- airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
- airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
- airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
- airbyte_cdk/sources/file_based/exceptions.py +159 -0
- airbyte_cdk/sources/file_based/file_based_source.py +466 -0
- airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
- airbyte_cdk/sources/file_based/file_record_data.py +22 -0
- airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
- airbyte_cdk/sources/file_based/remote_file.py +18 -0
- airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
- airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
- airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
- airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
- airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
- airbyte_cdk/sources/file_based/types.py +10 -0
- airbyte_cdk/sources/http_config.py +10 -0
- airbyte_cdk/sources/http_logger.py +55 -0
- airbyte_cdk/sources/message/__init__.py +19 -0
- airbyte_cdk/sources/message/repository.py +137 -0
- airbyte_cdk/sources/source.py +95 -0
- airbyte_cdk/sources/specs/transfer_modes.py +26 -0
- airbyte_cdk/sources/streams/__init__.py +8 -0
- airbyte_cdk/sources/streams/availability_strategy.py +84 -0
- airbyte_cdk/sources/streams/call_rate.py +704 -0
- airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
- airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
- airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
- airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
- airbyte_cdk/sources/streams/concurrent/README.md +7 -0
- airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
- airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
- airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
- airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
- airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
- airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
- airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
- airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
- airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
- airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
- airbyte_cdk/sources/streams/core.py +703 -0
- airbyte_cdk/sources/streams/http/__init__.py +10 -0
- airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
- airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
- airbyte_cdk/sources/streams/http/exceptions.py +61 -0
- airbyte_cdk/sources/streams/http/http.py +673 -0
- airbyte_cdk/sources/streams/http/http_client.py +531 -0
- airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
- airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
- airbyte_cdk/sources/streams/utils/__init__.py +3 -0
- airbyte_cdk/sources/types.py +169 -0
- airbyte_cdk/sources/utils/__init__.py +7 -0
- airbyte_cdk/sources/utils/casing.py +12 -0
- airbyte_cdk/sources/utils/files_directory.py +15 -0
- airbyte_cdk/sources/utils/record_helper.py +53 -0
- airbyte_cdk/sources/utils/schema_helpers.py +230 -0
- airbyte_cdk/sources/utils/slice_logger.py +57 -0
- airbyte_cdk/sources/utils/transform.py +277 -0
- airbyte_cdk/sources/utils/types.py +7 -0
- airbyte_cdk/sql/__init__.py +0 -0
- airbyte_cdk/sql/_util/__init__.py +0 -0
- airbyte_cdk/sql/_util/hashing.py +34 -0
- airbyte_cdk/sql/_util/name_normalizers.py +92 -0
- airbyte_cdk/sql/constants.py +32 -0
- airbyte_cdk/sql/exceptions.py +235 -0
- airbyte_cdk/sql/secrets.py +123 -0
- airbyte_cdk/sql/shared/__init__.py +15 -0
- airbyte_cdk/sql/shared/catalog_providers.py +145 -0
- airbyte_cdk/sql/shared/sql_processor.py +786 -0
- airbyte_cdk/sql/types.py +160 -0
- airbyte_cdk/test/__init__.py +7 -0
- airbyte_cdk/test/catalog_builder.py +81 -0
- airbyte_cdk/test/entrypoint_wrapper.py +250 -0
- airbyte_cdk/test/mock_http/__init__.py +6 -0
- airbyte_cdk/test/mock_http/matcher.py +41 -0
- airbyte_cdk/test/mock_http/mocker.py +185 -0
- airbyte_cdk/test/mock_http/request.py +103 -0
- airbyte_cdk/test/mock_http/response.py +28 -0
- airbyte_cdk/test/mock_http/response_builder.py +237 -0
- airbyte_cdk/test/state_builder.py +33 -0
- airbyte_cdk/test/utils/__init__.py +1 -0
- airbyte_cdk/test/utils/data.py +24 -0
- airbyte_cdk/test/utils/http_mocking.py +16 -0
- airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
- airbyte_cdk/test/utils/reading.py +26 -0
- airbyte_cdk/utils/__init__.py +10 -0
- airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
- airbyte_cdk/utils/analytics_message.py +25 -0
- airbyte_cdk/utils/constants.py +5 -0
- airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
- airbyte_cdk/utils/datetime_helpers.py +499 -0
- airbyte_cdk/utils/event_timing.py +85 -0
- airbyte_cdk/utils/is_cloud_environment.py +18 -0
- airbyte_cdk/utils/mapping_helpers.py +162 -0
- airbyte_cdk/utils/message_utils.py +26 -0
- airbyte_cdk/utils/oneof_option_config.py +33 -0
- airbyte_cdk/utils/print_buffer.py +75 -0
- airbyte_cdk/utils/schema_inferrer.py +270 -0
- airbyte_cdk/utils/slice_hasher.py +37 -0
- airbyte_cdk/utils/spec_schema_transformations.py +26 -0
- airbyte_cdk/utils/stream_status_utils.py +43 -0
- airbyte_cdk/utils/traced_exception.py +145 -0
- airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
- airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
- airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
- airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
- airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
- airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
from airbyte_cdk.sources.declarative.schema.default_schema_loader import DefaultSchemaLoader
|
|
6
|
+
from airbyte_cdk.sources.declarative.schema.dynamic_schema_loader import (
|
|
7
|
+
ComplexFieldType,
|
|
8
|
+
DynamicSchemaLoader,
|
|
9
|
+
SchemaTypeIdentifier,
|
|
10
|
+
TypesMap,
|
|
11
|
+
)
|
|
12
|
+
from airbyte_cdk.sources.declarative.schema.inline_schema_loader import InlineSchemaLoader
|
|
13
|
+
from airbyte_cdk.sources.declarative.schema.json_file_schema_loader import JsonFileSchemaLoader
|
|
14
|
+
from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
|
|
15
|
+
|
|
16
|
+
# Public API of the declarative schema package.
__all__ = [
    # File-backed, default, base and inline loaders
    "JsonFileSchemaLoader",
    "DefaultSchemaLoader",
    "SchemaLoader",
    "InlineSchemaLoader",
    # Dynamic schema loading and its supporting types
    "DynamicSchemaLoader",
    "ComplexFieldType",
    "TypesMap",
    "SchemaTypeIdentifier",
]
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from dataclasses import InitVar, dataclass
|
|
7
|
+
from typing import Any, Mapping
|
|
8
|
+
|
|
9
|
+
from airbyte_cdk.sources.declarative.schema.json_file_schema_loader import JsonFileSchemaLoader
|
|
10
|
+
from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
|
|
11
|
+
from airbyte_cdk.sources.types import Config
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class DefaultSchemaLoader(SchemaLoader):
    """
    Schema loader that delegates to a JsonFileSchemaLoader and falls back to the empty schema.

    This lets streams that have not defined their schema file yet still be loaded: when the
    delegate raises an OSError, an empty schema is returned instead of propagating the error.

    Attributes:
        config (Config): The user-provided configuration as specified by the source's spec
        parameters (Mapping[str, Any]): Additional arguments to pass to the string interpolation if needed
    """

    config: Config
    parameters: InitVar[Mapping[str, Any]]

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        # The delegate performs the actual schema lookup; the parameters are also kept on the
        # instance so the stream name can be reported when the lookup fails.
        self.default_loader = JsonFileSchemaLoader(config=self.config, parameters=parameters)
        self._parameters = parameters

    def get_json_schema(self) -> Mapping[str, Any]:
        """
        Retrieves the schema through the default loader, falling back to the empty schema.

        :return: The schema returned by the default loader, or the empty schema if the loader raised an OSError
        """

        try:
            schema = self.default_loader.get_json_schema()
        except OSError:
            # A slight hack since we don't directly have the stream name. However, when building the default filepath we assume the
            # runtime options stores stream name 'name' so we'll do the same here
            stream_name = self._parameters.get("name", "")
            logging.info(
                f"Could not find schema for stream {stream_name}, defaulting to the empty schema"
            )
            return {}
        return schema
|
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
from copy import deepcopy
|
|
7
|
+
from dataclasses import InitVar, dataclass, field
|
|
8
|
+
from typing import Any, List, Mapping, MutableMapping, Optional, Union
|
|
9
|
+
|
|
10
|
+
import dpath
|
|
11
|
+
from typing_extensions import deprecated
|
|
12
|
+
|
|
13
|
+
from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
|
|
14
|
+
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
|
15
|
+
from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
|
|
16
|
+
from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
|
|
17
|
+
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
|
18
|
+
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
|
19
|
+
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
|
20
|
+
|
|
21
|
+
# Maps a type name to a nullable JSON schema fragment for that type.
# Every fragment allows "null" alongside the concrete JSON type.
AIRBYTE_DATA_TYPES: Mapping[str, MutableMapping[str, Any]] = {
    # Plain scalars
    "string": {
        "type": ["null", "string"],
    },
    "boolean": {
        "type": ["null", "boolean"],
    },
    # Date/time values are strings qualified by a "format" (and, for most, an "airbyte_type")
    "date": {
        "type": ["null", "string"],
        "format": "date",
    },
    "timestamp_without_timezone": {
        "type": ["null", "string"],
        "format": "date-time",
        "airbyte_type": "timestamp_without_timezone",
    },
    # Note: this entry carries no explicit "airbyte_type"
    "timestamp_with_timezone": {
        "type": ["null", "string"],
        "format": "date-time",
    },
    "time_without_timezone": {
        "type": ["null", "string"],
        "format": "time",
        "airbyte_type": "time_without_timezone",
    },
    "time_with_timezone": {
        "type": ["null", "string"],
        "format": "time",
        "airbyte_type": "time_with_timezone",
    },
    # Numerics
    "integer": {
        "type": ["null", "integer"],
    },
    "number": {
        "type": ["null", "number"],
    },
    # Containers
    "array": {
        "type": ["null", "array"],
    },
    "object": {
        "type": ["null", "object"],
    },
}
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
@dataclass(frozen=True)
class ComplexFieldType:
    """
    Describes a complex field type: a `field_type` plus, for arrays, an optional `items` type.

    `items` may itself be a ComplexFieldType, which allows nested descriptions.
    """

    field_type: str
    items: Optional[Union[str, "ComplexFieldType"]] = None

    def __post_init__(self) -> None:
        """
        Validates that `items` is only provided when `field_type` is "array".

        :raises ValueError: if a truthy `items` is combined with a non-array `field_type`
        """
        # Nothing to validate when no items are given or when the field is an array.
        if not self.items or self.field_type == "array":
            return
        raise ValueError("'items' can only be used when 'field_type' is an array.")
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
@dataclass(frozen=True)
class TypesMap:
    """
    Immutable pairing of a current type with the target type it should be mapped to.

    Attributes:
        target_type: The type to map to; a type name, a list of type names or a ComplexFieldType
        current_type: The type to map from; a type name or a list of type names
        condition: Optional string with no default, so it must always be passed explicitly.
            Presumably an expression controlling when the mapping applies (confirm against the consumer).
    """

    target_type: Union[List[str], str, ComplexFieldType]
    current_type: Union[List[str], str]
    condition: Optional[str]
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
@dataclass
class SchemaTypeIdentifier:
    """
    Holds the pointers and type mappings used for dynamic schema extraction and processing.

    On initialization every plain-string path segment of the pointers is wrapped in an
    InterpolatedString built with the provided parameters.
    """

    key_pointer: List[Union[InterpolatedString, str]]
    parameters: InitVar[Mapping[str, Any]]
    type_pointer: Optional[List[Union[InterpolatedString, str]]] = None
    types_mapping: Optional[List[TypesMap]] = None
    schema_pointer: Optional[List[Union[InterpolatedString, str]]] = None

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        # A missing or empty schema pointer is normalized to an empty list.
        if self.schema_pointer:
            self.schema_pointer = self._update_pointer(self.schema_pointer, parameters)
        else:
            self.schema_pointer = []

        self.key_pointer = self._update_pointer(self.key_pointer, parameters)  # type: ignore[assignment]  # This is required field in model

        # A missing or empty type pointer is normalized to None.
        if self.type_pointer:
            self.type_pointer = self._update_pointer(self.type_pointer, parameters)
        else:
            self.type_pointer = None

    @staticmethod
    def _update_pointer(
        pointer: Optional[List[Union[InterpolatedString, str]]], parameters: Mapping[str, Any]
    ) -> Optional[List[Union[InterpolatedString, str]]]:
        """
        Wraps every plain-string segment of `pointer` in an InterpolatedString.

        Segments that are not strings are kept as they are.

        :param pointer: The path segments to convert, may be None or empty
        :param parameters: The parameters handed to InterpolatedString.create
        :return: The converted segments, or None if `pointer` is None or empty
        """
        if not pointer:
            return None

        resolved: List[Union[InterpolatedString, str]] = []
        for segment in pointer:
            if isinstance(segment, str):
                resolved.append(InterpolatedString.create(segment, parameters=parameters))
            else:
                resolved.append(segment)
        return resolved
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
@dataclass
class DynamicSchemaLoader(SchemaLoader):
    """
    Builds a JSON Schema from the first record returned by the retriever.
    """

    retriever: Retriever
    config: Config
    parameters: InitVar[Mapping[str, Any]]
    schema_type_identifier: SchemaTypeIdentifier
    schema_transformations: List[RecordTransformation] = field(default_factory=list)

    def get_json_schema(self) -> Mapping[str, Any]:
        """
        Reads one record, derives a property per field definition found in it,
        applies the schema transformations and wraps the result in an object schema.

        When no record is retrieved the resulting schema has no derived properties.
        """
        identifier = self.schema_type_identifier
        first_record = next(self.retriever.read_records({}), None)  # type: ignore[call-overload] # read_records return Iterable data type

        if first_record:
            field_definitions = self._extract_data(
                first_record,  # type: ignore[arg-type] # Expected that retrieved_record will be only Mapping[str, Any]
                identifier.schema_pointer,
            )
        else:
            field_definitions = []

        properties = {}
        for definition in field_definitions:
            # The key is resolved before the type, one definition at a time.
            name = self._get_key(definition, identifier.key_pointer)
            properties[name] = self._get_type(definition, identifier.type_pointer)

        return {
            "$schema": "https://json-schema.org/draft-07/schema#",
            "type": "object",
            "additionalProperties": True,
            "properties": self._transform(properties, {}),
        }

    def _transform(
        self,
        properties: Mapping[str, Any],
        stream_state: StreamState,
        stream_slice: Optional[StreamSlice] = None,
    ) -> Mapping[str, Any]:
        """
        Runs every configured transformation on `properties` and returns that same object.

        Only `properties` and the config are forwarded; `stream_state` and
        `stream_slice` are accepted but not used here.
        """
        for transformation in self.schema_transformations:
            transformation.transform(
                properties,  # type: ignore  # properties has type Mapping[str, Any], but Dict[str, Any] expected
                config=self.config,
            )
        return properties

    def _get_key(
        self,
        raw_schema: MutableMapping[str, Any],
        field_key_path: List[Union[InterpolatedString, str]],
    ) -> str:
        """
        Returns the field name found at `field_key_path`.

        Raises:
            ValueError: if the extracted value is not a string.
        """
        key = self._extract_data(raw_schema, field_key_path)
        if isinstance(key, str):
            return key
        raise ValueError(f"Expected key to be a string. Got {key}")

    def _get_type(
        self,
        raw_schema: MutableMapping[str, Any],
        field_type_path: Optional[List[Union[InterpolatedString, str]]],
    ) -> Union[Mapping[str, Any], List[Mapping[str, Any]]]:
        """
        Resolves the JSON Schema type definition for one field.

        The raw type defaults to "string" when there is no type path or nothing
        is found at it. After type mapping, a string resolves to a single
        definition, a `ComplexFieldType` is resolved recursively, and a list of
        exactly two strings becomes a `oneOf` of both definitions.

        Raises:
            ValueError: for any other mapped value.
        """
        if field_type_path:
            raw_field_type = self._extract_data(raw_schema, field_type_path, default="string")
        else:
            raw_field_type = "string"

        mapped = self._replace_type_if_not_valid(raw_field_type, raw_schema)

        if isinstance(mapped, str):
            return self._get_airbyte_type(mapped)

        if isinstance(mapped, ComplexFieldType):
            return self._resolve_complex_type(mapped)

        is_pair_of_names = (
            isinstance(mapped, list)
            and len(mapped) == 2
            and all(isinstance(item, str) for item in mapped)
        )
        if is_pair_of_names:
            return {"oneOf": [self._get_airbyte_type(type_name) for type_name in mapped]}

        raise ValueError(
            f"Invalid data type. Available string or two items list of string. Got {mapped}."
        )

    def _resolve_complex_type(self, complex_type: ComplexFieldType) -> Mapping[str, Any]:
        """
        Returns the definition for `complex_type`, with an "items" entry when
        an element type is present. Nested element types are resolved recursively.
        """
        resolved = self._get_airbyte_type(complex_type.field_type)
        element = complex_type.items
        if not element:
            return resolved

        if isinstance(element, str):
            resolved["items"] = self._get_airbyte_type(element)
        else:
            resolved["items"] = self._resolve_complex_type(element)
        return resolved

    def _replace_type_if_not_valid(
        self,
        field_type: Union[List[str], str],
        raw_schema: MutableMapping[str, Any],
    ) -> Union[List[str], str, ComplexFieldType]:
        """
        Returns the target type of the first mapping whose `current_type` equals
        `field_type` and whose condition evaluates truthy; otherwise `field_type`.
        """
        for types_map in self.schema_type_identifier.types_mapping or []:
            # The condition is optional; a missing one always passes.
            expression = "True" if types_map.condition is None else types_map.condition
            condition_met = InterpolatedBoolean(
                condition=expression,
                parameters={},
            ).eval(config=self.config, raw_schema=raw_schema)

            if field_type == types_map.current_type and condition_met:
                return types_map.target_type
        return field_type

    @staticmethod
    def _get_airbyte_type(field_type: str) -> MutableMapping[str, Any]:
        """
        Returns a deep copy of the `AIRBYTE_DATA_TYPES` entry for `field_type`.

        Raises:
            ValueError: if `field_type` is not a key of `AIRBYTE_DATA_TYPES`.
        """
        if field_type in AIRBYTE_DATA_TYPES:
            # Copy so callers can add keys (e.g. "items") without touching the table.
            return deepcopy(AIRBYTE_DATA_TYPES[field_type])
        raise ValueError(f"Invalid Airbyte data type: {field_type}")

    def _extract_data(
        self,
        body: Mapping[str, Any],
        extraction_path: Optional[List[Union[InterpolatedString, str]]] = None,
        default: Any = None,
    ) -> Any:
        """
        Returns the value at `extraction_path` inside `body`.

        Without a path the body itself is returned. Non-string path segments
        are evaluated against the config before the lookup, and `default` is
        passed on to `dpath.get`.
        """
        if not extraction_path:
            return body

        resolved_path = [
            node if isinstance(node, str) else node.eval(self.config) for node in extraction_path
        ]
        return dpath.get(body, resolved_path, default=default)  # type: ignore # extracted will be a MutableMapping, given input data structure
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
from dataclasses import InitVar, dataclass
|
|
6
|
+
from typing import Any, Dict, Mapping
|
|
7
|
+
|
|
8
|
+
from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class InlineSchemaLoader(SchemaLoader):
    """Schema loader whose schema is supplied directly at construction time."""

    # The schema handed back by get_json_schema.
    schema: Dict[str, Any]
    parameters: InitVar[Mapping[str, Any]]

    def get_json_schema(self) -> Mapping[str, Any]:
        """Returns the stored schema object itself (not a copy)."""
        return self.schema
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import pkgutil
|
|
7
|
+
import sys
|
|
8
|
+
from dataclasses import InitVar, dataclass, field
|
|
9
|
+
from typing import Any, Mapping, Tuple, Union
|
|
10
|
+
|
|
11
|
+
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
|
12
|
+
from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
|
|
13
|
+
from airbyte_cdk.sources.types import Config
|
|
14
|
+
from airbyte_cdk.sources.utils.schema_helpers import ResourceSchemaLoader
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _default_file_path() -> str:
|
|
18
|
+
# Schema files are always in "source_<connector_name>/schemas/<stream_name>.json
|
|
19
|
+
# The connector's module name can be inferred by looking at the modules loaded and look for the one starting with source_
|
|
20
|
+
source_modules = [
|
|
21
|
+
k for k, v in sys.modules.items() if "source_" in k and "airbyte_cdk" not in k
|
|
22
|
+
] # example: ['source_exchange_rates', 'source_exchange_rates.source']
|
|
23
|
+
if source_modules:
|
|
24
|
+
module = source_modules[0].split(".")[0]
|
|
25
|
+
return f"./{module}/schemas/{{{{parameters['name']}}}}.json"
|
|
26
|
+
|
|
27
|
+
# If we are not in a source_ module, the most likely scenario is we're processing a manifest from the connector builder
|
|
28
|
+
# server which does not require a json schema to be defined.
|
|
29
|
+
return "./{{parameters['name']}}.json"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class JsonFileSchemaLoader(ResourceSchemaLoader, SchemaLoader):
    """
    Schema loader that reads the schema from a JSON file shipped as package data.

    Attributes:
        file_path (Union[InterpolatedString, str]): Path to the JSON schema file; a default is derived when empty
        config (Config): The user-provided configuration as specified by the source's spec
        parameters (Mapping[str, Any]): Additional arguments to pass to the string interpolation if needed
    """

    config: Config
    parameters: InitVar[Mapping[str, Any]]
    file_path: Union[InterpolatedString, str] = field(default="")

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        """Falls back to the default path when none is given, then wraps the path for interpolation."""
        chosen_path = self.file_path or _default_file_path()
        self.file_path = InterpolatedString.create(chosen_path, parameters=parameters)

    def get_json_schema(self) -> Mapping[str, Any]:
        """
        Loads and parses the schema file, then resolves its references.

        Raises:
            IOError: if no data is found for the computed path.
            RuntimeError: if the file content is not valid JSON.
        """
        # todo: It is worth revisiting if we can replace file_path with just file_name if every schema is in the /schemas directory
        # this would require that we find a creative solution to store or retrieve source_name in here since the files are mounted there
        full_path = self._get_json_filepath()
        package, relative_path = self.extract_resource_and_schema_path(full_path)
        file_content = pkgutil.get_data(package, relative_path)

        if not file_content:
            raise IOError(f"Cannot find file {full_path}")

        try:
            parsed_schema = json.loads(file_content)
        except ValueError as err:
            raise RuntimeError(f"Invalid JSON file format for file {full_path}") from err

        # Set before resolving references, as in the original statement order.
        self.package_name = package
        return self._resolve_schema_references(parsed_schema)

    def _get_json_filepath(self) -> Any:
        """Evaluates the interpolated file path against the config."""
        return self.file_path.eval(self.config)  # type: ignore # file_path is always cast to an interpolated string

    @staticmethod
    def extract_resource_and_schema_path(json_schema_path: str) -> Tuple[str, str]:
        """
        Splits a schema path into the package ("resource") and the path inside it.

        When the connector is running on a docker container, package_data is accessible from the resource (source_<name>),
        so the first path segment is taken as the resource and the remainder as the file location. This is a slight
        hack to identify the source name while we are in the airbyte_cdk module.

        A leading "" or "." segment is dropped first. An empty path yields ("", ""), and a single
        remaining segment yields ("", segment).

        :param json_schema_path: The path to the schema JSON file
        :return: Tuple of the resource name and the path to the schema file
        """
        segments = json_schema_path.split("/")

        if segments[0] in ("", "."):
            segments = segments[1:]

        if not segments:
            return "", ""

        if len(segments) == 1:
            return "", segments[0]

        resource, *remainder = segments
        return resource, "/".join(remainder)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
from abc import abstractmethod
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import Any, Mapping
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class SchemaLoader:
    """
    Base class for components that provide a stream's schema.

    The class has no ABC base, so the abstract marker on `get_json_schema`
    is not enforced at instantiation time.
    """

    @abstractmethod
    def get_json_schema(self) -> Mapping[str, Any]:
        """Returns a mapping describing the stream's schema"""
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
from dataclasses import InitVar, dataclass
|
|
6
|
+
from typing import Any, Mapping, Optional
|
|
7
|
+
|
|
8
|
+
from airbyte_cdk.models import (
|
|
9
|
+
AdvancedAuth,
|
|
10
|
+
ConnectorSpecification,
|
|
11
|
+
ConnectorSpecificationSerializer,
|
|
12
|
+
)
|
|
13
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import AuthFlow
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class Spec:
    """
    Returns a connection specification made up of information about the connector and how it can be configured

    Attributes:
        connection_specification (Mapping[str, Any]): information related to how a connector can be configured
        documentation_url (Optional[str]): The link the Airbyte documentation about this connector
        advanced_auth (Optional[AuthFlow]): Optional auth flow description, emitted under "advanced_auth"
    """

    connection_specification: Mapping[str, Any]
    parameters: InitVar[Mapping[str, Any]]
    documentation_url: Optional[str] = None
    advanced_auth: Optional[AuthFlow] = None

    def generate_spec(self) -> ConnectorSpecification:
        """
        Returns the connector specification according the spec block defined in the low code connector manifest.

        The method does not modify `self.advanced_auth`, so it can be called
        more than once on the same instance.
        """

        # We use camel case keys here because that's the existing format expected by the rest of the platform
        obj: dict[str, Mapping[str, Any] | str | AdvancedAuth] = {
            "connectionSpecification": self.connection_specification
        }

        if self.documentation_url:
            obj["documentationUrl"] = self.documentation_url
        if self.advanced_auth:
            # Map CDK AuthFlow model to protocol AdvancedAuth model.
            # The enum is converted on the dumped dict rather than on the model:
            # overwriting the model attribute made a second call fail on `.value`.
            auth_flow = self.advanced_auth.dict()
            if "auth_flow_type" in auth_flow:
                flow_type = auth_flow["auth_flow_type"]
                # Values without a `.value` attribute (already a string, or None) pass through unchanged.
                auth_flow["auth_flow_type"] = getattr(flow_type, "value", flow_type)
            obj["advanced_auth"] = auth_flow

        return ConnectorSpecificationSerializer.load(obj)
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
|
2
|
+
|
|
3
|
+
from typing import Any, Iterable, Mapping, Optional
|
|
4
|
+
|
|
5
|
+
from airbyte_cdk.sources.declarative.retrievers import Retriever
|
|
6
|
+
from airbyte_cdk.sources.message import MessageRepository
|
|
7
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
|
8
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
|
|
9
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
|
|
10
|
+
from airbyte_cdk.sources.types import Record, StreamSlice
|
|
11
|
+
from airbyte_cdk.utils.slice_hasher import SliceHasher
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class DeclarativePartitionFactory:
    def __init__(
        self,
        stream_name: str,
        json_schema: Mapping[str, Any],
        retriever: Retriever,
        message_repository: MessageRepository,
    ) -> None:
        """
        Stores the stream name, JSON schema, retriever and message repository
        that are passed to every partition this factory creates.

        NOTE(review): an earlier description of this class mentioned a
        `retriever_factory` producing one retriever per thread because components
        such as `DefaultPaginator` are not thread safe. This constructor receives
        a single retriever instance, which is handed to every partition — confirm
        whether that sharing is intended.
        """
        self._stream_name = stream_name
        self._json_schema = json_schema
        self._retriever = retriever
        self._message_repository = message_repository

    def create(self, stream_slice: StreamSlice) -> Partition:
        """Returns a new `DeclarativePartition` for `stream_slice` using the stored collaborators."""
        return DeclarativePartition(
            stream_name=self._stream_name,
            json_schema=self._json_schema,
            retriever=self._retriever,
            message_repository=self._message_repository,
            stream_slice=stream_slice,
        )
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class DeclarativePartition(Partition):
    """Partition that reads one stream slice through a retriever."""

    def __init__(
        self,
        stream_name: str,
        json_schema: Mapping[str, Any],
        retriever: Retriever,
        message_repository: MessageRepository,
        stream_slice: StreamSlice,
    ):
        """Stores the collaborators and precomputes the hash from the stream name and slice."""
        self._stream_name = stream_name
        self._json_schema = json_schema
        self._retriever = retriever
        self._message_repository = message_repository
        self._stream_slice = stream_slice
        self._hash = SliceHasher.hash(self._stream_name, self._stream_slice)

    def read(self) -> Iterable[Record]:
        """
        Yields the records read for this partition's slice.

        Items that are not mappings are forwarded to the message repository
        instead of being yielded. Mappings that are not already `Record`
        instances are wrapped in one.
        """
        retrieved = self._retriever.read_records(self._json_schema, self._stream_slice)
        for stream_data in retrieved:
            if not isinstance(stream_data, Mapping):
                self._message_repository.emit_message(stream_data)
                continue

            if isinstance(stream_data, Record):
                yield stream_data
            else:
                yield Record(
                    data=stream_data,
                    stream_name=self.stream_name(),
                    associated_slice=self._stream_slice,
                )

    def to_slice(self) -> Optional[Mapping[str, Any]]:
        """Returns the stream slice this partition was created for."""
        return self._stream_slice

    def stream_name(self) -> str:
        """Returns the name of the stream this partition belongs to."""
        return self._stream_name

    def __hash__(self) -> int:
        """Returns the hash computed at construction time."""
        return self._hash
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class StreamSlicerPartitionGenerator(PartitionGenerator):
    """Generates partitions by feeding each stream slice to a partition factory."""

    def __init__(
        self, partition_factory: DeclarativePartitionFactory, stream_slicer: StreamSlicer
    ) -> None:
        """Stores the factory that builds partitions and the slicer that supplies slices."""
        self._partition_factory = partition_factory
        self._stream_slicer = stream_slicer

    def generate(self) -> Iterable[Partition]:
        """Lazily yields one partition per slice, in the order the slicer produces them."""
        yield from (
            self._partition_factory.create(current_slice)
            for current_slice in self._stream_slicer.stream_slices()
        )
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
from abc import ABC
|
|
6
|
+
|
|
7
|
+
from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import (
|
|
8
|
+
RequestOptionsProvider,
|
|
9
|
+
)
|
|
10
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import (
|
|
11
|
+
StreamSlicer as ConcurrentStreamSlicer,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class StreamSlicer(ConcurrentStreamSlicer, RequestOptionsProvider, ABC):
    """
    Slices the stream into a subset of records.

    Slices enable state checkpointing and data retrieval parallelization.
    The stream slicer keeps track of the cursor state as a dict of cursor_field -> cursor_value.

    This class adds no members of its own: it combines the concurrent
    stream slicer interface with the request options provider interface.

    See the stream slicing section of the docs for more information.
    """
|