airbyte-cdk 0.0.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/__init__.py +358 -0
- airbyte_cdk/cli/__init__.py +1 -0
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
- airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
- airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
- airbyte_cdk/config_observation.py +104 -0
- airbyte_cdk/connector.py +123 -0
- airbyte_cdk/connector_builder/README.md +53 -0
- airbyte_cdk/connector_builder/__init__.py +3 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
- airbyte_cdk/connector_builder/main.py +107 -0
- airbyte_cdk/connector_builder/models.py +73 -0
- airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
- airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
- airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
- airbyte_cdk/connector_builder/test_reader/types.py +83 -0
- airbyte_cdk/destinations/__init__.py +8 -0
- airbyte_cdk/destinations/destination.py +154 -0
- airbyte_cdk/destinations/vector_db_based/README.md +37 -0
- airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
- airbyte_cdk/destinations/vector_db_based/config.py +298 -0
- airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
- airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
- airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
- airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
- airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
- airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
- airbyte_cdk/entrypoint.py +414 -0
- airbyte_cdk/exception_handler.py +56 -0
- airbyte_cdk/logger.py +109 -0
- airbyte_cdk/models/__init__.py +72 -0
- airbyte_cdk/models/airbyte_protocol.py +88 -0
- airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
- airbyte_cdk/models/well_known_types.py +5 -0
- airbyte_cdk/py.typed +0 -0
- airbyte_cdk/sources/__init__.py +26 -0
- airbyte_cdk/sources/abstract_source.py +326 -0
- airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
- airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
- airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
- airbyte_cdk/sources/config.py +27 -0
- airbyte_cdk/sources/connector_state_manager.py +161 -0
- airbyte_cdk/sources/declarative/__init__.py +3 -0
- airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
- airbyte_cdk/sources/declarative/async_job/job.py +52 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
- airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
- airbyte_cdk/sources/declarative/async_job/status.py +24 -0
- airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
- airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
- airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
- airbyte_cdk/sources/declarative/auth/token.py +267 -0
- airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
- airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
- airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
- airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
- airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
- airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
- airbyte_cdk/sources/declarative/declarative_source.py +36 -0
- airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
- airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
- airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
- airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
- airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
- airbyte_cdk/sources/declarative/exceptions.py +9 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
- airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
- airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
- airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
- airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
- airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
- airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
- airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
- airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
- airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
- airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
- airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
- airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
- airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
- airbyte_cdk/sources/declarative/models/__init__.py +2 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
- airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
- airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
- airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
- airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
- airbyte_cdk/sources/declarative/requesters/README.md +56 -0
- airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
- airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
- airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
- airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
- airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
- airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
- airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
- airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
- airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
- airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
- airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
- airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
- airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
- airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
- airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
- airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
- airbyte_cdk/sources/declarative/spec/spec.py +48 -0
- airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
- airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
- airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
- airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
- airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
- airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
- airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
- airbyte_cdk/sources/declarative/types.py +25 -0
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
- airbyte_cdk/sources/file_based/README.md +152 -0
- airbyte_cdk/sources/file_based/__init__.py +24 -0
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
- airbyte_cdk/sources/file_based/config/__init__.py +0 -0
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
- airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
- airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
- airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
- airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
- airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
- airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
- airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
- airbyte_cdk/sources/file_based/exceptions.py +159 -0
- airbyte_cdk/sources/file_based/file_based_source.py +466 -0
- airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
- airbyte_cdk/sources/file_based/file_record_data.py +22 -0
- airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
- airbyte_cdk/sources/file_based/remote_file.py +18 -0
- airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
- airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
- airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
- airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
- airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
- airbyte_cdk/sources/file_based/types.py +10 -0
- airbyte_cdk/sources/http_config.py +10 -0
- airbyte_cdk/sources/http_logger.py +55 -0
- airbyte_cdk/sources/message/__init__.py +19 -0
- airbyte_cdk/sources/message/repository.py +137 -0
- airbyte_cdk/sources/source.py +95 -0
- airbyte_cdk/sources/specs/transfer_modes.py +26 -0
- airbyte_cdk/sources/streams/__init__.py +8 -0
- airbyte_cdk/sources/streams/availability_strategy.py +84 -0
- airbyte_cdk/sources/streams/call_rate.py +704 -0
- airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
- airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
- airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
- airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
- airbyte_cdk/sources/streams/concurrent/README.md +7 -0
- airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
- airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
- airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
- airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
- airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
- airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
- airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
- airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
- airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
- airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
- airbyte_cdk/sources/streams/core.py +703 -0
- airbyte_cdk/sources/streams/http/__init__.py +10 -0
- airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
- airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
- airbyte_cdk/sources/streams/http/exceptions.py +61 -0
- airbyte_cdk/sources/streams/http/http.py +673 -0
- airbyte_cdk/sources/streams/http/http_client.py +531 -0
- airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
- airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
- airbyte_cdk/sources/streams/utils/__init__.py +3 -0
- airbyte_cdk/sources/types.py +169 -0
- airbyte_cdk/sources/utils/__init__.py +7 -0
- airbyte_cdk/sources/utils/casing.py +12 -0
- airbyte_cdk/sources/utils/files_directory.py +15 -0
- airbyte_cdk/sources/utils/record_helper.py +53 -0
- airbyte_cdk/sources/utils/schema_helpers.py +230 -0
- airbyte_cdk/sources/utils/slice_logger.py +57 -0
- airbyte_cdk/sources/utils/transform.py +277 -0
- airbyte_cdk/sources/utils/types.py +7 -0
- airbyte_cdk/sql/__init__.py +0 -0
- airbyte_cdk/sql/_util/__init__.py +0 -0
- airbyte_cdk/sql/_util/hashing.py +34 -0
- airbyte_cdk/sql/_util/name_normalizers.py +92 -0
- airbyte_cdk/sql/constants.py +32 -0
- airbyte_cdk/sql/exceptions.py +235 -0
- airbyte_cdk/sql/secrets.py +123 -0
- airbyte_cdk/sql/shared/__init__.py +15 -0
- airbyte_cdk/sql/shared/catalog_providers.py +145 -0
- airbyte_cdk/sql/shared/sql_processor.py +786 -0
- airbyte_cdk/sql/types.py +160 -0
- airbyte_cdk/test/__init__.py +7 -0
- airbyte_cdk/test/catalog_builder.py +81 -0
- airbyte_cdk/test/entrypoint_wrapper.py +250 -0
- airbyte_cdk/test/mock_http/__init__.py +6 -0
- airbyte_cdk/test/mock_http/matcher.py +41 -0
- airbyte_cdk/test/mock_http/mocker.py +185 -0
- airbyte_cdk/test/mock_http/request.py +103 -0
- airbyte_cdk/test/mock_http/response.py +28 -0
- airbyte_cdk/test/mock_http/response_builder.py +237 -0
- airbyte_cdk/test/state_builder.py +33 -0
- airbyte_cdk/test/utils/__init__.py +1 -0
- airbyte_cdk/test/utils/data.py +24 -0
- airbyte_cdk/test/utils/http_mocking.py +16 -0
- airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
- airbyte_cdk/test/utils/reading.py +26 -0
- airbyte_cdk/utils/__init__.py +10 -0
- airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
- airbyte_cdk/utils/analytics_message.py +25 -0
- airbyte_cdk/utils/constants.py +5 -0
- airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
- airbyte_cdk/utils/datetime_helpers.py +499 -0
- airbyte_cdk/utils/event_timing.py +85 -0
- airbyte_cdk/utils/is_cloud_environment.py +18 -0
- airbyte_cdk/utils/mapping_helpers.py +162 -0
- airbyte_cdk/utils/message_utils.py +26 -0
- airbyte_cdk/utils/oneof_option_config.py +33 -0
- airbyte_cdk/utils/print_buffer.py +75 -0
- airbyte_cdk/utils/schema_inferrer.py +270 -0
- airbyte_cdk/utils/slice_hasher.py +37 -0
- airbyte_cdk/utils/spec_schema_transformations.py +26 -0
- airbyte_cdk/utils/stream_status_utils.py +43 -0
- airbyte_cdk/utils/traced_exception.py +145 -0
- airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
- airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
- airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
- airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
- airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
- airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Any, ItemsView, Iterator, KeysView, List, Mapping, Optional, ValuesView
|
|
8
|
+
|
|
9
|
+
from airbyte_cdk.models import AirbyteRecordMessageFileReference
|
|
10
|
+
from airbyte_cdk.utils.slice_hasher import SliceHasher
|
|
11
|
+
|
|
12
|
+
# A FieldPointer designates a path to a field inside a mapping. For example, retrieving ["k1", "k1.2"] in the object {"k1" :{"k1.2":
|
|
13
|
+
# "hello"}} returns "hello"
|
|
14
|
+
FieldPointer = List[str]
|
|
15
|
+
Config = Mapping[str, Any]
|
|
16
|
+
ConnectionDefinition = Mapping[str, Any]
|
|
17
|
+
StreamState = Mapping[str, Any]
|
|
18
|
+
EmptyString = str()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Record(Mapping[str, Any]):
    """Mapping wrapper around the data of a single record.

    All mapping operations (lookup, length, iteration, membership) are
    delegated to the wrapped ``data``. The stream name, an optional associated
    slice and an optional file reference are carried alongside as attributes.
    """

    def __init__(
        self,
        data: Mapping[str, Any],
        stream_name: str,
        associated_slice: Optional["StreamSlice"] = None,
        file_reference: Optional["AirbyteRecordMessageFileReference"] = None,
    ) -> None:
        """
        :param data: The mapping whose keys and values this record exposes.
        :param stream_name: Name of the stream, stored as a public attribute.
        :param associated_slice: Optional slice stored with the record.
        :param file_reference: Optional file reference stored with the record.
        """
        self._data = data
        self._associated_slice = associated_slice
        self.stream_name = stream_name
        self._file_reference = file_reference

    @property
    def data(self) -> Mapping[str, Any]:
        """Return the wrapped mapping itself (not a copy)."""
        return self._data

    @property
    def associated_slice(self) -> Optional["StreamSlice"]:
        """Return the slice given at construction time, or ``None``."""
        return self._associated_slice

    @property
    def file_reference(self) -> Optional["AirbyteRecordMessageFileReference"]:
        """Return the file reference, or ``None`` when none has been set."""
        # The constructor defaults this to None, so the return type is Optional.
        return self._file_reference

    @file_reference.setter
    def file_reference(self, value: Optional["AirbyteRecordMessageFileReference"]) -> None:
        """Replace the stored file reference."""
        self._file_reference = value

    def __repr__(self) -> str:
        """Return the ``repr`` of the wrapped data only."""
        return repr(self._data)

    def __getitem__(self, key: str) -> Any:
        """Look ``key`` up in the wrapped data; raises whatever the data raises."""
        return self._data[key]

    def __len__(self) -> int:
        """Return the number of entries in the wrapped data."""
        return len(self._data)

    def __iter__(self) -> Iterator[str]:
        """Iterate over the keys of the wrapped data."""
        return iter(self._data)

    def __contains__(self, item: object) -> bool:
        """Return whether ``item`` is a key of the wrapped data."""
        return item in self._data

    def __eq__(self, other: object) -> bool:
        """Compare two records by their data only.

        The stream name, associated slice and file reference are ignored.
        Anything that is not a ``Record`` (including a plain dict with equal
        content) compares unequal.
        """
        if isinstance(other, Record):
            # noinspection PyProtectedMember
            return self._data == other._data
        return False

    def __ne__(self, other: object) -> bool:
        """Return the negation of ``__eq__``."""
        return not self.__eq__(other)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class StreamSlice(Mapping[str, Any]):
    """Immutable mapping built from a partition and a cursor slice.

    Lookups, iteration, length and hashing operate on the merged view of
    ``partition`` and ``cursor_slice``. ``extra_fields`` are kept separately
    and never appear in that merged view.
    """

    def __init__(
        self,
        *,
        partition: Mapping[str, Any],
        cursor_slice: Mapping[str, Any],
        extra_fields: Optional[Mapping[str, Any]] = None,
    ) -> None:
        """
        :param partition: Keys identifying a unique partition in the stream.
        :param cursor_slice: Incremental cursor slice keys, such as dates or pagination tokens.
        :param extra_fields: Additional fields passed along without being part of the partition,
            such as metadata from the parent stream. A falsy value is stored as an empty dict.
        :raises ValueError: If ``partition`` and ``cursor_slice`` share at least one key.
        """
        self._partition = partition
        self._cursor_slice = cursor_slice
        self._extra_fields = extra_fields or {}

        # A key present on both sides would make the merged view ambiguous.
        shared_keys = partition.keys() & cursor_slice.keys()
        if shared_keys:
            raise ValueError("Keys for partition and incremental sync cursor should not overlap")

        # Partition keys come first, followed by cursor slice keys.
        merged = dict(partition)
        merged.update(dict(cursor_slice))
        self._stream_slice = merged

    @property
    def partition(self) -> Mapping[str, Any]:
        """Returns the partition portion of the stream slice."""
        current = self._partition
        # If the stored partition is itself a StreamSlice, unwrap it.
        while isinstance(current, StreamSlice):
            current = current.partition
        return current

    @property
    def cursor_slice(self) -> Mapping[str, Any]:
        """Returns the cursor slice portion of the stream slice."""
        current = self._cursor_slice
        # If the stored cursor slice is itself a StreamSlice, unwrap it.
        while isinstance(current, StreamSlice):
            current = current.cursor_slice
        return current

    @property
    def extra_fields(self) -> Mapping[str, Any]:
        """Returns the extra fields that are not part of the partition."""
        return self._extra_fields

    def __repr__(self) -> str:
        """Return the ``repr`` of the merged partition + cursor slice dict."""
        return repr(self._stream_slice)

    def __setitem__(self, key: str, value: Any) -> None:
        """Always refuse item assignment."""
        raise ValueError("StreamSlice is immutable")

    def __getitem__(self, key: str) -> Any:
        """Look ``key`` up in the merged view."""
        return self._stream_slice[key]

    def __len__(self) -> int:
        """Return the number of keys in the merged view."""
        return len(self._stream_slice)

    def __iter__(self) -> Iterator[str]:
        """Iterate over the keys of the merged view."""
        return iter(self._stream_slice)

    def __contains__(self, item: Any) -> bool:
        """Return whether ``item`` is a key of the merged view."""
        return item in self._stream_slice

    def keys(self) -> KeysView[str]:
        """Return the keys view of the merged dict."""
        return self._stream_slice.keys()

    def items(self) -> ItemsView[str, Any]:
        """Return the items view of the merged dict."""
        return self._stream_slice.items()

    def values(self) -> ValuesView[Any]:
        """Return the values view of the merged dict."""
        return self._stream_slice.values()

    def get(self, key: str, default: Any = None) -> Optional[Any]:
        """Return the merged-view value for ``key``, or ``default`` if absent."""
        return self._stream_slice.get(key, default)

    def __eq__(self, other: Any) -> bool:
        """Compare against a dict or another StreamSlice.

        A dict is compared with the merged view. Another ``StreamSlice`` is
        compared on its stored partition and cursor slice; extra fields are
        not considered. Any other type compares unequal.
        """
        if isinstance(other, dict):
            return self._stream_slice == other
        if not isinstance(other, StreamSlice):
            return False
        # noinspection PyProtectedMember
        return self._partition == other._partition and self._cursor_slice == other._cursor_slice

    def __ne__(self, other: Any) -> bool:
        """Return the negation of ``__eq__``."""
        is_equal = self.__eq__(other)
        return not is_equal

    def __json_serializable__(self) -> Any:
        """Return the merged dict as the JSON-serializable representation."""
        return self._stream_slice

    def __hash__(self) -> int:
        """Hash the merged view via ``SliceHasher``."""
        # no need to provide stream_name here as this is used for slicing the cursor
        return SliceHasher.hash(stream_slice=self._stream_slice)

    def __bool__(self) -> bool:
        """Be truthy when either the merged view or the extra fields are non-empty."""
        if self._stream_slice:
            return True
        return bool(self._extra_fields)
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
import re
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# https://stackoverflow.com/a/1176023
|
|
10
|
+
def camel_to_snake(s: str) -> str:
    """Convert a CamelCase / mixedCase identifier to lower snake_case.

    Two regex passes are applied: the first puts an underscore before every
    capitalised word (an upper-case letter followed by lower-case letters), the
    second splits a lower-case letter or digit from a following upper-case
    letter. The result is lower-cased.
    """
    words_split = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", s)
    boundaries_split = re.sub("([a-z0-9])([A-Z])", r"\1_\2", words_split)
    return boundaries_split.lower()
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
import os
|
|
5
|
+
|
|
6
|
+
# Staging directory path; read once at import time from the AIRBYTE_STAGING_DIRECTORY
# environment variable, falling back to "/staging/files" when it is not set.
AIRBYTE_STAGING_DIRECTORY = os.environ.get("AIRBYTE_STAGING_DIRECTORY", "/staging/files")
# Directory returned when the staging directory does not exist on disk.
DEFAULT_LOCAL_DIRECTORY = "/tmp/airbyte-file-transfer"


def get_files_directory() -> str:
    """Return the staging directory if that path exists, otherwise the default local directory."""
    if os.path.exists(AIRBYTE_STAGING_DIRECTORY):
        return AIRBYTE_STAGING_DIRECTORY
    return DEFAULT_LOCAL_DIRECTORY
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
import time
|
|
5
|
+
from collections.abc import Mapping as ABCMapping
|
|
6
|
+
from typing import Any, Mapping, Optional
|
|
7
|
+
|
|
8
|
+
from airbyte_cdk.models import (
|
|
9
|
+
AirbyteLogMessage,
|
|
10
|
+
AirbyteMessage,
|
|
11
|
+
AirbyteRecordMessage,
|
|
12
|
+
AirbyteRecordMessageFileReference,
|
|
13
|
+
AirbyteTraceMessage,
|
|
14
|
+
)
|
|
15
|
+
from airbyte_cdk.models import Type as MessageType
|
|
16
|
+
from airbyte_cdk.sources.streams.core import StreamData
|
|
17
|
+
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def stream_data_to_airbyte_message(
    stream_name: str,
    data_or_message: StreamData,
    transformer: TypeTransformer = TypeTransformer(TransformConfig.NoTransform),
    schema: Optional[Mapping[str, Any]] = None,
    file_reference: Optional[AirbyteRecordMessageFileReference] = None,
) -> AirbyteMessage:
    """Wrap one item of stream output in the matching ``AirbyteMessage`` envelope.

    :param stream_name: name put on the record message
    :param data_or_message: a mapping (emitted as a RECORD), an ``AirbyteTraceMessage``
        (emitted as TRACE) or an ``AirbyteLogMessage`` (emitted as LOG)
    :param transformer: transformer applied in place to the copied record data
    :param schema: schema handed to the transformer; ``None`` is treated as ``{}``
    :param file_reference: optional file reference attached to the record message
    :return: the wrapping ``AirbyteMessage``
    :raises ValueError: if ``data_or_message`` is none of the supported types
    """
    effective_schema = {} if schema is None else schema

    if isinstance(data_or_message, ABCMapping):
        # Work on a shallow dict copy so the transformer does not touch the caller's mapping object.
        record_data = dict(data_or_message)
        emitted_at_ms = time.time_ns() // 1_000_000
        # Transform object fields according to config. Most likely you will
        # need it to normalize values against json schema. By default no action
        # taken unless configured. See
        # docs/connector-development/cdk-python/schemas.md for details.
        transformer.transform(record_data, effective_schema)
        record = AirbyteRecordMessage(
            stream=stream_name,
            data=record_data,
            emitted_at=emitted_at_ms,
            file_reference=file_reference,
        )
        return AirbyteMessage(type=MessageType.RECORD, record=record)

    if isinstance(data_or_message, AirbyteTraceMessage):
        return AirbyteMessage(type=MessageType.TRACE, trace=data_or_message)

    if isinstance(data_or_message, AirbyteLogMessage):
        return AirbyteMessage(type=MessageType.LOG, log=data_or_message)

    raise ValueError(
        f"Unexpected type for data_or_message: {type(data_or_message)}: {data_or_message}"
    )
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
import importlib
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
import pkgutil
|
|
10
|
+
from typing import Any, ClassVar, Dict, List, Mapping, MutableMapping, Optional, Tuple
|
|
11
|
+
|
|
12
|
+
import jsonref
|
|
13
|
+
from jsonschema import RefResolver, validate
|
|
14
|
+
from jsonschema.exceptions import ValidationError
|
|
15
|
+
from pydantic.v1 import BaseModel, Field
|
|
16
|
+
|
|
17
|
+
from airbyte_cdk.models import ConnectorSpecification, FailureType
|
|
18
|
+
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class JsonFileLoader:
    """
    Custom json file loader to resolve references to resources located in "shared" directory.
    We need this for compatibility with existing schemas because all of them have references
    pointing to shared_schema.json file instead of shared/shared_schema.json
    """

    def __init__(self, uri_base: str, shared: str):
        """
        :param uri_base - base path that incoming reference URIs are expected to contain
        :param shared - sub-path (relative to uri_base) that holds the shared schema files
        """
        self.shared = shared
        self.uri_base = uri_base

    def __call__(self, uri: str) -> Dict[str, Any]:
        """
        Load the JSON document a reference URI points to, redirected into the shared directory.

        :param uri - reference URI containing uri_base
        :return the parsed JSON object
        :raises ValueError if the file does not contain a JSON object
        """
        # Rewrite only the first occurrence of the base: a path that happens to repeat the
        # base further down must not be redirected a second time.
        uri = uri.replace(self.uri_base, f"{self.uri_base}/{self.shared}/", 1)
        # JSON files are UTF-8; do not depend on the platform's locale encoding.
        with open(uri, encoding="utf-8") as f:
            data = json.load(f)
        if isinstance(data, dict):
            return data
        raise ValueError(f"Expected to read a dictionary from {uri}. Got: {data}")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def resolve_ref_links(obj: Any) -> Any:
    """
    Walk a resolved schema and replace every jsonref.JsonRef object with the plain,
    JSON serializable dict it refers to. Recursive.

    :param obj - jsonschema object with ref field resolved.
    :return JSON serializable object with references without external dependencies.
    """
    if isinstance(obj, jsonref.JsonRef):
        target = resolve_ref_links(obj.__subject__)
        if not isinstance(target, dict):
            raise ValueError(f"Expected obj to be a dict. Got {target}")
        # Omit existing definitions for external resource since
        # we dont need it anymore.
        target.pop("definitions", None)
        return target

    if isinstance(obj, dict):
        return {name: resolve_ref_links(child) for name, child in obj.items()}

    if isinstance(obj, list):
        return [resolve_ref_links(child) for child in obj]

    return obj
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _expand_refs(schema: Any, ref_resolver: Optional[RefResolver] = None) -> None:
    """Internal helper that walks the schema in place and substitutes every $ref with its definition. Recursive.

    :param schema: schema that will be patched
    :param ref_resolver: resolver used to look up a $ref; when not given, one is built from ``schema``
    """
    resolver = ref_resolver or RefResolver.from_schema(schema)

    if isinstance(schema, MutableMapping):
        if "$ref" not in schema:
            for child in schema.values():
                _expand_refs(child, ref_resolver=resolver)
            return
        ref_url = schema.pop("$ref")
        _, definition = resolver.resolve(ref_url)
        # expand refs in definitions as well
        _expand_refs(definition, ref_resolver=resolver)
        schema.update(definition)
        return

    if isinstance(schema, List):
        for item in schema:
            _expand_refs(item, ref_resolver=resolver)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def expand_refs(schema: Any) -> None:
    """Replace every $ref in the schema with its definition, in place, then drop the top-level "definitions" entry.

    :param schema: schema that will be patched
    """
    _expand_refs(schema, ref_resolver=None)
    # remove definitions created by $ref
    schema.pop("definitions", None)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def rename_key(schema: Any, old_key: str, new_key: str) -> None:
    """Walk a nested structure and replace one dictionary key with another, in place. Used to replace anyOf with oneOf. Recursive.

    Lists are traversed as well, so a key nested inside the items of a list
    (for example an ``anyOf`` inside one of the options of another ``anyOf``)
    is renamed too. Values that are neither mappings nor lists are left alone.

    :param schema: schema that will be patched
    :param old_key: name of the key to replace
    :param new_key: new name of the key
    """
    if isinstance(schema, list):
        for item in schema:
            rename_key(item, old_key, new_key)
        return

    if not isinstance(schema, MutableMapping):
        return

    # Recurse first; the mapping itself is only modified after iteration has finished.
    for value in schema.values():
        rename_key(value, old_key, new_key)
    if old_key in schema:
        schema[new_key] = schema.pop(old_key)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class ResourceSchemaLoader:
    """Loads JSONSchema documents shipped as resources of a Python package."""

    def __init__(self, package_name: str):
        self.package_name = package_name

    def get_schema(self, name: str) -> dict[str, Any]:
        """
        Read ``schemas/<name>.json`` from the package and return it with its references resolved.

        The expected layout keeps every top-level (per-stream) schema directly in "schemas/",
        and any shared $ref targets in "schemas/shared/". For example:

        schemas/shared/<shared_definition>.json
        schemas/<name>.json    # contains a $ref to shared_definition
        schemas/<name2>.json   # contains a $ref to shared_definition

        :param name - schema file name without the ".json" extension
        :return the schema as a dict
        :raises IOError if no data is returned for the schema file
        :raises RuntimeError if the file is not valid JSON
        """
        resource_path = f"schemas/{name}.json"
        payload = pkgutil.get_data(self.package_name, resource_path)
        if not payload:
            raise IOError(f"Cannot find file {resource_path}")

        try:
            parsed = json.loads(payload)
        except ValueError as err:
            raise RuntimeError(f"Invalid JSON file format for file {resource_path}") from err

        return self._resolve_schema_references(parsed)

    def _resolve_schema_references(self, raw_schema: dict[str, Any]) -> dict[str, Any]:
        """
        Resolve links to external references and inline them into the schema.

        :param raw_schema jsonschema to lookup for external links.
        :return JSON serializable object with references without external dependencies.
        """
        package = importlib.import_module(self.package_name)
        if not package.__file__:
            raise ValueError(f"Package {package} does not have a valid __file__ field")

        # References are resolved relative to the directory of the package.
        base = os.path.dirname(package.__file__) + "/"
        shared_loader = JsonFileLoader(base, "schemas/shared")
        with_refs = jsonref.JsonRef.replace_refs(raw_schema, loader=shared_loader, base_uri=base)
        resolved = resolve_ref_links(with_refs)
        if not isinstance(resolved, dict):
            raise ValueError(f"Expected resolved to be a dict. Got {resolved}")
        return resolved
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def check_config_against_spec_or_exit(
    config: Mapping[str, Any], spec: ConnectorSpecification
) -> None:
    """
    Validate the config object against the spec's connectionSpecification schema. If the
    config does not validate, raise an AirbyteTracedException (failure type config_error)
    carrying the validation error message.

    :param config - config loaded from file specified over command line
    :param spec - spec object generated by connector
    """
    connection_schema = spec.connectionSpecification
    try:
        validate(instance=config, schema=connection_schema)
    except ValidationError as validation_error:
        reason = validation_error.message
        raise AirbyteTracedException(
            message="Config validation error: " + reason,
            internal_message=reason,
            failure_type=FailureType.config_error,
        ) from None  # required to prevent logging config secrets from the ValidationError's stacktrace
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
class InternalConfig(BaseModel):
    # Config keys that belong to the internal config; each is the alias of one field below.
    KEYWORDS: ClassVar[set[str]] = {"_limit", "_page_size"}
    limit: int = Field(None, alias="_limit")
    page_size: int = Field(None, alias="_page_size")

    def dict(self, *args: Any, **kwargs: Any) -> dict[str, Any]:
        """Serialize the model, always by alias and always excluding unset fields."""
        forced = {**kwargs, "by_alias": True, "exclude_unset": True}
        return super().dict(*args, **forced)

    def is_limit_reached(self, records_counter: int) -> bool:
        """
        Check if record count reached limit set by internal config.
        :param records_counter - number of records already read
        :return True if limit reached, False otherwise
        """
        if not self.limit:
            # No limit configured (or a falsy limit): never reached.
            return False
        return records_counter >= self.limit
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def split_config(config: Mapping[str, Any]) -> Tuple[dict[str, Any], InternalConfig]:
    """
    Partition a config mapping in two: a dict with the user defined configuration,
    and an InternalConfig built from the private keys listed in InternalConfig.KEYWORDS.

    :param
        config - Dict object that has been loaded from config file.

    :return tuple of user defined config dict with filtered out internal
    parameters and connector acceptance test internal config object.
    """
    user_part: dict[str, Any] = {}
    internal_part: dict[str, Any] = {}
    for name, value in config.items():
        destination = internal_part if name in InternalConfig.KEYWORDS else user_part
        destination[name] = value
    return user_part, InternalConfig.parse_obj(internal_part)
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
from abc import ABC, abstractmethod
|
|
8
|
+
from typing import Any, Mapping, Optional
|
|
9
|
+
|
|
10
|
+
from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level
|
|
11
|
+
from airbyte_cdk.models import Type as MessageType
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class SliceLogger(ABC):
    """
    SliceLogger is an interface that allows us to log slices of data in a uniform way.
    It is responsible for determining whether or not a slice should be logged and for creating the log message.
    """

    SLICE_LOG_PREFIX = "slice:"

    def create_slice_log_message(self, _slice: Optional[Mapping[str, Any]]) -> AirbyteMessage:
        """
        Build the INFO-level log message for a slice: SLICE_LOG_PREFIX followed by the slice as JSON.

        Mapping is an interface that can be implemented in various ways. However, json.dumps will just do a
        `str(<object>)` (through `default=str`) if the slice is a class implementing Mapping rather than a dict.
        Therefore the slice is cast to a dict before being passed to json.dumps.

        :param _slice: the slice to log, or None
        :return: a LOG AirbyteMessage carrying the serialized slice
        """
        # Compare against None rather than testing truthiness: an empty (falsy) Mapping
        # implementation must still be converted to a dict, otherwise it would be
        # stringified by `default=str` instead of being serialized as `{}`.
        printable_slice = dict(_slice) if _slice is not None else None
        return AirbyteMessage(
            type=MessageType.LOG,
            log=AirbyteLogMessage(
                level=Level.INFO,
                message=f"{SliceLogger.SLICE_LOG_PREFIX}{json.dumps(printable_slice, default=str)}",
            ),
        )

    @abstractmethod
    def should_log_slice_message(self, logger: logging.Logger) -> bool:
        """
        Decide whether a slice log message should be emitted.

        :param logger: logger made available to the decision
        :return: True if the slice message should be logged
        """
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class DebugSliceLogger(SliceLogger):
    """Slice logger that logs slice messages only when DEBUG is enabled for the given logger."""

    def should_log_slice_message(self, logger: logging.Logger) -> bool:
        """
        :param logger: logger whose enabled level is checked
        :return: True when the logger is enabled for logging.DEBUG
        """
        debug_enabled = logger.isEnabledFor(logging.DEBUG)
        return debug_enabled
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class AlwaysLogSliceLogger(SliceLogger):
    """Slice logger that logs slice messages unconditionally."""

    def should_log_slice_message(self, logger: logging.Logger) -> bool:
        """
        :param logger: unused; accepted to satisfy the SliceLogger interface
        :return: always True
        """
        return True
|