airbyte-cdk 0.0.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/__init__.py +358 -0
- airbyte_cdk/cli/__init__.py +1 -0
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
- airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
- airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
- airbyte_cdk/config_observation.py +104 -0
- airbyte_cdk/connector.py +123 -0
- airbyte_cdk/connector_builder/README.md +53 -0
- airbyte_cdk/connector_builder/__init__.py +3 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
- airbyte_cdk/connector_builder/main.py +107 -0
- airbyte_cdk/connector_builder/models.py +73 -0
- airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
- airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
- airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
- airbyte_cdk/connector_builder/test_reader/types.py +83 -0
- airbyte_cdk/destinations/__init__.py +8 -0
- airbyte_cdk/destinations/destination.py +154 -0
- airbyte_cdk/destinations/vector_db_based/README.md +37 -0
- airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
- airbyte_cdk/destinations/vector_db_based/config.py +298 -0
- airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
- airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
- airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
- airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
- airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
- airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
- airbyte_cdk/entrypoint.py +414 -0
- airbyte_cdk/exception_handler.py +56 -0
- airbyte_cdk/logger.py +109 -0
- airbyte_cdk/models/__init__.py +72 -0
- airbyte_cdk/models/airbyte_protocol.py +88 -0
- airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
- airbyte_cdk/models/well_known_types.py +5 -0
- airbyte_cdk/py.typed +0 -0
- airbyte_cdk/sources/__init__.py +26 -0
- airbyte_cdk/sources/abstract_source.py +326 -0
- airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
- airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
- airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
- airbyte_cdk/sources/config.py +27 -0
- airbyte_cdk/sources/connector_state_manager.py +161 -0
- airbyte_cdk/sources/declarative/__init__.py +3 -0
- airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
- airbyte_cdk/sources/declarative/async_job/job.py +52 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
- airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
- airbyte_cdk/sources/declarative/async_job/status.py +24 -0
- airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
- airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
- airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
- airbyte_cdk/sources/declarative/auth/token.py +267 -0
- airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
- airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
- airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
- airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
- airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
- airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
- airbyte_cdk/sources/declarative/declarative_source.py +36 -0
- airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
- airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
- airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
- airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
- airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
- airbyte_cdk/sources/declarative/exceptions.py +9 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
- airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
- airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
- airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
- airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
- airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
- airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
- airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
- airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
- airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
- airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
- airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
- airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
- airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
- airbyte_cdk/sources/declarative/models/__init__.py +2 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
- airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
- airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
- airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
- airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
- airbyte_cdk/sources/declarative/requesters/README.md +56 -0
- airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
- airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
- airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
- airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
- airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
- airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
- airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
- airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
- airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
- airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
- airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
- airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
- airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
- airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
- airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
- airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
- airbyte_cdk/sources/declarative/spec/spec.py +48 -0
- airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
- airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
- airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
- airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
- airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
- airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
- airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
- airbyte_cdk/sources/declarative/types.py +25 -0
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
- airbyte_cdk/sources/file_based/README.md +152 -0
- airbyte_cdk/sources/file_based/__init__.py +24 -0
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
- airbyte_cdk/sources/file_based/config/__init__.py +0 -0
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
- airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
- airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
- airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
- airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
- airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
- airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
- airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
- airbyte_cdk/sources/file_based/exceptions.py +159 -0
- airbyte_cdk/sources/file_based/file_based_source.py +466 -0
- airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
- airbyte_cdk/sources/file_based/file_record_data.py +22 -0
- airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
- airbyte_cdk/sources/file_based/remote_file.py +18 -0
- airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
- airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
- airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
- airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
- airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
- airbyte_cdk/sources/file_based/types.py +10 -0
- airbyte_cdk/sources/http_config.py +10 -0
- airbyte_cdk/sources/http_logger.py +55 -0
- airbyte_cdk/sources/message/__init__.py +19 -0
- airbyte_cdk/sources/message/repository.py +137 -0
- airbyte_cdk/sources/source.py +95 -0
- airbyte_cdk/sources/specs/transfer_modes.py +26 -0
- airbyte_cdk/sources/streams/__init__.py +8 -0
- airbyte_cdk/sources/streams/availability_strategy.py +84 -0
- airbyte_cdk/sources/streams/call_rate.py +704 -0
- airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
- airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
- airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
- airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
- airbyte_cdk/sources/streams/concurrent/README.md +7 -0
- airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
- airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
- airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
- airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
- airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
- airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
- airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
- airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
- airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
- airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
- airbyte_cdk/sources/streams/core.py +703 -0
- airbyte_cdk/sources/streams/http/__init__.py +10 -0
- airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
- airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
- airbyte_cdk/sources/streams/http/exceptions.py +61 -0
- airbyte_cdk/sources/streams/http/http.py +673 -0
- airbyte_cdk/sources/streams/http/http_client.py +531 -0
- airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
- airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
- airbyte_cdk/sources/streams/utils/__init__.py +3 -0
- airbyte_cdk/sources/types.py +169 -0
- airbyte_cdk/sources/utils/__init__.py +7 -0
- airbyte_cdk/sources/utils/casing.py +12 -0
- airbyte_cdk/sources/utils/files_directory.py +15 -0
- airbyte_cdk/sources/utils/record_helper.py +53 -0
- airbyte_cdk/sources/utils/schema_helpers.py +230 -0
- airbyte_cdk/sources/utils/slice_logger.py +57 -0
- airbyte_cdk/sources/utils/transform.py +277 -0
- airbyte_cdk/sources/utils/types.py +7 -0
- airbyte_cdk/sql/__init__.py +0 -0
- airbyte_cdk/sql/_util/__init__.py +0 -0
- airbyte_cdk/sql/_util/hashing.py +34 -0
- airbyte_cdk/sql/_util/name_normalizers.py +92 -0
- airbyte_cdk/sql/constants.py +32 -0
- airbyte_cdk/sql/exceptions.py +235 -0
- airbyte_cdk/sql/secrets.py +123 -0
- airbyte_cdk/sql/shared/__init__.py +15 -0
- airbyte_cdk/sql/shared/catalog_providers.py +145 -0
- airbyte_cdk/sql/shared/sql_processor.py +786 -0
- airbyte_cdk/sql/types.py +160 -0
- airbyte_cdk/test/__init__.py +7 -0
- airbyte_cdk/test/catalog_builder.py +81 -0
- airbyte_cdk/test/entrypoint_wrapper.py +250 -0
- airbyte_cdk/test/mock_http/__init__.py +6 -0
- airbyte_cdk/test/mock_http/matcher.py +41 -0
- airbyte_cdk/test/mock_http/mocker.py +185 -0
- airbyte_cdk/test/mock_http/request.py +103 -0
- airbyte_cdk/test/mock_http/response.py +28 -0
- airbyte_cdk/test/mock_http/response_builder.py +237 -0
- airbyte_cdk/test/state_builder.py +33 -0
- airbyte_cdk/test/utils/__init__.py +1 -0
- airbyte_cdk/test/utils/data.py +24 -0
- airbyte_cdk/test/utils/http_mocking.py +16 -0
- airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
- airbyte_cdk/test/utils/reading.py +26 -0
- airbyte_cdk/utils/__init__.py +10 -0
- airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
- airbyte_cdk/utils/analytics_message.py +25 -0
- airbyte_cdk/utils/constants.py +5 -0
- airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
- airbyte_cdk/utils/datetime_helpers.py +499 -0
- airbyte_cdk/utils/event_timing.py +85 -0
- airbyte_cdk/utils/is_cloud_environment.py +18 -0
- airbyte_cdk/utils/mapping_helpers.py +162 -0
- airbyte_cdk/utils/message_utils.py +26 -0
- airbyte_cdk/utils/oneof_option_config.py +33 -0
- airbyte_cdk/utils/print_buffer.py +75 -0
- airbyte_cdk/utils/schema_inferrer.py +270 -0
- airbyte_cdk/utils/slice_hasher.py +37 -0
- airbyte_cdk/utils/spec_schema_transformations.py +26 -0
- airbyte_cdk/utils/stream_status_utils.py +43 -0
- airbyte_cdk/utils/traced_exception.py +145 -0
- airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
- airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
- airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
- airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
- airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
- airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from enum import Flag, auto
|
|
7
|
+
from typing import Any, Callable, Dict, Generator, Mapping, Optional, cast
|
|
8
|
+
|
|
9
|
+
from jsonschema import Draft7Validator, RefResolver, ValidationError, Validator, validators
|
|
10
|
+
|
|
11
|
+
# NOTE(review): not referenced in the visible portion of this module - confirm where it is used.
MAX_NESTING_DEPTH = 3

# JSON Schema scalar type names mapped to the Python type each one corresponds to.
json_to_python_simple = {
    "string": str,
    "number": float,
    "integer": int,
    "boolean": bool,
    "null": type(None),
}

# Scalar mapping extended with the two container types.
json_to_python = {**json_to_python_simple, "object": dict, "array": list}

# Reverse lookup: Python type -> JSON Schema type name.
python_to_json = {py_type: schema_name for schema_name, py_type in json_to_python.items()}

logger = logging.getLogger("airbyte")
|
|
23
|
+
|
|
24
|
+
_TRUTHY_STRINGS = ("y", "yes", "t", "true", "on", "1")
|
|
25
|
+
_FALSEY_STRINGS = ("n", "no", "f", "false", "off", "0")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _strtobool(value: str, /) -> int:
|
|
29
|
+
"""Mimic the behavior of distutils.util.strtobool.
|
|
30
|
+
|
|
31
|
+
From: https://docs.python.org/2/distutils/apiref.html#distutils.util.strtobool
|
|
32
|
+
|
|
33
|
+
> Convert a string representation of truth to true (1) or false (0).
|
|
34
|
+
> True values are y, yes, t, true, on and 1; false values are n, no, f, false, off and 0. Raises
|
|
35
|
+
> `ValueError` if val is anything else.
|
|
36
|
+
"""
|
|
37
|
+
normalized_str = value.lower().strip()
|
|
38
|
+
if normalized_str in _TRUTHY_STRINGS:
|
|
39
|
+
return 1
|
|
40
|
+
|
|
41
|
+
if normalized_str in _FALSEY_STRINGS:
|
|
42
|
+
return 0
|
|
43
|
+
|
|
44
|
+
raise ValueError(f"Invalid boolean value: {normalized_str}")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class TransformConfig(Flag):
    """
    Flags that select which normalizations a TypeTransformer applies.

    Members are bit flags, so compatible options are combined with ``|``:

        TransformConfig.DefaultSchemaNormalization | TransformConfig.CustomSchemaNormalization
    """

    # Leave values untouched. This is the default behavior and it must be
    # used on its own, never together with another flag.
    NoTransform = auto()
    # Cast each value to the type declared in its jsonschema using the
    # default_convert method (plain type casting).
    DefaultSchemaNormalization = auto()
    # Permit registration of a user supplied transformation callback. When
    # combined with DefaultSchemaNormalization the default casting runs
    # first and the custom callback runs on its result.
    CustomSchemaNormalization = auto()
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class TypeTransformer:
|
|
67
|
+
"""
|
|
68
|
+
Class for transforming object before output.
|
|
69
|
+
"""
|
|
70
|
+
|
|
71
|
+
_custom_normalizer: Optional[Callable[[Any, Dict[str, Any]], Any]] = None
|
|
72
|
+
|
|
73
|
+
def __init__(self, config: TransformConfig):
    """
    Set up the transformer for the requested configuration.
    :param config Transform config that would be applied to object
    :raises Exception if NoTransform is combined with any other flag.
    """
    no_transform = TransformConfig.NoTransform
    if no_transform in config and config != no_transform:
        raise Exception("NoTransform option cannot be combined with other flags.")
    self._config = config

    # Only these schema keywords get a normalizing wrapper. Every other
    # Draft7 keyword is left out of the custom validator entirely, so
    # fields we do not transform are not validated (maximum performance).
    handled_keys = ["type", "array", "$ref", "properties", "items"]
    all_validators = {}
    for key, orig_validator in Draft7Validator.VALIDATORS.items():
        if key in handled_keys:
            all_validators[key] = self.__get_normalizer(key, orig_validator)

    self._normalizer = validators.create(
        meta_schema=Draft7Validator.META_SCHEMA,
        validators=all_validators,
    )
|
|
90
|
+
|
|
91
|
+
def registerCustomTransform(
    self, normalization_callback: Callable[[Any, dict[str, Any]], Any]
) -> Callable[[Any, dict[str, Any]], Any]:
    """
    Install the callback used for custom value normalization.
    :param normalization_callback function to be used for value
    normalization. It receives the original value and the part of the
    schema describing its type, and should return the normalized value.
    See docs/connector-development/cdk-python/schemas.md for details.
    :return The very same callback, which lets registerCustomTransform be applied as a decorator.
    :raises Exception if the transformer was not configured with
    TransformConfig.CustomSchemaNormalization.
    """
    custom_enabled = TransformConfig.CustomSchemaNormalization in self._config
    if not custom_enabled:
        raise Exception(
            "Please set TransformConfig.CustomSchemaNormalization config before registering custom normalizer"
        )
    self._custom_normalizer = normalization_callback
    return normalization_callback
|
|
108
|
+
|
|
109
|
+
def __normalize(self, original_item: Any, subschema: Dict[str, Any]) -> Any:
    """
    Run the configured transformations over a single field value.
    :param original_item original value of field.
    :param subschema part of the jsonschema containing field type/format data.
    :return Final field value.
    """
    value = original_item
    # Default casting, when enabled, always runs before the custom callback.
    if TransformConfig.DefaultSchemaNormalization in self._config:
        value = self.default_convert(value, subschema)

    custom = self._custom_normalizer
    if not custom:
        return value
    return custom(value, subschema)
|
|
122
|
+
|
|
123
|
+
@staticmethod
|
|
124
|
+
def default_convert(original_item: Any, subschema: Dict[str, Any]) -> Any:
|
|
125
|
+
"""
|
|
126
|
+
Default transform function that is used when TransformConfig.DefaultSchemaNormalization flag set.
|
|
127
|
+
:param original_item original value of field.
|
|
128
|
+
:param subschema part of the jsonschema containing field type/format data.
|
|
129
|
+
:return transformed field value.
|
|
130
|
+
"""
|
|
131
|
+
target_type = subschema.get("type", [])
|
|
132
|
+
if original_item is None and "null" in target_type:
|
|
133
|
+
return None
|
|
134
|
+
if isinstance(target_type, list):
|
|
135
|
+
# jsonschema type could either be a single string or array of type
|
|
136
|
+
# strings. In case if there is some disambigous and more than one
|
|
137
|
+
# type (except null) do not do any conversion and return original
|
|
138
|
+
# value. If type array has one type and null i.e. {"type":
|
|
139
|
+
# ["integer", "null"]}, convert value to specified type.
|
|
140
|
+
target_type = [t for t in target_type if t != "null"]
|
|
141
|
+
if len(target_type) != 1:
|
|
142
|
+
return original_item
|
|
143
|
+
target_type = target_type[0]
|
|
144
|
+
try:
|
|
145
|
+
if target_type == "string":
|
|
146
|
+
return str(original_item)
|
|
147
|
+
elif target_type == "number":
|
|
148
|
+
return float(original_item)
|
|
149
|
+
elif target_type == "integer":
|
|
150
|
+
return int(original_item)
|
|
151
|
+
elif target_type == "boolean":
|
|
152
|
+
if isinstance(original_item, str):
|
|
153
|
+
return _strtobool(original_item) == 1
|
|
154
|
+
return bool(original_item)
|
|
155
|
+
elif target_type == "array":
|
|
156
|
+
item_types = set(subschema.get("items", {}).get("type", set()))
|
|
157
|
+
if (
|
|
158
|
+
item_types.issubset(json_to_python_simple)
|
|
159
|
+
and type(original_item) in json_to_python_simple.values()
|
|
160
|
+
):
|
|
161
|
+
return [original_item]
|
|
162
|
+
except (ValueError, TypeError):
|
|
163
|
+
return original_item
|
|
164
|
+
return original_item
|
|
165
|
+
|
|
166
|
+
def __get_normalizer(
    self,
    schema_key: str,
    original_validator: Callable,  # type: ignore[type-arg]
) -> Callable[[Any, Any, Any, dict[str, Any]], Generator[Any, Any, None]]:
    """
    Traverse through object fields using native jsonschema validator and apply normalization function.

    :param schema_key: related json schema key that is currently being validated/normalized.
    :param original_validator: native jsonschema validator callback.
    :return: a validator callback that normalizes values and then delegates to ``original_validator``.
    """

    def normalizator(
        validator_instance: Validator,
        property_value: Any,
        instance: Any,
        schema: Dict[str, Any],
    ) -> Generator[Any, Any, None]:
        """
        Jsonschema validator callable used for validating an instance. We
        override the default Draft7Validator to perform value transformation
        before validation takes place. We do not take any action except
        logging a warning if the object does not conform to the json schema;
        the jsonschema algorithm is only used to traverse through object fields.
        See the ``validators`` parameter at
        https://python-jsonschema.readthedocs.io/en/stable/creating/?highlight=validators.create#jsonschema.validators.create
        for a detailed description.

        :param validator_instance: validator running the traversal; its resolver is used for "$ref" lookups.
        :param property_value: value of ``schema_key`` in the schema being applied.
        :param instance: data being validated; dicts and lists are modified in place.
        :param schema: schema that contains ``schema_key``.
        :return: generator yielding whatever ``original_validator`` yields.
        """

        def resolve(subschema: dict[str, Any]) -> dict[str, Any]:
            # Replace a "$ref" subschema with the schema it points to, so the
            # normalization below sees the referenced definition.
            if "$ref" in subschema:
                _, resolved = cast(
                    RefResolver,
                    validator_instance.resolver,
                ).resolve(subschema["$ref"])
                return cast(dict[str, Any], resolved)
            return subschema

        # Transform object and array values before running json schema type checking for each element.
        # Recursively normalize every value of the "instance" sub-object,
        # if "instance" is an incorrect type - skip recursive normalization of "instance"
        if schema_key == "properties" and isinstance(instance, dict):
            for k, subschema in property_value.items():
                # Only properties actually present in the data are normalized.
                if k in instance:
                    subschema = resolve(subschema)
                    instance[k] = self.__normalize(instance[k], subschema)
        # Recursively normalize every item of the "instance" sub-array,
        # if "instance" is an incorrect type - skip recursive normalization of "instance"
        elif schema_key == "items" and isinstance(instance, list):
            subschema = resolve(property_value)
            for index, item in enumerate(instance):
                instance[index] = self.__normalize(item, subschema)

        # Running native jsonschema traverse algorithm after field normalization is done.
        yield from original_validator(
            validator_instance,
            property_value,
            instance,
            schema,
        )

    return normalizator
|
|
228
|
+
|
|
229
|
+
def transform(
    self,
    record: Dict[str, Any],
    schema: Mapping[str, Any],
) -> None:
    """
    Normalize and validate a record according to the configured transformation.

    :param record: record instance to normalize; it is modified in place.
    :param schema: jsonschema describing the record.
    """
    if TransformConfig.NoTransform not in self._config:
        validator = self._normalizer(schema)
        # iter_errors() is used instead of validate(): validate() would raise
        # on the first validation error and stop processing the rest of the
        # schema, whereas here every error is only reported as a warning.
        for validation_error in validator.iter_errors(record):
            logger.warning(self.get_error_message(validation_error))
|
|
248
|
+
|
|
249
|
+
def get_error_message(self, e: ValidationError) -> str:
    """
    Build a sanitized error message from a ValidationError instance.

    The message reports the type structure of the offending value rather
    than the value itself, together with the expected type and the dotted
    path of the field inside the record.
    """
    source_type = self._get_type_structure(e.instance)
    dotted_path = ".".join(str(part) for part in e.path)

    return (
        f"Failed to transform value from type '{source_type}' "
        f"to type '{e.validator_value}' at path: '{dotted_path}'"
    )
|
|
257
|
+
|
|
258
|
+
def _get_type_structure(self, input_data: Any, current_depth: int = 0) -> Any:
|
|
259
|
+
"""
|
|
260
|
+
Get the structure of a given input data for use in error message construction.
|
|
261
|
+
"""
|
|
262
|
+
# Handle null values
|
|
263
|
+
if input_data is None:
|
|
264
|
+
return "null"
|
|
265
|
+
|
|
266
|
+
# Avoid recursing too deep
|
|
267
|
+
if current_depth >= MAX_NESTING_DEPTH:
|
|
268
|
+
return "object" if isinstance(input_data, dict) else python_to_json[type(input_data)]
|
|
269
|
+
|
|
270
|
+
if isinstance(input_data, dict):
|
|
271
|
+
return {
|
|
272
|
+
key: self._get_type_structure(field_value, current_depth + 1)
|
|
273
|
+
for key, field_value in input_data.items()
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
else:
|
|
277
|
+
return python_to_json[type(input_data)]
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
2
|
+
"""Hashing utils for Airbyte."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import hashlib
|
|
7
|
+
from collections.abc import Mapping
|
|
8
|
+
|
|
9
|
+
HASH_SEED = "Airbyte:"
"""Additional seed for randomizing one-way hashed strings."""


def one_way_hash(
    obj: Mapping[str, str] | list[str] | object,
    /,
) -> str:
    """Return a one-way SHA-256 hex digest of the given object.

    Mappings are hashed from the sorted list of ``(key, hash(value))`` pairs,
    so key order does not matter; lists are hashed from the list of their
    element hashes; anything else is hashed from its ``str()`` form.

    To ensure a unique domain of hashes, the seed is prepended to the
    serialized text before hashing.
    """
    if isinstance(obj, Mapping):
        # Hash the values recursively, then order the pairs so that two
        # mappings with the same content always serialize identically.
        hashed_pairs = [(key, one_way_hash(value)) for key, value in obj.items()]
        hashed_pairs.sort()
        serialized = str(hashed_pairs)
    elif isinstance(obj, list):
        # Element order is significant for lists.
        serialized = str(list(map(one_way_hash, obj)))
    else:
        serialized = str(obj)

    digest = hashlib.sha256()
    digest.update(f"{HASH_SEED}{serialized}".encode())
    return digest.hexdigest()
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
2
|
+
"""Name normalizer classes."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import abc
|
|
7
|
+
import functools
|
|
8
|
+
import re
|
|
9
|
+
from typing import TYPE_CHECKING
|
|
10
|
+
|
|
11
|
+
from airbyte_cdk.sql import exceptions as exc
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from collections.abc import Iterable
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class NameNormalizerBase(abc.ABC):
    """Abstract interface for name normalizers.

    Subclasses implement ``normalize``; the helpers below are all expressed
    in terms of it.
    """

    @staticmethod
    @abc.abstractmethod
    def normalize(name: str) -> str:
        """Return the normalized form of ``name``."""
        ...

    @classmethod
    def normalize_set(cls, str_iter: Iterable[str]) -> set[str]:
        """Normalize every string of the iterable and collect the results in a set."""
        return set(map(cls.normalize, str_iter))

    @classmethod
    def normalize_list(cls, str_iter: Iterable[str]) -> list[str]:
        """Normalize every string of the iterable, preserving order, into a list."""
        return list(map(cls.normalize, str_iter))

    @classmethod
    def check_matched(cls, name1: str, name2: str) -> bool:
        """Return True if both names normalize to the same value."""
        first, second = cls.normalize(name1), cls.normalize(name2)
        return first == second

    @classmethod
    def check_normalized(cls, name: str) -> bool:
        """Return True if normalizing ``name`` leaves it unchanged."""
        normalized = cls.normalize(name)
        return normalized == name
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class LowerCaseNormalizer(NameNormalizerBase):
    """Name normalizer that produces lower-case identifiers."""

    @staticmethod
    @functools.cache
    def normalize(name: str) -> str:
        """Return the normalized name.

        - The name is lower-cased.
        - Every character outside ``A-Z``, ``a-z`` and ``0-9`` is replaced
          with an underscore (one underscore per character).
        - Names that start with a digit ("1", "2", "123", "1b" etc.) are
          prefixed with an underscore ("_1", "_2", "_123", "_1b" etc.)

        Examples:
        - "Hello World!" -> "hello_world_"
        - "Hello, World!" -> "hello__world_"
        - "Hello - World" -> "hello___world"
        - "___Hello, World___" -> "___hello__world___"
        - "Average Sales (%)" -> "average_sales____"
        - "Average Sales (#)" -> "average_sales____"
        - "+1" -> "_1"
        - "-1" -> "_1"

        Raises:
            AirbyteNameNormalizationError: if the result is empty or consists
                of underscores only.
        """
        # Replace all non-alphanumeric characters with underscores.
        normalized = re.sub("[^A-Za-z0-9]", "_", name.lower())

        # Most databases do not allow identifiers to start with a number.
        if normalized[:1].isdigit():
            normalized = "_" + normalized

        # Nothing but underscores (or nothing at all) is left: reject the name.
        if normalized.strip("_") == "":
            raise exc.AirbyteNameNormalizationError(
                message="Name cannot be empty after normalization.",
                raw_name=name,
                normalization_result=normalized,
            )

        return normalized
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
__all__ = [
|
|
90
|
+
"NameNormalizerBase",
|
|
91
|
+
"LowerCaseNormalizer",
|
|
92
|
+
]
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
|
2
|
+
"""Constants shared across the Airbyte codebase."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
DEBUG_MODE = False # Set to True to enable additional debug logging.
|
|
7
|
+
|
|
8
|
+
AB_EXTRACTED_AT_COLUMN = "_airbyte_extracted_at"
|
|
9
|
+
"""A column that stores the timestamp when the record was extracted."""
|
|
10
|
+
|
|
11
|
+
AB_META_COLUMN = "_airbyte_meta"
|
|
12
|
+
"""A column that stores metadata about the record."""
|
|
13
|
+
|
|
14
|
+
AB_RAW_ID_COLUMN = "_airbyte_raw_id"
|
|
15
|
+
"""A column that stores a unique identifier for each row in the source data.
|
|
16
|
+
|
|
17
|
+
Note: The interpretation of this column is slightly different from in Airbyte Dv2 destinations.
|
|
18
|
+
In Airbyte Dv2 destinations, this column points to a row in a separate 'raw' table. In Airbyte,
|
|
19
|
+
this column is simply used as a unique identifier for each record as it is received.
|
|
20
|
+
|
|
21
|
+
Airbyte uses ULIDs for this column, which are identifiers that can be sorted by time
|
|
22
|
+
received. This allows us to determine the order of records as they are received, even if
|
|
23
|
+
the source provides records that are tied or received out of order from the perspective of their
|
|
24
|
+
`emitted_at` (`_airbyte_extracted_at`) timestamps.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
AB_INTERNAL_COLUMNS = {
|
|
28
|
+
AB_RAW_ID_COLUMN,
|
|
29
|
+
AB_EXTRACTED_AT_COLUMN,
|
|
30
|
+
AB_META_COLUMN,
|
|
31
|
+
}
|
|
32
|
+
"""A set of internal columns that are reserved for Airbyte's internal use."""
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
2
|
+
|
|
3
|
+
"""All exceptions used in Airbyte.
|
|
4
|
+
|
|
5
|
+
This design is modeled after structlog's exceptions, in that we bias towards auto-generated
|
|
6
|
+
property prints rather than sentence-like string concatenation.
|
|
7
|
+
|
|
8
|
+
E.g. Instead of this:
|
|
9
|
+
|
|
10
|
+
> `Subprocess failed with exit code '1'`
|
|
11
|
+
|
|
12
|
+
We do this:
|
|
13
|
+
|
|
14
|
+
> `Subprocess failed. (exit_code=1)`
|
|
15
|
+
|
|
16
|
+
The benefit of this approach is that we can easily support structured logging, and we can
|
|
17
|
+
easily add new properties to exceptions without having to update all the places where they
|
|
18
|
+
are raised. We can also support any arbitrary number of properties in exceptions, without spending
|
|
19
|
+
time on building sentence-like string constructions with optional inputs.
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
In addition, the following principles are applied for exception class design:
|
|
23
|
+
|
|
24
|
+
- All exceptions inherit from a common base class.
|
|
25
|
+
- All exceptions have a message attribute.
|
|
26
|
+
- The first line of the docstring is used as the default message.
|
|
27
|
+
- The default message can be overridden by explicitly setting the message attribute.
|
|
28
|
+
- Exceptions may optionally have a guidance attribute.
|
|
29
|
+
- Exceptions may optionally have a help_url attribute.
|
|
30
|
+
- Rendering is automatically handled by the base class.
|
|
31
|
+
- Any helpful context not defined by the exception class can be passed in the `context` dict arg.
|
|
32
|
+
- Within reason, avoid sending PII to the exception constructor.
|
|
33
|
+
- Exceptions are dataclasses, so they can be instantiated with keyword arguments.
|
|
34
|
+
- Use the 'from' syntax to chain exceptions when it is helpful to do so.
|
|
35
|
+
E.g. `raise AirbyteConnectorNotFoundError(...) from FileNotFoundError(connector_path)`
|
|
36
|
+
- Any exception that adds a new property should also be decorated as `@dataclass`.
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
from __future__ import annotations
|
|
40
|
+
|
|
41
|
+
import logging
|
|
42
|
+
from dataclasses import dataclass
|
|
43
|
+
from pathlib import Path
|
|
44
|
+
from textwrap import indent
|
|
45
|
+
from typing import Any
|
|
46
|
+
|
|
47
|
+
NEW_ISSUE_URL = "https://github.com/airbytehq/airbyte/issues/new/choose"
# The scheme used to be duplicated ("https://https://..."), which made every
# derived documentation link invalid.
DOCS_URL_BASE = "https://docs.airbyte.com/"
# Strip the trailing slash of the base so the joined URL has a single "/".
DOCS_URL = f"{DOCS_URL_BASE.rstrip('/')}/airbyte.html"

VERTICAL_SEPARATOR = "\n" + "-" * 60


# Base error class


@dataclass
class AirbyteError(Exception):
    """Base class for exceptions in Airbyte."""

    # Optional hint telling the user what to do next.
    guidance: str | None = None
    # Optional link rendered as "More info".
    help_url: str | None = None
    # Log output to attach, either as one string or as a list of lines.
    log_text: str | list[str] | None = None
    # Optional path of a log file to point the user to.
    log_file: Path | None = None
    # Extra key/value pairs rendered alongside the exception's own properties.
    context: dict[str, Any] | None = None
    # Explicit message; when unset the first docstring line is used instead.
    message: str | None = None
    # Underlying exception rendered in a trailing "Caused by" section.
    original_exception: Exception | None = None

    def get_message(self) -> str:
        """Return the best description for the exception.

        We resolve the following in order:
        1. The message sent to the exception constructor (if provided).
        2. The first line of the class's docstring.
        """
        if self.message:
            return self.message

        return self.__doc__.split("\n")[0] if self.__doc__ else ""

    def __str__(self) -> str:
        """Return a multi-line, human-readable rendering of the exception.

        The rendering does not modify the exception: ``log_text`` keeps the
        type it was given even when it is a list of lines.
        """
        special_properties = [
            "message",
            "guidance",
            "help_url",
            "log_text",
            "context",
            "log_file",
            "original_exception",
        ]
        # Subclass-specific public properties that are set, plus the free-form context.
        display_properties = {
            k: v
            for k, v in self.__dict__.items()
            if k not in special_properties and not k.startswith("_") and v is not None
        }
        display_properties.update(self.context or {})
        context_str = "\n ".join(
            f"{str(k).replace('_', ' ').title()}: {v!r}" for k, v in display_properties.items()
        )
        exception_str = (
            f"{self.get_message()} ({self.__class__.__name__})"
            + VERTICAL_SEPARATOR
            + f"\n{self.__class__.__name__}: {self.get_message()}"
        )

        if self.guidance:
            exception_str += f"\n {self.guidance}"

        if self.help_url:
            exception_str += f"\n More info: {self.help_url}"

        if context_str:
            exception_str += "\n " + context_str

        if self.log_file:
            exception_str += f"\n Log file: {self.log_file.absolute()!s}"

        if self.log_text:
            # Join into a local variable: rendering must not rewrite
            # self.log_text from a list into a string as a side effect.
            log_text = (
                "\n".join(self.log_text) if isinstance(self.log_text, list) else self.log_text
            )
            exception_str += f"\n Log output: \n {indent(log_text, ' ')}"

        if self.original_exception:
            exception_str += VERTICAL_SEPARATOR + f"\nCaused by: {self.original_exception!s}"

        return exception_str

    def __repr__(self) -> str:
        """Return ``ClassName(key=value, ...)`` built from the public instance attributes."""
        class_name = self.__class__.__name__
        properties_str = ", ".join(
            f"{k}={v!r}" for k, v in self.__dict__.items() if not k.startswith("_")
        )
        return f"{class_name}({properties_str})"

    def safe_logging_dict(self) -> dict[str, Any]:
        """Return a dictionary of the exception's properties which is safe for logging.

        We avoid any properties which could potentially contain PII.
        """
        result = {
            # The class name is safe to log:
            "class": self.__class__.__name__,
            # We discourage interpolated strings in 'message' so that this should never contain PII:
            "message": self.get_message(),
        }
        safe_attrs = ["connector_name", "stream_name", "violation", "exit_code"]
        for attr in safe_attrs:
            if hasattr(self, attr):
                result[attr] = getattr(self, attr)

        return result
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
# Airbyte Internal Errors (these are probably bugs)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
@dataclass
class AirbyteInternalError(AirbyteError):
    """An internal error occurred in Airbyte."""

    # These defaults must be annotated: the dataclass machinery only treats
    # annotated names as fields. A bare class attribute would be shadowed on
    # every instance by the inherited field default (None) that the generated
    # __init__ assigns, so the guidance and help link would never be shown.
    guidance: str | None = "Please consider reporting this error to the Airbyte team."
    help_url: str | None = NEW_ISSUE_URL
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
# Airbyte Input Errors (replaces ValueError for user input)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
@dataclass
class AirbyteInputError(AirbyteError, ValueError):
    """The input provided to Airbyte did not match expected validation rules.

    This inherits from ValueError so that it can be used as a drop-in replacement for
    ValueError in the Airbyte API.
    """

    # These defaults must be annotated: the dataclass machinery only treats
    # annotated names as fields. A bare class attribute would be shadowed on
    # every instance by the inherited field default (None) that the generated
    # __init__ assigns, so the guidance and help link would never be shown.
    guidance: str | None = "Please check the provided value and try again."
    help_url: str | None = DOCS_URL
    # The offending input, when the raiser chooses to provide it.
    input_value: str | None = None
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
# Normalization Errors
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
@dataclass
class AirbyteNameNormalizationError(AirbyteError, ValueError):
    """Error occurred while normalizing a table or column name."""

    # These defaults must be annotated: the dataclass machinery only treats
    # annotated names as fields. A bare class attribute would be shadowed on
    # every instance by the inherited field default (None) that the generated
    # __init__ assigns, so the guidance and help link would never be shown.
    guidance: str | None = (
        "Please consider renaming the source object if possible, or "
        "raise an issue in GitHub if not."
    )
    help_url: str | None = NEW_ISSUE_URL

    # The name as it was passed to the normalizer.
    raw_name: str | None = None
    # What the normalizer produced for that name.
    normalization_result: str | None = None
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
@dataclass
class AirbyteConnectorError(AirbyteError):
    """Error when running the connector."""

    # Name used to look up the connector's logger ("airbyte.<connector_name>").
    connector_name: str | None = None

    def __post_init__(self) -> None:
        """Fill in the connector's log file path and a matching default guidance."""
        # Keep a log file the caller passed explicitly; only look one up from
        # the connector's logger when none was given.
        if self.log_file is None:
            self.log_file = self._get_log_file()
        if not self.guidance and self.log_file:
            self.guidance = "Please review the log file for more information."

    def _get_log_file(self) -> Path | None:
        """Return the path of the first file handler on the connector's logger.

        Returns None when no connector name is set or when the logger has no
        ``logging.FileHandler`` attached.
        """
        if self.connector_name:
            logger = logging.getLogger(f"airbyte.{self.connector_name}")

            for handler in logger.handlers:
                if isinstance(handler, logging.FileHandler):
                    return Path(handler.baseFilename).absolute()

        return None
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
@dataclass
class AirbyteStreamNotFoundError(AirbyteConnectorError):
    """Connector stream not found."""

    # Name of the stream that could not be found.
    stream_name: str | None = None
    # Stream names that can be reported alongside the error, when known.
    available_streams: list[str] | None = None
|