airbyte-cdk 0.0.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/__init__.py +358 -0
- airbyte_cdk/cli/__init__.py +1 -0
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
- airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
- airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
- airbyte_cdk/config_observation.py +104 -0
- airbyte_cdk/connector.py +123 -0
- airbyte_cdk/connector_builder/README.md +53 -0
- airbyte_cdk/connector_builder/__init__.py +3 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
- airbyte_cdk/connector_builder/main.py +107 -0
- airbyte_cdk/connector_builder/models.py +73 -0
- airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
- airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
- airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
- airbyte_cdk/connector_builder/test_reader/types.py +83 -0
- airbyte_cdk/destinations/__init__.py +8 -0
- airbyte_cdk/destinations/destination.py +154 -0
- airbyte_cdk/destinations/vector_db_based/README.md +37 -0
- airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
- airbyte_cdk/destinations/vector_db_based/config.py +298 -0
- airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
- airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
- airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
- airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
- airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
- airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
- airbyte_cdk/entrypoint.py +414 -0
- airbyte_cdk/exception_handler.py +56 -0
- airbyte_cdk/logger.py +109 -0
- airbyte_cdk/models/__init__.py +72 -0
- airbyte_cdk/models/airbyte_protocol.py +88 -0
- airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
- airbyte_cdk/models/well_known_types.py +5 -0
- airbyte_cdk/py.typed +0 -0
- airbyte_cdk/sources/__init__.py +26 -0
- airbyte_cdk/sources/abstract_source.py +326 -0
- airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
- airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
- airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
- airbyte_cdk/sources/config.py +27 -0
- airbyte_cdk/sources/connector_state_manager.py +161 -0
- airbyte_cdk/sources/declarative/__init__.py +3 -0
- airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
- airbyte_cdk/sources/declarative/async_job/job.py +52 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
- airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
- airbyte_cdk/sources/declarative/async_job/status.py +24 -0
- airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
- airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
- airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
- airbyte_cdk/sources/declarative/auth/token.py +267 -0
- airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
- airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
- airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
- airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
- airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
- airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
- airbyte_cdk/sources/declarative/declarative_source.py +36 -0
- airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
- airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
- airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
- airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
- airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
- airbyte_cdk/sources/declarative/exceptions.py +9 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
- airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
- airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
- airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
- airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
- airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
- airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
- airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
- airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
- airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
- airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
- airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
- airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
- airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
- airbyte_cdk/sources/declarative/models/__init__.py +2 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
- airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
- airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
- airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
- airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
- airbyte_cdk/sources/declarative/requesters/README.md +56 -0
- airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
- airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
- airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
- airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
- airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
- airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
- airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
- airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
- airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
- airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
- airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
- airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
- airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
- airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
- airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
- airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
- airbyte_cdk/sources/declarative/spec/spec.py +48 -0
- airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
- airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
- airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
- airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
- airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
- airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
- airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
- airbyte_cdk/sources/declarative/types.py +25 -0
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
- airbyte_cdk/sources/file_based/README.md +152 -0
- airbyte_cdk/sources/file_based/__init__.py +24 -0
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
- airbyte_cdk/sources/file_based/config/__init__.py +0 -0
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
- airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
- airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
- airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
- airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
- airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
- airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
- airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
- airbyte_cdk/sources/file_based/exceptions.py +159 -0
- airbyte_cdk/sources/file_based/file_based_source.py +466 -0
- airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
- airbyte_cdk/sources/file_based/file_record_data.py +22 -0
- airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
- airbyte_cdk/sources/file_based/remote_file.py +18 -0
- airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
- airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
- airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
- airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
- airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
- airbyte_cdk/sources/file_based/types.py +10 -0
- airbyte_cdk/sources/http_config.py +10 -0
- airbyte_cdk/sources/http_logger.py +55 -0
- airbyte_cdk/sources/message/__init__.py +19 -0
- airbyte_cdk/sources/message/repository.py +137 -0
- airbyte_cdk/sources/source.py +95 -0
- airbyte_cdk/sources/specs/transfer_modes.py +26 -0
- airbyte_cdk/sources/streams/__init__.py +8 -0
- airbyte_cdk/sources/streams/availability_strategy.py +84 -0
- airbyte_cdk/sources/streams/call_rate.py +704 -0
- airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
- airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
- airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
- airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
- airbyte_cdk/sources/streams/concurrent/README.md +7 -0
- airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
- airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
- airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
- airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
- airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
- airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
- airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
- airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
- airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
- airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
- airbyte_cdk/sources/streams/core.py +703 -0
- airbyte_cdk/sources/streams/http/__init__.py +10 -0
- airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
- airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
- airbyte_cdk/sources/streams/http/exceptions.py +61 -0
- airbyte_cdk/sources/streams/http/http.py +673 -0
- airbyte_cdk/sources/streams/http/http_client.py +531 -0
- airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
- airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
- airbyte_cdk/sources/streams/utils/__init__.py +3 -0
- airbyte_cdk/sources/types.py +169 -0
- airbyte_cdk/sources/utils/__init__.py +7 -0
- airbyte_cdk/sources/utils/casing.py +12 -0
- airbyte_cdk/sources/utils/files_directory.py +15 -0
- airbyte_cdk/sources/utils/record_helper.py +53 -0
- airbyte_cdk/sources/utils/schema_helpers.py +230 -0
- airbyte_cdk/sources/utils/slice_logger.py +57 -0
- airbyte_cdk/sources/utils/transform.py +277 -0
- airbyte_cdk/sources/utils/types.py +7 -0
- airbyte_cdk/sql/__init__.py +0 -0
- airbyte_cdk/sql/_util/__init__.py +0 -0
- airbyte_cdk/sql/_util/hashing.py +34 -0
- airbyte_cdk/sql/_util/name_normalizers.py +92 -0
- airbyte_cdk/sql/constants.py +32 -0
- airbyte_cdk/sql/exceptions.py +235 -0
- airbyte_cdk/sql/secrets.py +123 -0
- airbyte_cdk/sql/shared/__init__.py +15 -0
- airbyte_cdk/sql/shared/catalog_providers.py +145 -0
- airbyte_cdk/sql/shared/sql_processor.py +786 -0
- airbyte_cdk/sql/types.py +160 -0
- airbyte_cdk/test/__init__.py +7 -0
- airbyte_cdk/test/catalog_builder.py +81 -0
- airbyte_cdk/test/entrypoint_wrapper.py +250 -0
- airbyte_cdk/test/mock_http/__init__.py +6 -0
- airbyte_cdk/test/mock_http/matcher.py +41 -0
- airbyte_cdk/test/mock_http/mocker.py +185 -0
- airbyte_cdk/test/mock_http/request.py +103 -0
- airbyte_cdk/test/mock_http/response.py +28 -0
- airbyte_cdk/test/mock_http/response_builder.py +237 -0
- airbyte_cdk/test/state_builder.py +33 -0
- airbyte_cdk/test/utils/__init__.py +1 -0
- airbyte_cdk/test/utils/data.py +24 -0
- airbyte_cdk/test/utils/http_mocking.py +16 -0
- airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
- airbyte_cdk/test/utils/reading.py +26 -0
- airbyte_cdk/utils/__init__.py +10 -0
- airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
- airbyte_cdk/utils/analytics_message.py +25 -0
- airbyte_cdk/utils/constants.py +5 -0
- airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
- airbyte_cdk/utils/datetime_helpers.py +499 -0
- airbyte_cdk/utils/event_timing.py +85 -0
- airbyte_cdk/utils/is_cloud_environment.py +18 -0
- airbyte_cdk/utils/mapping_helpers.py +162 -0
- airbyte_cdk/utils/message_utils.py +26 -0
- airbyte_cdk/utils/oneof_option_config.py +33 -0
- airbyte_cdk/utils/print_buffer.py +75 -0
- airbyte_cdk/utils/schema_inferrer.py +270 -0
- airbyte_cdk/utils/slice_hasher.py +37 -0
- airbyte_cdk/utils/spec_schema_transformations.py +26 -0
- airbyte_cdk/utils/stream_status_utils.py +43 -0
- airbyte_cdk/utils/traced_exception.py +145 -0
- airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
- airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
- airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
- airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
- airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
- airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# RecordTransformation is depended upon by every class in this module (since it's the abc everything implements). For this reason,
|
|
7
|
+
# the order of imports matters, i.e. this file must fully import RecordTransformation before importing anything which depends on RecordTransformation.
|
|
8
|
+
# Otherwise there will be a circular dependency (load order will be init.py --> RemoveFields (which tries to import RecordTransformation) -->
|
|
9
|
+
# init.py --> circular dep error, since loading this file causes it to try to import itself down the line.
|
|
10
|
+
# so we add the split directive below to tell isort to sort imports while keeping RecordTransformation as the first import
|
|
11
|
+
from .transformation import RecordTransformation
|
|
12
|
+
|
|
13
|
+
# isort: split
|
|
14
|
+
from .add_fields import AddFields
|
|
15
|
+
from .remove_fields import RemoveFields
|
|
16
|
+
|
|
17
|
+
# Names exported by `from <this package> import *`; these are the three names imported above.
__all__ = ["AddFields", "RecordTransformation", "RemoveFields"]
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
from dataclasses import InitVar, dataclass, field
|
|
6
|
+
from typing import Any, Dict, List, Mapping, Optional, Type, Union
|
|
7
|
+
|
|
8
|
+
import dpath
|
|
9
|
+
|
|
10
|
+
from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
|
|
11
|
+
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
|
12
|
+
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
|
13
|
+
from airbyte_cdk.sources.types import Config, FieldPointer, StreamSlice, StreamState
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass(frozen=True)
class AddedFieldDefinition:
    """
    Immutable description of a single field that should be added to a record.

    This is the "raw" form of a field definition: `value` may still be a plain string
    that has not been turned into an `InterpolatedString` yet.
    """

    # Location of the field inside the record; nested paths have more than one segment.
    path: FieldPointer
    # Either a ready-made InterpolatedString or a plain string.
    value: Union[InterpolatedString, str]
    # Optional type hint for the evaluated value; None means no type was requested.
    value_type: Optional[Type[Any]]
    # Init-only argument: accepted by the generated __init__ but not stored on the instance.
    parameters: InitVar[Mapping[str, Any]]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass(frozen=True)
class ParsedAddFieldDefinition:
    """
    Immutable description of a field to add to a record, after parsing.

    Unlike `AddedFieldDefinition`, `value` is always an `InterpolatedString` here, so it
    can be evaluated directly.
    """

    # Location of the field inside the record; nested paths have more than one segment.
    path: FieldPointer
    # Value template, already wrapped as an InterpolatedString.
    value: InterpolatedString
    # Optional type hint for the evaluated value; None means no type was requested.
    value_type: Optional[Type[Any]]
    # Init-only argument: accepted by the generated __init__ but not stored on the instance.
    parameters: InitVar[Mapping[str, Any]]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
|
|
37
|
+
class AddFields(RecordTransformation):
|
|
38
|
+
"""
|
|
39
|
+
Transformation which adds field to an output record. The path of the added field can be nested. Adding nested fields will create all
|
|
40
|
+
necessary parent objects (like mkdir -p). Adding fields to an array will extend the array to that index (filling intermediate
|
|
41
|
+
indices with null values). So if you add a field at index 5 to the array ["value"], it will become ["value", null, null, null, null,
|
|
42
|
+
"new_value"].
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
This transformation has access to the following contextual values:
|
|
46
|
+
record: the record about to be output by the connector
|
|
47
|
+
config: the input configuration provided to a connector
|
|
48
|
+
stream_state: the current state of the stream
|
|
49
|
+
stream_slice: the current stream slice being read
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
Examples of instantiating this transformation via YAML:
|
|
54
|
+
- type: AddFields
|
|
55
|
+
fields:
|
|
56
|
+
# hardcoded constant
|
|
57
|
+
- path: ["path"]
|
|
58
|
+
value: "static_value"
|
|
59
|
+
|
|
60
|
+
# nested path
|
|
61
|
+
- path: ["path", "to", "field"]
|
|
62
|
+
value: "static"
|
|
63
|
+
|
|
64
|
+
# from config
|
|
65
|
+
- path: ["shop_id"]
|
|
66
|
+
value: "{{ config.shop_id }}"
|
|
67
|
+
|
|
68
|
+
# from stream_interval
|
|
69
|
+
- path: ["date"]
|
|
70
|
+
value: "{{ stream_interval.start_date }}"
|
|
71
|
+
|
|
72
|
+
# from record
|
|
73
|
+
- path: ["unnested_value"]
|
|
74
|
+
value: {{ record.nested.field }}
|
|
75
|
+
|
|
76
|
+
# from stream_slice
|
|
77
|
+
- path: ["start_date"]
|
|
78
|
+
value: {{ stream_slice.start_date }}
|
|
79
|
+
|
|
80
|
+
# by supplying any valid Jinja template directive or expression https://jinja.palletsprojects.com/en/3.1.x/templates/#
|
|
81
|
+
- path: ["two_times_two"]
|
|
82
|
+
value: {{ 2 * 2 }}
|
|
83
|
+
|
|
84
|
+
Attributes:
|
|
85
|
+
fields (List[AddedFieldDefinition]): A list of transformations (path and corresponding value) that will be added to the record
|
|
86
|
+
"""
|
|
87
|
+
|
|
88
|
+
fields: List[AddedFieldDefinition]
|
|
89
|
+
parameters: InitVar[Mapping[str, Any]]
|
|
90
|
+
condition: str = ""
|
|
91
|
+
_parsed_fields: List[ParsedAddFieldDefinition] = field(
|
|
92
|
+
init=False, repr=False, default_factory=list
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
|
96
|
+
self._filter_interpolator = InterpolatedBoolean(
|
|
97
|
+
condition=self.condition, parameters=parameters
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
for add_field in self.fields:
|
|
101
|
+
if len(add_field.path) < 1:
|
|
102
|
+
raise ValueError(
|
|
103
|
+
f"Expected a non-zero-length path for the AddFields transformation {add_field}"
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
if not isinstance(add_field.value, InterpolatedString):
|
|
107
|
+
if not isinstance(add_field.value, str):
|
|
108
|
+
raise f"Expected a string value for the AddFields transformation: {add_field}"
|
|
109
|
+
else:
|
|
110
|
+
self._parsed_fields.append(
|
|
111
|
+
ParsedAddFieldDefinition(
|
|
112
|
+
add_field.path,
|
|
113
|
+
InterpolatedString.create(add_field.value, parameters=parameters),
|
|
114
|
+
value_type=add_field.value_type,
|
|
115
|
+
parameters=parameters,
|
|
116
|
+
)
|
|
117
|
+
)
|
|
118
|
+
else:
|
|
119
|
+
self._parsed_fields.append(
|
|
120
|
+
ParsedAddFieldDefinition(
|
|
121
|
+
add_field.path,
|
|
122
|
+
add_field.value,
|
|
123
|
+
value_type=add_field.value_type,
|
|
124
|
+
parameters={},
|
|
125
|
+
)
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
def transform(
|
|
129
|
+
self,
|
|
130
|
+
record: Dict[str, Any],
|
|
131
|
+
config: Optional[Config] = None,
|
|
132
|
+
stream_state: Optional[StreamState] = None,
|
|
133
|
+
stream_slice: Optional[StreamSlice] = None,
|
|
134
|
+
) -> None:
|
|
135
|
+
if config is None:
|
|
136
|
+
config = {}
|
|
137
|
+
kwargs = {"record": record, "stream_slice": stream_slice}
|
|
138
|
+
for parsed_field in self._parsed_fields:
|
|
139
|
+
valid_types = (parsed_field.value_type,) if parsed_field.value_type else None
|
|
140
|
+
value = parsed_field.value.eval(config, valid_types=valid_types, **kwargs)
|
|
141
|
+
is_empty_condition = not self.condition
|
|
142
|
+
if is_empty_condition or self._filter_interpolator.eval(config, value=value, **kwargs):
|
|
143
|
+
dpath.new(record, parsed_field.path, value)
|
|
144
|
+
|
|
145
|
+
def __eq__(self, other: Any) -> bool:
    """
    Compare two transformations by their instance attributes.

    Objects that have no ``__dict__`` (``None``, numbers, strings, ...) cannot be equal to this
    transformation. ``NotImplemented`` is returned for them so that Python falls back to its
    default comparison (yielding ``False``) instead of raising ``AttributeError``.

    :param other: The object to compare with
    :return: True when both objects hold equal instance attributes
    """
    try:
        other_attributes = other.__dict__
    except AttributeError:
        return NotImplemented
    return bool(self.__dict__ == other_attributes)
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
from dataclasses import InitVar, dataclass
|
|
2
|
+
from typing import Any, Dict, List, Mapping, Optional, Union
|
|
3
|
+
|
|
4
|
+
import dpath
|
|
5
|
+
|
|
6
|
+
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
|
7
|
+
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
|
8
|
+
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class DpathFlattenFields(RecordTransformation):
    """
    Flatten fields only for provided path.

    field_path: List[Union[InterpolatedString, str]] path to the field to flatten.
    delete_origin_value: bool = False whether to delete origin field or keep it. Default is False.
    replace_record: bool = False whether to replace origin record or not. Default is False.

    """

    config: Config
    field_path: List[Union[InterpolatedString, str]]
    parameters: InitVar[Mapping[str, Any]]
    delete_origin_value: bool = False
    replace_record: bool = False

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        """
        Convert every segment of ``field_path`` into an interpolated string.

        :param parameters: Parameters forwarded to ``InterpolatedString.create``
        """
        # A single pass is enough: every segment, plain string or not, goes through the same
        # factory call with the same arguments.
        self._field_path = [
            InterpolatedString.create(segment, parameters=parameters)
            for segment in self.field_path
        ]

    def transform(
        self,
        record: Dict[str, Any],
        config: Optional[Config] = None,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
    ) -> None:
        """
        Merge the mapping found at ``field_path`` into the top level of ``record`` in place.

        Nothing happens unless the value found at the path is a dict.

        :param record: The record to mutate
        :param config: Not read here; the path is interpolated with the instance's ``config``
        :param stream_state: Not read by this method
        :param stream_slice: Not read by this method
        """
        path = [segment.eval(self.config) for segment in self._field_path]

        if "*" in path:
            # With a wildcard segment several values may match; only the first one is used.
            matched = dpath.values(record, path)
            extracted = matched[0] if matched else None
        else:
            extracted = dpath.get(record, path, default=[])

        if isinstance(extracted, dict):
            if self.replace_record and extracted:
                # Drop everything currently in the record, then keep only the extracted mapping.
                dpath.delete(record, "**")
                record.update(extracted)
            else:
                # Merge only when no extracted key would overwrite an existing top-level key.
                conflicts = set(extracted.keys()) & set(record.keys())
                if not conflicts:
                    if self.delete_origin_value:
                        dpath.delete(record, path)
                    record.update(extracted)
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Any, Dict, Optional
|
|
7
|
+
|
|
8
|
+
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
|
9
|
+
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class FlattenFields(RecordTransformation):
    """
    Transformation that collapses nested dicts (and, optionally, lists) of a record into a
    single-level dict.

    flatten_lists: when True (the default) list items are expanded too, each item being keyed
    by its parent key followed by its index.
    """

    flatten_lists: bool = True

    def transform(
        self,
        record: Dict[str, Any],
        config: Optional[Config] = None,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
    ) -> None:
        """
        Replace the content of ``record`` with its flattened form, in place.

        :param record: The record to mutate
        :param config: Not read by this method
        :param stream_state: Not read by this method
        :param stream_slice: Not read by this method
        """
        flattened = self.flatten_record(record)
        record.clear()
        record.update(flattened)

    def flatten_record(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """
        Build the flattened copy of ``record`` without modifying it.

        The structure is walked depth-first with an explicit stack (last pushed, first visited).
        A key keeps its bare name unless that name is already present in the result or a list
        has been expanded earlier in the walk; in those cases it is prefixed with its parent key.

        :param record: The (possibly nested) record to flatten
        :return: A new single-level dict
        """
        flat: Dict[str, Any] = {}
        pending = [(record, "_")]
        # Once any list item has been pushed, every key seen afterwards is parent-qualified.
        qualify_with_parent = False

        while pending:
            node, parent_key = pending.pop()

            if isinstance(node, dict):
                pending.extend(
                    (
                        child,
                        f"{parent_key}.{child_key}"
                        if (child_key in flat or qualify_with_parent)
                        else child_key,
                    )
                    for child_key, child in node.items()
                )
            elif isinstance(node, list) and self.flatten_lists:
                if node:
                    qualify_with_parent = True
                pending.extend(
                    (element, f"{parent_key}.{index}") for index, element in enumerate(node)
                )
            else:
                # Leaf value (or a list when list flattening is disabled).
                flat[parent_key] = node

        return flat
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
from dataclasses import InitVar, dataclass
|
|
6
|
+
from typing import Any, Dict, Mapping, Optional
|
|
7
|
+
|
|
8
|
+
from airbyte_cdk import InterpolatedString
|
|
9
|
+
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
|
10
|
+
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class KeysReplaceTransformation(RecordTransformation):
    """
    Transformation that replaces a substring in the names of a record's keys.

    Example usage:
    - type: KeysReplace
      old: " "
      new: "_"
    Result:
    from: {"created time": ..., "customer id": ..., "user id": ...}
    to: {"created_time": ..., "customer_id": ..., "user_id": ...}
    """

    old: str
    new: str
    parameters: InitVar[Mapping[str, Any]]

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        """
        Wrap ``old`` and ``new`` as interpolated strings.

        :param parameters: Parameters forwarded to ``InterpolatedString.create``
        """
        self._old = InterpolatedString.create(self.old, parameters=parameters)
        self._new = InterpolatedString.create(self.new, parameters=parameters)

    def transform(
        self,
        record: Dict[str, Any],
        config: Optional[Config] = None,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
    ) -> None:
        """
        Rename the keys of ``record`` (and of any dict nested in it) in place.

        :param record: The record to mutate
        :param config: The user-provided configuration; an empty mapping is used when omitted
        :param stream_state: The stream state, made available to interpolation
        :param stream_slice: The stream slice, made available to interpolation
        """
        config = {} if config is None else config

        context = {"record": record, "stream_state": stream_state, "stream_slice": stream_slice}
        needle = str(self._old.eval(config, **context))
        replacement = str(self._new.eval(config, **context))

        def _rename(data: Dict[str, Any]) -> Dict[str, Any]:
            # Only dict values are descended into; any other value is copied over as is.
            return {
                key.replace(needle, replacement): (
                    _rename(value) if isinstance(value, dict) else value
                )
                for key, value in data.items()
            }

        renamed = _rename(record)
        record.clear()
        record.update(renamed)
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Any, Dict, Optional
|
|
7
|
+
|
|
8
|
+
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
|
9
|
+
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class KeysToLowerTransformation(RecordTransformation):
    """
    Transformation that lower-cases the top-level keys of a record.

    Only the record's own keys are visited; nested values are left as they are.
    """

    def transform(
        self,
        record: Dict[str, Any],
        config: Optional[Config] = None,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
    ) -> None:
        """
        Lower-case the keys of ``record`` in place.

        :param record: The record to mutate
        :param config: Not read by this method
        :param stream_state: Not read by this method
        :param stream_slice: Not read by this method
        """
        # The dict is mutated while its keys are walked, so a snapshot is required. A list
        # snapshot keeps the record's own key order, which makes the resulting key order (and
        # the winner among several non-lowercase spellings of one key) deterministic rather
        # than dependent on set iteration order.
        for key in list(record):
            record[key.lower()] = record.pop(key)
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import Any, Dict, List, Optional
|
|
8
|
+
|
|
9
|
+
import anyascii
|
|
10
|
+
|
|
11
|
+
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
|
12
|
+
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class KeysToSnakeCaseTransformation(RecordTransformation):
    """
    Transformation that rewrites the keys of a record, and of any dict nested in it, in snake case.

    A key is converted to ASCII, split into tokens with ``token_pattern``, filtered, lower-cased
    and joined with underscores.
    """

    # Tokens are: an uppercase run optionally followed by lowercase letters, a lowercase run,
    # a digit run, or (named group "NoToken") a run of non-alphanumeric characters.
    token_pattern: re.Pattern[str] = re.compile(
        r"[A-Z]+[a-z]*|[a-z]+|\d+|(?P<NoToken>[^a-zA-Z\d]+)"
    )

    def transform(
        self,
        record: Dict[str, Any],
        config: Optional[Config] = None,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
    ) -> None:
        """
        Replace the content of ``record`` with its snake-cased form, in place.

        :param record: The record to mutate
        :param config: Not read by this method
        :param stream_state: Not read by this method
        :param stream_slice: Not read by this method
        """
        snake_cased = self._transform_record(record)
        record.clear()
        record.update(snake_cased)

    def _transform_record(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of ``record`` with converted keys, recursing into dict values only."""
        return {
            self.process_key(key): (
                self._transform_record(value) if isinstance(value, dict) else value
            )
            for key, value in record.items()
        }

    def process_key(self, key: str) -> str:
        """Run the full pipeline on one key: normalize, tokenize, filter, join."""
        normalized = self.normalize_key(key)
        raw_tokens = self.tokenize_key(normalized)
        kept_tokens = self.filter_tokens(raw_tokens)
        return self.tokens_to_snake_case(kept_tokens)

    def normalize_key(self, key: str) -> str:
        """Return ``key`` as converted by ``anyascii.anyascii``."""
        return str(anyascii.anyascii(key))

    def tokenize_key(self, key: str) -> List[str]:
        """Split ``key`` into tokens; each non-alphanumeric run yields an empty token."""
        return [
            "" if match.group("NoToken") is not None else match.group(0)
            for match in self.token_pattern.finditer(key)
        ]

    def filter_tokens(self, tokens: List[str]) -> List[str]:
        """
        Drop empty tokens from the interior of the list and guard keys that start with a digit.

        The first and last tokens are always kept, even when empty. When the first token is
        made of digits, an empty token is inserted in front of it.
        """
        if len(tokens) >= 3:
            first, *interior, last = tokens
            tokens = [first, *(token for token in interior if token), last]
        if tokens and tokens[0].isdigit():
            tokens.insert(0, "")
        return tokens

    def tokens_to_snake_case(self, tokens: List[str]) -> str:
        """Lower-case every token and join them with underscores."""
        lowered = [token.lower() for token in tokens]
        return "_".join(lowered)
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
from dataclasses import InitVar, dataclass
|
|
6
|
+
from typing import Any, Dict, List, Mapping, Optional
|
|
7
|
+
|
|
8
|
+
import dpath
|
|
9
|
+
import dpath.exceptions
|
|
10
|
+
|
|
11
|
+
from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
|
|
12
|
+
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
|
13
|
+
from airbyte_cdk.sources.types import Config, FieldPointer, StreamSlice, StreamState
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class RemoveFields(RecordTransformation):
    """
    A transformation which removes fields from a record. The fields to remove are designated using FieldPointers.
    During transformation, if a field or any of its parents does not exist in the record, no error is thrown.

    If an input field pointer references an item in a list (e.g: ["k", 0] in the object {"k": ["a", "b", "c"]}) then
    the object at that index is set to None rather than being removed from the list. TODO change this behavior.

    It's possible to remove objects nested in lists e.g: removing [".", 0, "k"] from {".": [{"k": "V"}]} results in {".": [{}]}

    Usage syntax:

    ```yaml
        my_stream:
            <other parameters..>
            transformations:
                - type: RemoveFields
                  field_pointers:
                    - ["path", "to", "field1"]
                    - ["path2"]
    ```

    Attributes:
        field_pointers (List[FieldPointer]): pointers to the fields that should be removed
        condition (str): optional boolean expression; when set, it is passed to dpath as a filter on the candidate values
    """

    field_pointers: List[FieldPointer]
    parameters: InitVar[Mapping[str, Any]]
    condition: str = ""

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        """
        Prepare the interpolated form of ``condition``.

        :param parameters: Parameters forwarded to ``InterpolatedBoolean``
        """
        self._filter_interpolator = InterpolatedBoolean(
            condition=self.condition, parameters=parameters
        )

    def transform(
        self,
        record: Dict[str, Any],
        config: Optional[Config] = None,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
    ) -> None:
        """
        Remove the configured fields from ``record`` in place.

        :param record: The record to be transformed
        :param config: The user-provided configuration, used when evaluating the condition
        :param stream_state: Not read by this method
        :param stream_slice: Not read by this method
        """
        # The filter depends only on the instance and on `config`, so one callable serves
        # every pointer. The candidate value is exposed to the condition as `property`.
        if self.condition:
            value_filter = lambda x: self._filter_interpolator.eval(config or {}, property=x)  # noqa: E731
        else:
            value_filter = None

        for pointer in self.field_pointers:
            # the dpath library by default doesn't delete fields from arrays
            try:
                dpath.delete(record, pointer, afilter=value_filter)
            except dpath.exceptions.PathNotFound:
                # if the (potentially nested) property does not exist, silently skip
                pass
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
from abc import abstractmethod
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import Any, Dict, Optional
|
|
8
|
+
|
|
9
|
+
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class RecordTransformation:
    """
    Implementations of this class define transformations that can be applied to records of a stream.
    """

    @abstractmethod
    def transform(
        self,
        record: Dict[str, Any],
        config: Optional[Config] = None,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
    ) -> None:
        """
        Transform a record by adding, deleting, or mutating fields directly from the record reference passed in argument.

        :param record: The input record to be transformed
        :param config: The user-provided configuration as specified by the source's spec
        :param stream_state: The stream state
        :param stream_slice: The stream slice
        :return: None; the record passed in is modified in place
        """

    def __eq__(self, other: object) -> bool:
        """
        Compare two transformations by their instance attributes.

        Objects that have no ``__dict__`` (``None``, numbers, strings, ...) cannot be equal to a
        transformation. ``NotImplemented`` is returned for them so that Python falls back to its
        default comparison (yielding ``False``) instead of raising ``AttributeError``.

        :param other: The object to compare with
        :return: True when both objects hold equal instance attributes
        """
        try:
            other_attributes = other.__dict__
        except AttributeError:
            return NotImplemented
        return other_attributes == self.__dict__
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from airbyte_cdk.sources.types import (
|
|
8
|
+
Config,
|
|
9
|
+
ConnectionDefinition,
|
|
10
|
+
FieldPointer,
|
|
11
|
+
Record,
|
|
12
|
+
StreamSlice,
|
|
13
|
+
StreamState,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
# Note: This package originally contained class definitions for low-code CDK types, but we promoted them into the Python CDK.
|
|
17
|
+
# We've migrated connectors in the repository to reference the new location, but these assignments are used to retain backwards
|
|
18
|
+
# compatibility for sources created by OSS customers or on forks. This can be removed when we start bumping major versions.
|
|
19
|
+
|
|
20
|
+
# Re-bind each name imported from airbyte_cdk.sources.types to itself so that it stays available
# as an attribute of this module (see the backwards-compatibility note above).
FieldPointer = FieldPointer
Config = Config
ConnectionDefinition = ConnectionDefinition
StreamState = StreamState
Record = Record
StreamSlice = StreamSlice
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
import pkgutil
|
|
6
|
+
from typing import Any, List, Mapping, Optional
|
|
7
|
+
|
|
8
|
+
import yaml
|
|
9
|
+
|
|
10
|
+
from airbyte_cdk.models import AirbyteStateMessage, ConfiguredAirbyteCatalog
|
|
11
|
+
from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
|
|
12
|
+
ConcurrentDeclarativeSource,
|
|
13
|
+
)
|
|
14
|
+
from airbyte_cdk.sources.types import ConnectionDefinition
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class YamlDeclarativeSource(ConcurrentDeclarativeSource[List[AirbyteStateMessage]]):
    """Declarative source defined by a yaml file"""

    def __init__(
        self,
        path_to_yaml: str,
        debug: bool = False,
        catalog: Optional[ConfiguredAirbyteCatalog] = None,
        config: Optional[Mapping[str, Any]] = None,
        state: Optional[List[AirbyteStateMessage]] = None,
    ) -> None:
        """
        :param path_to_yaml: Path to the yaml file describing the source
        :param debug: Accepted for interface compatibility; not read by this constructor
        :param catalog: The configured catalog; an empty catalog is used when omitted
        :param config: The user-provided configuration; an empty mapping is used when omitted
        :param state: The incoming state messages; an empty list is used when omitted
        """
        self._path_to_yaml = path_to_yaml
        source_config = self._read_and_parse_yaml_file(path_to_yaml)

        super().__init__(
            catalog=catalog or ConfiguredAirbyteCatalog(streams=[]),
            config=config or {},
            state=state or [],
            source_config=source_config,
        )

    def _read_and_parse_yaml_file(self, path_to_yaml_file: str) -> ConnectionDefinition:
        """
        Load the manifest from the filesystem, falling back to the package data of the module
        that defines the concrete class.

        :param path_to_yaml_file: Path to the yaml file, as a filesystem path or a package-relative resource
        :return: The parsed manifest, or an empty mapping when the packaged resource yields no data
        """
        try:
            # For testing purposes, we want to allow to just pass a file.
            # The encoding is explicit so the file is read the same way on every platform,
            # matching the UTF-8 decoding applied to the packaged resource below.
            with open(path_to_yaml_file, "r", encoding="utf-8") as f:
                return yaml.safe_load(f)  # type: ignore  # we assume the yaml represents a ConnectionDefinition
        except FileNotFoundError:
            # Running inside the container, the working directory during an operation is not structured the same as the static files
            package = self.__class__.__module__.split(".")[0]

            yaml_config = pkgutil.get_data(package, path_to_yaml_file)
            if yaml_config:
                decoded_yaml = yaml_config.decode()
                return self._parse(decoded_yaml)
            return {}

    def _emit_manifest_debug_message(self, extra_args: dict[str, Any]) -> None:
        """
        Record the manifest path in ``extra_args`` (the mapping is modified in place).

        :param extra_args: The mapping that receives the ``path_to_yaml`` entry
        """
        extra_args["path_to_yaml"] = self._path_to_yaml

    @staticmethod
    def _parse(connection_definition_str: str) -> ConnectionDefinition:
        """
        Parses a yaml file into a manifest. Component references still exist in the manifest which will be
        resolved during the creating of the DeclarativeSource.
        :param connection_definition_str: yaml string to parse
        :return: The ConnectionDefinition parsed from connection_definition_str
        """
        return yaml.safe_load(connection_definition_str)  # type: ignore # yaml.safe_load doesn't return a type but know it is a Mapping
|