airbyte-cdk 6.20.2.dev0__py3-none-any.whl → 6.21.0__py3-none-any.whl
This diff compares the contents of two package versions that were publicly released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in the public registry.
- airbyte_cdk/sources/declarative/auth/oauth.py +34 -0
- airbyte_cdk/sources/declarative/checks/__init__.py +18 -2
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +51 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +16 -80
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +123 -21
- airbyte_cdk/sources/declarative/decoders/__init__.py +9 -1
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +43 -0
- airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +59 -0
- airbyte_cdk/sources/declarative/extractors/record_filter.py +5 -3
- airbyte_cdk/sources/declarative/incremental/__init__.py +0 -6
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +0 -3
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +0 -15
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +2 -1
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +112 -27
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +127 -106
- airbyte_cdk/sources/declarative/requesters/README.md +56 -0
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +33 -4
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +13 -3
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +11 -0
- airbyte_cdk/sources/file_based/exceptions.py +34 -0
- airbyte_cdk/sources/file_based/file_based_source.py +28 -5
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +18 -4
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +25 -2
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +30 -2
- airbyte_cdk/sources/streams/concurrent/cursor.py +21 -30
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +33 -4
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +42 -4
- airbyte_cdk/sources/types.py +3 -0
- airbyte_cdk/sources/utils/transform.py +29 -3
- {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/RECORD +35 -33
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -331
- {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/file_based/exceptions.py CHANGED

@@ -111,6 +111,40 @@ class ErrorListingFiles(BaseFileBasedSourceError):
    pass


+class DuplicatedFilesError(BaseFileBasedSourceError):
+    def __init__(self, duplicated_files_names: List[dict[str, List[str]]], **kwargs: Any):
+        self._duplicated_files_names = duplicated_files_names
+        self._stream_name: str = kwargs["stream"]
+        super().__init__(self._format_duplicate_files_error_message(), **kwargs)
+
+    def _format_duplicate_files_error_message(self) -> str:
+        duplicated_files_messages = []
+        for duplicated_file in self._duplicated_files_names:
+            for duplicated_file_name, file_paths in duplicated_file.items():
+                file_duplicated_message = (
+                    f"{len(file_paths)} duplicates found for file name {duplicated_file_name}:\n\n"
+                    + "".join(f"\n - {file_paths}")
+                )
+                duplicated_files_messages.append(file_duplicated_message)
+
+        error_message = (
+            f"ERROR: Duplicate filenames found for stream {self._stream_name}. "
+            "Duplicate file names are not allowed if the Preserve Sub-Directories in File Paths option is disabled. "
+            "Please remove or rename the duplicate files before attempting to re-run the sync.\n\n"
+            + "\n".join(duplicated_files_messages)
+        )
+
+        return error_message
+
+    def __repr__(self) -> str:
+        """Return a string representation of the exception."""
+        class_name = self.__class__.__name__
+        properties_str = ", ".join(
+            f"{k}={v!r}" for k, v in self.__dict__.items() if not k.startswith("_")
+        )
+        return f"{class_name}({properties_str})"
+
+
class CustomFileBasedException(AirbyteTracedException):
    """
    A specialized exception for file-based connectors.
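As a rough illustration of how this new error surfaces (the stream name and file paths below are invented; the stream keyword is required because __init__ reads kwargs["stream"]):

from airbyte_cdk.sources.file_based.exceptions import DuplicatedFilesError

# Invented example: the same base name appears under two different prefixes.
duplicates = [{"report.csv": ["2024/01/report.csv", "2024/02/report.csv"]}]

try:
    raise DuplicatedFilesError(duplicated_files_names=duplicates, stream="invoices")
except DuplicatedFilesError as error:
    # Prints the formatted message asking the user to rename or remove the duplicates.
    print(error)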
airbyte_cdk/sources/file_based/file_based_source.py CHANGED

@@ -242,7 +242,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
                        stream=self._make_default_stream(
                            stream_config=stream_config,
                            cursor=cursor,
-
+                            parsed_config=parsed_config,
                        ),
                        source=self,
                        logger=self.logger,
@@ -273,7 +273,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
                        stream=self._make_default_stream(
                            stream_config=stream_config,
                            cursor=cursor,
-
+                            parsed_config=parsed_config,
                        ),
                        source=self,
                        logger=self.logger,
@@ -285,7 +285,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
                    stream = self._make_default_stream(
                        stream_config=stream_config,
                        cursor=cursor,
-
+                        parsed_config=parsed_config,
                    )

                streams.append(stream)
@@ -298,7 +298,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
        self,
        stream_config: FileBasedStreamConfig,
        cursor: Optional[AbstractFileBasedCursor],
-
+        parsed_config: AbstractFileBasedSpec,
    ) -> AbstractFileBasedStream:
        return DefaultFileBasedStream(
            config=stream_config,
@@ -310,7 +310,8 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
            validation_policy=self._validate_and_get_validation_policy(stream_config),
            errors_collector=self.errors_collector,
            cursor=cursor,
-            use_file_transfer=
+            use_file_transfer=self._use_file_transfer(parsed_config),
+            preserve_directory_structure=self._preserve_directory_structure(parsed_config),
        )

    def _get_stream_from_catalog(
@@ -385,3 +386,25 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
            and parsed_config.delivery_method.delivery_type == "use_file_transfer"
        )
        return use_file_transfer
+
+    @staticmethod
+    def _preserve_directory_structure(parsed_config: AbstractFileBasedSpec) -> bool:
+        """
+        Determines whether to preserve directory structure during file transfer.
+
+        When enabled, files maintain their subdirectory paths in the destination.
+        When disabled, files are flattened to the root of the destination.
+
+        Args:
+            parsed_config: The parsed configuration containing delivery method settings
+
+        Returns:
+            True if directory structure should be preserved (default), False otherwise
+        """
+        if (
+            FileBasedSource._use_file_transfer(parsed_config)
+            and hasattr(parsed_config.delivery_method, "preserve_directory_structure")
+            and parsed_config.delivery_method.preserve_directory_structure is not None
+        ):
+            return parsed_config.delivery_method.preserve_directory_structure
+        return True
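A self-contained sketch of the lookup _preserve_directory_structure performs; the config object here is a stand-in built with SimpleNamespace, not a real connector spec:

from types import SimpleNamespace

def preserve_directory_structure(parsed_config) -> bool:
    # Mirrors the helper above: only honor the flag when file transfer is enabled
    # and the option is explicitly set; otherwise keep subdirectories (default True).
    delivery = getattr(parsed_config, "delivery_method", None)
    uses_file_transfer = (
        delivery is not None and getattr(delivery, "delivery_type", None) == "use_file_transfer"
    )
    if (
        uses_file_transfer
        and hasattr(delivery, "preserve_directory_structure")
        and delivery.preserve_directory_structure is not None
    ):
        return delivery.preserve_directory_structure
    return True

config = SimpleNamespace(
    delivery_method=SimpleNamespace(
        delivery_type="use_file_transfer", preserve_directory_structure=False
    )
)
print(preserve_directory_structure(config))  # False -> transferred files are flattened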
airbyte_cdk/sources/file_based/file_based_stream_reader.py CHANGED

@@ -135,6 +135,17 @@ class AbstractFileBasedStreamReader(ABC):
            return use_file_transfer
        return False

+    def preserve_directory_structure(self) -> bool:
+        # fall back to preserve subdirectories if config is not present or incomplete
+        if (
+            self.use_file_transfer()
+            and self.config
+            and hasattr(self.config.delivery_method, "preserve_directory_structure")
+            and self.config.delivery_method.preserve_directory_structure is not None
+        ):
+            return self.config.delivery_method.preserve_directory_structure
+        return True
+
    @abstractmethod
    def get_file(
        self, file: RemoteFile, local_directory: str, logger: logging.Logger
@@ -159,10 +170,13 @@ class AbstractFileBasedStreamReader(ABC):
        """
        ...

-
-
-
-
+    def _get_file_transfer_paths(self, file: RemoteFile, local_directory: str) -> List[str]:
+        preserve_directory_structure = self.preserve_directory_structure()
+        if preserve_directory_structure:
+            # Remove left slashes from source path format to make relative path for writing locally
+            file_relative_path = file.uri.lstrip("/")
+        else:
+            file_relative_path = path.basename(file.uri)
        local_file_path = path.join(local_directory, file_relative_path)

        # Ensure the local directory exists
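A standalone sketch of the path rule _get_file_transfer_paths now applies (the example URI is invented):

from os import path

def file_transfer_relative_path(uri: str, preserve_directory_structure: bool) -> str:
    # Keep the subdirectory layout (minus leading slashes) or flatten to the bare file name.
    if preserve_directory_structure:
        return uri.lstrip("/")
    return path.basename(uri)

print(file_transfer_relative_path("/exports/2024/report.csv", True))   # exports/2024/report.csv
print(file_transfer_relative_path("/exports/2024/report.csv", False))  # report.csv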
airbyte_cdk/sources/file_based/file_types/unstructured_parser.py CHANGED

@@ -2,6 +2,7 @@
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import logging
+import os
import traceback
from datetime import datetime
from io import BytesIO, IOBase
@@ -42,12 +43,34 @@ unstructured_partition_pdf = None
unstructured_partition_docx = None
unstructured_partition_pptx = None

+AIRBYTE_NLTK_DATA_DIR = "/airbyte/nltk_data"
+TMP_NLTK_DATA_DIR = "/tmp/nltk_data"
+
+
+def get_nltk_temp_folder() -> str:
+    """
+    For non-root connectors /tmp is not currently writable, but we should allow it in the future.
+    It's safe to use /airbyte for now. Fallback to /tmp for local development.
+    """
+    try:
+        nltk_data_dir = AIRBYTE_NLTK_DATA_DIR
+        os.makedirs(nltk_data_dir, exist_ok=True)
+    except OSError:
+        nltk_data_dir = TMP_NLTK_DATA_DIR
+        os.makedirs(nltk_data_dir, exist_ok=True)
+    return nltk_data_dir
+
+
try:
+    nltk_data_dir = get_nltk_temp_folder()
+    nltk.data.path.append(nltk_data_dir)
    nltk.data.find("tokenizers/punkt.zip")
    nltk.data.find("tokenizers/punkt_tab.zip")
+    nltk.data.find("tokenizers/averaged_perceptron_tagger_eng.zip")
except LookupError:
-    nltk.download("punkt")
-    nltk.download("punkt_tab")
+    nltk.download("punkt", download_dir=nltk_data_dir, quiet=True)
+    nltk.download("punkt_tab", download_dir=nltk_data_dir, quiet=True)
+    nltk.download("averaged_perceptron_tagger_eng", download_dir=nltk_data_dir, quiet=True)


def optional_decode(contents: Union[str, bytes]) -> str:
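The same try-then-fall-back pattern, extracted into a minimal sketch (the directory constants are copied from the hunk above; the helper name here is invented):

import os

def pick_writable_dir(preferred: str, fallback: str) -> str:
    # Try the preferred location first; fall back when the filesystem refuses to
    # create it (e.g. a read-only root outside the connector container).
    try:
        os.makedirs(preferred, exist_ok=True)
        return preferred
    except OSError:
        os.makedirs(fallback, exist_ok=True)
        return fallback

nltk_data_dir = pick_writable_dir("/airbyte/nltk_data", "/tmp/nltk_data")
print(nltk_data_dir)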
airbyte_cdk/sources/file_based/stream/default_file_based_stream.py CHANGED

@@ -5,14 +5,17 @@
import asyncio
import itertools
import traceback
+from collections import defaultdict
from copy import deepcopy
from functools import cache
-from 
+from os import path
+from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional, Set, Tuple, Union

from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, FailureType, Level
from airbyte_cdk.models import Type as MessageType
from airbyte_cdk.sources.file_based.config.file_based_stream_config import PrimaryKeyType
from airbyte_cdk.sources.file_based.exceptions import (
+    DuplicatedFilesError,
    FileBasedSourceError,
    InvalidSchemaError,
    MissingSchemaError,
@@ -43,6 +46,8 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
    """

    FILE_TRANSFER_KW = "use_file_transfer"
+    PRESERVE_DIRECTORY_STRUCTURE_KW = "preserve_directory_structure"
+    FILES_KEY = "files"
    DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
    ab_last_mod_col = "_ab_source_file_last_modified"
    ab_file_name_col = "_ab_source_file_url"
@@ -50,10 +55,15 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
    source_file_url = "source_file_url"
    airbyte_columns = [ab_last_mod_col, ab_file_name_col]
    use_file_transfer = False
+    preserve_directory_structure = True

    def __init__(self, **kwargs: Any):
        if self.FILE_TRANSFER_KW in kwargs:
            self.use_file_transfer = kwargs.pop(self.FILE_TRANSFER_KW, False)
+        if self.PRESERVE_DIRECTORY_STRUCTURE_KW in kwargs:
+            self.preserve_directory_structure = kwargs.pop(
+                self.PRESERVE_DIRECTORY_STRUCTURE_KW, True
+            )
        super().__init__(**kwargs)

    @property
@@ -98,15 +108,33 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
        else:
            return super()._filter_schema_invalid_properties(configured_catalog_json_schema)

+    def _duplicated_files_names(
+        self, slices: List[dict[str, List[RemoteFile]]]
+    ) -> List[dict[str, List[str]]]:
+        seen_file_names: Dict[str, List[str]] = defaultdict(list)
+        for file_slice in slices:
+            for file_found in file_slice[self.FILES_KEY]:
+                file_name = path.basename(file_found.uri)
+                seen_file_names[file_name].append(file_found.uri)
+        return [
+            {file_name: paths} for file_name, paths in seen_file_names.items() if len(paths) > 1
+        ]
+
    def compute_slices(self) -> Iterable[Optional[Mapping[str, Any]]]:
        # Sort files by last_modified, uri and return them grouped by last_modified
        all_files = self.list_files()
        files_to_read = self._cursor.get_files_to_sync(all_files, self.logger)
        sorted_files_to_read = sorted(files_to_read, key=lambda f: (f.last_modified, f.uri))
        slices = [
-            {
+            {self.FILES_KEY: list(group[1])}
            for group in itertools.groupby(sorted_files_to_read, lambda f: f.last_modified)
        ]
+        if slices and not self.preserve_directory_structure:
+            duplicated_files_names = self._duplicated_files_names(slices)
+            if duplicated_files_names:
+                raise DuplicatedFilesError(
+                    stream=self.name, duplicated_files_names=duplicated_files_names
+                )
        return slices

    def transform_record(
airbyte_cdk/sources/streams/concurrent/cursor.py CHANGED

@@ -196,9 +196,7 @@ class ConcurrentCursor(Cursor):

    @property
    def state(self) -> MutableMapping[str, Any]:
-        return self.
-            self.cursor_field, self._concurrent_state
-        )
+        return self._concurrent_state

    @property
    def cursor_field(self) -> CursorField:
@@ -243,10 +241,10 @@ class ConcurrentCursor(Cursor):
        return self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))

    def close_partition(self, partition: Partition) -> None:
-        slice_count_before = len(self.
+        slice_count_before = len(self.state.get("slices", []))
        self._add_slice_to_state(partition)
        if slice_count_before < len(
-            self.
+            self.state["slices"]
        ):  # only emit if at least one slice has been processed
            self._merge_partitions()
            self._emit_state_message()
@@ -258,11 +256,11 @@ class ConcurrentCursor(Cursor):
        )

        if self._slice_boundary_fields:
-            if "slices" not in self.
+            if "slices" not in self.state:
                raise RuntimeError(
                    f"The state for stream {self._stream_name} should have at least one slice to delineate the sync start time, but no slices are present. This is unexpected. Please contact Support."
                )
-            self.
+            self.state["slices"].append(
                {
                    self._connector_state_converter.START_KEY: self._extract_from_slice(
                        partition, self._slice_boundary_fields[self._START_BOUNDARY]
@@ -290,7 +288,7 @@ class ConcurrentCursor(Cursor):
                "expected. Please contact the Airbyte team."
            )

-        self.
+        self.state["slices"].append(
            {
                self._connector_state_converter.START_KEY: self.start,
                self._connector_state_converter.END_KEY: most_recent_cursor_value,
@@ -302,7 +300,9 @@ class ConcurrentCursor(Cursor):
        self._connector_state_manager.update_state_for_stream(
            self._stream_name,
            self._stream_namespace,
-            self.
+            self._connector_state_converter.convert_to_state_message(
+                self._cursor_field, self.state
+            ),
        )
        state_message = self._connector_state_manager.create_state_message(
            self._stream_name, self._stream_namespace
@@ -310,9 +310,7 @@ class ConcurrentCursor(Cursor):
        self._message_repository.emit_message(state_message)

    def _merge_partitions(self) -> None:
-        self.
-            self._concurrent_state["slices"]
-        )
+        self.state["slices"] = self._connector_state_converter.merge_intervals(self.state["slices"])

    def _extract_from_slice(self, partition: Partition, key: str) -> CursorValueType:
        try:
@@ -349,42 +347,36 @@ class ConcurrentCursor(Cursor):
        if self._start is not None and self._is_start_before_first_slice():
            yield from self._split_per_slice_range(
                self._start,
-                self.
+                self.state["slices"][0][self._connector_state_converter.START_KEY],
                False,
            )

-        if len(self.
+        if len(self.state["slices"]) == 1:
            yield from self._split_per_slice_range(
                self._calculate_lower_boundary_of_last_slice(
-                    self.
+                    self.state["slices"][0][self._connector_state_converter.END_KEY]
                ),
                self._end_provider(),
                True,
            )
-        elif len(self.
-            for i in range(len(self.
+        elif len(self.state["slices"]) > 1:
+            for i in range(len(self.state["slices"]) - 1):
                if self._cursor_granularity:
                    yield from self._split_per_slice_range(
-                        self.
+                        self.state["slices"][i][self._connector_state_converter.END_KEY]
                        + self._cursor_granularity,
-                        self.
-                            self._connector_state_converter.START_KEY
-                        ],
+                        self.state["slices"][i + 1][self._connector_state_converter.START_KEY],
                        False,
                    )
                else:
                    yield from self._split_per_slice_range(
-                        self.
-
-                        ],
-                        self._concurrent_state["slices"][i + 1][
-                            self._connector_state_converter.START_KEY
-                        ],
+                        self.state["slices"][i][self._connector_state_converter.END_KEY],
+                        self.state["slices"][i + 1][self._connector_state_converter.START_KEY],
                        False,
                    )
            yield from self._split_per_slice_range(
                self._calculate_lower_boundary_of_last_slice(
-                    self.
+                    self.state["slices"][-1][self._connector_state_converter.END_KEY]
                ),
                self._end_provider(),
                True,
@@ -395,8 +387,7 @@ class ConcurrentCursor(Cursor):
    def _is_start_before_first_slice(self) -> bool:
        return (
            self._start is not None
-            and self._start
-            < self._concurrent_state["slices"][0][self._connector_state_converter.START_KEY]
+            and self._start < self.state["slices"][0][self._connector_state_converter.START_KEY]
        )

    def _calculate_lower_boundary_of_last_slice(
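The net effect of these cursor changes is that the state property now returns the raw concurrent state dict, and conversion to the emitted message format happens only in _emit_state_message. A toy sketch of that shape (not the real classes; key names and the conversion step are invented for illustration):

from typing import Any, Dict, List

raw_state: Dict[str, List[Dict[str, Any]]] = {"slices": []}

def close_partition(start: int, end: int) -> None:
    # Partitions append slices directly to the raw state, as self.state["slices"].append(...) does.
    raw_state["slices"].append({"start": start, "end": end})

def emit_state_message() -> Dict[str, Any]:
    # Conversion happens only at emit time, mirroring
    # convert_to_state_message(self._cursor_field, self.state).
    return {"slices": sorted(raw_state["slices"], key=lambda s: s["start"])}

close_partition(10, 20)
close_partition(0, 10)
print(emit_state_message())  # {'slices': [{'start': 0, 'end': 10}, {'start': 10, 'end': 20}]}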
airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py CHANGED

@@ -81,10 +81,10 @@ class AbstractOauth2Authenticator(AuthBase):
        Override to define additional parameters
        """
        payload: MutableMapping[str, Any] = {
-
-
-
-
+            self.get_grant_type_name(): self.get_grant_type(),
+            self.get_client_id_name(): self.get_client_id(),
+            self.get_client_secret_name(): self.get_client_secret(),
+            self.get_refresh_token_name(): self.get_refresh_token(),
        }

        if self.get_scopes():
@@ -98,6 +98,14 @@ class AbstractOauth2Authenticator(AuthBase):

        return payload

+    def build_refresh_request_headers(self) -> Mapping[str, Any] | None:
+        """
+        Returns the request headers to set on the refresh request
+
+        """
+        headers = self.get_refresh_request_headers()
+        return headers if headers else None
+
    def _wrap_refresh_token_exception(
        self, exception: requests.exceptions.RequestException
    ) -> bool:
@@ -128,6 +136,7 @@ class AbstractOauth2Authenticator(AuthBase):
                method="POST",
                url=self.get_token_refresh_endpoint(),  # type: ignore # returns None, if not provided, but str | bytes is expected.
                data=self.build_refresh_request_body(),
+                headers=self.build_refresh_request_headers(),
            )
            if response.ok:
                response_json = response.json()
@@ -206,14 +215,26 @@ class AbstractOauth2Authenticator(AuthBase):
    def get_token_refresh_endpoint(self) -> Optional[str]:
        """Returns the endpoint to refresh the access token"""

+    @abstractmethod
+    def get_client_id_name(self) -> str:
+        """The client id name to authenticate"""
+
    @abstractmethod
    def get_client_id(self) -> str:
        """The client id to authenticate"""

+    @abstractmethod
+    def get_client_secret_name(self) -> str:
+        """The client secret name to authenticate"""
+
    @abstractmethod
    def get_client_secret(self) -> str:
        """The client secret to authenticate"""

+    @abstractmethod
+    def get_refresh_token_name(self) -> str:
+        """The refresh token name to authenticate"""
+
    @abstractmethod
    def get_refresh_token(self) -> Optional[str]:
        """The token used to refresh the access token when it expires"""
@@ -242,10 +263,18 @@ class AbstractOauth2Authenticator(AuthBase):
    def get_refresh_request_body(self) -> Mapping[str, Any]:
        """Returns the request body to set on the refresh request"""

+    @abstractmethod
+    def get_refresh_request_headers(self) -> Mapping[str, Any]:
+        """Returns the request headers to set on the refresh request"""
+
    @abstractmethod
    def get_grant_type(self) -> str:
        """Returns grant_type specified for requesting access_token"""

+    @abstractmethod
+    def get_grant_type_name(self) -> str:
+        """Returns grant_type specified name for requesting access_token"""
+
    @property
    @abstractmethod
    def access_token(self) -> str:
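With the new *_name hooks, the keys of the refresh payload are no longer hard-coded. A sketch of what build_refresh_request_body could produce when a subclass overrides the names (field names and values invented):

# Keys come from get_grant_type_name(), get_client_id_name(),
# get_client_secret_name() and get_refresh_token_name(); values from the matching getters.
payload = {
    "grantType": "refresh_token",
    "app_id": "my-client-id",
    "app_secret": "my-client-secret",
    "refresh": "my-refresh-token",
}
print(payload)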
airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py CHANGED

@@ -30,12 +30,17 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
        client_id: str,
        client_secret: str,
        refresh_token: str,
+        client_id_name: str = "client_id",
+        client_secret_name: str = "client_secret",
+        refresh_token_name: str = "refresh_token",
        scopes: List[str] | None = None,
        token_expiry_date: pendulum.DateTime | None = None,
        token_expiry_date_format: str | None = None,
        access_token_name: str = "access_token",
        expires_in_name: str = "expires_in",
        refresh_request_body: Mapping[str, Any] | None = None,
+        refresh_request_headers: Mapping[str, Any] | None = None,
+        grant_type_name: str = "grant_type",
        grant_type: str = "refresh_token",
        token_expiry_is_time_of_expiration: bool = False,
        refresh_token_error_status_codes: Tuple[int, ...] = (),
@@ -43,13 +48,18 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
        refresh_token_error_values: Tuple[str, ...] = (),
    ):
        self._token_refresh_endpoint = token_refresh_endpoint
+        self._client_secret_name = client_secret_name
        self._client_secret = client_secret
+        self._client_id_name = client_id_name
        self._client_id = client_id
+        self._refresh_token_name = refresh_token_name
        self._refresh_token = refresh_token
        self._scopes = scopes
        self._access_token_name = access_token_name
        self._expires_in_name = expires_in_name
        self._refresh_request_body = refresh_request_body
+        self._refresh_request_headers = refresh_request_headers
+        self._grant_type_name = grant_type_name
        self._grant_type = grant_type

        self._token_expiry_date = token_expiry_date or pendulum.now().subtract(days=1)  # type: ignore [no-untyped-call]
@@ -63,12 +73,21 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
    def get_token_refresh_endpoint(self) -> str:
        return self._token_refresh_endpoint

+    def get_client_id_name(self) -> str:
+        return self._client_id_name
+
    def get_client_id(self) -> str:
        return self._client_id

+    def get_client_secret_name(self) -> str:
+        return self._client_secret_name
+
    def get_client_secret(self) -> str:
        return self._client_secret

+    def get_refresh_token_name(self) -> str:
+        return self._refresh_token_name
+
    def get_refresh_token(self) -> str:
        return self._refresh_token

@@ -84,6 +103,12 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
    def get_refresh_request_body(self) -> Mapping[str, Any]:
        return self._refresh_request_body  # type: ignore [return-value]

+    def get_refresh_request_headers(self) -> Mapping[str, Any]:
+        return self._refresh_request_headers  # type: ignore [return-value]
+
+    def get_grant_type_name(self) -> str:
+        return self._grant_type_name
+
    def get_grant_type(self) -> str:
        return self._grant_type

@@ -129,8 +154,12 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
        expires_in_name: str = "expires_in",
        refresh_token_name: str = "refresh_token",
        refresh_request_body: Mapping[str, Any] | None = None,
+        refresh_request_headers: Mapping[str, Any] | None = None,
+        grant_type_name: str = "grant_type",
        grant_type: str = "refresh_token",
+        client_id_name: str = "client_id",
        client_id: Optional[str] = None,
+        client_secret_name: str = "client_secret",
        client_secret: Optional[str] = None,
        access_token_config_path: Sequence[str] = ("credentials", "access_token"),
        refresh_token_config_path: Sequence[str] = ("credentials", "refresh_token"),
@@ -151,6 +180,7 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
            expires_in_name (str, optional): Name of the name of the field that characterizes when the current access token will expire, used to parse the refresh token response. Defaults to "expires_in".
            refresh_token_name (str, optional): Name of the name of the refresh token field, used to parse the refresh token response. Defaults to "refresh_token".
            refresh_request_body (Mapping[str, Any], optional): Custom key value pair that will be added to the refresh token request body. Defaults to None.
+            refresh_request_headers (Mapping[str, Any], optional): Custom key value pair that will be added to the refresh token request headers. Defaults to None.
            grant_type (str, optional): OAuth grant type. Defaults to "refresh_token".
            client_id (Optional[str]): The client id to authenticate. If not specified, defaults to credentials.client_id in the config object.
            client_secret (Optional[str]): The client secret to authenticate. If not specified, defaults to credentials.client_secret in the config object.
@@ -174,23 +204,31 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
                ("credentials", "client_secret"),
            )
        )
+        self._client_id_name = client_id_name
+        self._client_secret_name = client_secret_name
        self._access_token_config_path = access_token_config_path
        self._refresh_token_config_path = refresh_token_config_path
        self._token_expiry_date_config_path = token_expiry_date_config_path
        self._token_expiry_date_format = token_expiry_date_format
        self._refresh_token_name = refresh_token_name
+        self._grant_type_name = grant_type_name
        self._connector_config = connector_config
        self.__message_repository = message_repository
        super().__init__(
-            token_refresh_endpoint,
-            self.
-            self.
-            self.
+            token_refresh_endpoint=token_refresh_endpoint,
+            client_id_name=self._client_id_name,
+            client_id=self.get_client_id(),
+            client_secret_name=self._client_secret_name,
+            client_secret=self.get_client_secret(),
+            refresh_token=self.get_refresh_token(),
+            refresh_token_name=self._refresh_token_name,
            scopes=scopes,
            token_expiry_date=self.get_token_expiry_date(),
            access_token_name=access_token_name,
            expires_in_name=expires_in_name,
            refresh_request_body=refresh_request_body,
+            refresh_request_headers=refresh_request_headers,
+            grant_type_name=self._grant_type_name,
            grant_type=grant_type,
            token_expiry_date_format=token_expiry_date_format,
            token_expiry_is_time_of_expiration=token_expiry_is_time_of_expiration,
    
airbyte_cdk/sources/utils/transform.py CHANGED

@@ -9,6 +9,7 @@ from typing import Any, Callable, Dict, Generator, Mapping, Optional, cast

from jsonschema import Draft7Validator, RefResolver, ValidationError, Validator, validators

+MAX_NESTING_DEPTH = 3
json_to_python_simple = {
    "string": str,
    "number": float,
@@ -225,6 +226,31 @@ class TypeTransformer:
            logger.warning(self.get_error_message(e))

    def get_error_message(self, e: ValidationError) -> str:
-
-
-
+        """
+        Construct a sanitized error message from a ValidationError instance.
+        """
+        field_path = ".".join(map(str, e.path))
+        type_structure = self._get_type_structure(e.instance)
+
+        return f"Failed to transform value from type '{type_structure}' to type '{e.validator_value}' at path: '{field_path}'"
+
+    def _get_type_structure(self, input_data: Any, current_depth: int = 0) -> Any:
+        """
+        Get the structure of a given input data for use in error message construction.
+        """
+        # Handle null values
+        if input_data is None:
+            return "null"
+
+        # Avoid recursing too deep
+        if current_depth >= MAX_NESTING_DEPTH:
+            return "object" if isinstance(input_data, dict) else python_to_json[type(input_data)]
+
+        if isinstance(input_data, dict):
+            return {
+                key: self._get_type_structure(field_value, current_depth + 1)
+                for key, field_value in input_data.items()
+            }
+
+        else:
+            return python_to_json[type(input_data)]
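A local re-implementation of the type-structure walk, to show the kind of sanitized message this enables (the mapping table and record below are illustrative; the real code uses the module's python_to_json):

from typing import Any

PYTHON_TO_JSON = {str: "string", int: "integer", float: "number", bool: "boolean", list: "array"}
MAX_NESTING_DEPTH = 3

def type_structure(value: Any, depth: int = 0) -> Any:
    # Summarize the types in a record (never the values), capping recursion depth.
    if value is None:
        return "null"
    if depth >= MAX_NESTING_DEPTH:
        return "object" if isinstance(value, dict) else PYTHON_TO_JSON[type(value)]
    if isinstance(value, dict):
        return {k: type_structure(v, depth + 1) for k, v in value.items()}
    return PYTHON_TO_JSON[type(value)]

print(type_structure({"profile": {"age": "forty-two", "tags": ["a", "b"]}}))
# {'profile': {'age': 'string', 'tags': 'array'}}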