airbyte-cdk 6.7.0rc1__py3-none-any.whl → 6.7.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of airbyte-cdk might be problematic. Click here for more details.
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +33 -24
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +69 -19
- {airbyte_cdk-6.7.0rc1.dist-info → airbyte_cdk-6.7.0rc2.dist-info}/METADATA +3 -4
- {airbyte_cdk-6.7.0rc1.dist-info → airbyte_cdk-6.7.0rc2.dist-info}/RECORD +6 -6
- {airbyte_cdk-6.7.0rc1.dist-info → airbyte_cdk-6.7.0rc2.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.7.0rc1.dist-info → airbyte_cdk-6.7.0rc2.dist-info}/WHEEL +0 -0
 
| 
         @@ -5,10 +5,10 @@ 
     | 
|
| 
       5 
5 
     | 
    
         
             
            import json
         
     | 
| 
       6 
6 
     | 
    
         
             
            import logging
         
     | 
| 
       7 
7 
     | 
    
         
             
            import pkgutil
         
     | 
| 
       8 
     | 
    
         
            -
            import re
         
     | 
| 
       9 
8 
     | 
    
         
             
            from copy import deepcopy
         
     | 
| 
       10 
9 
     | 
    
         
             
            from importlib import metadata
         
     | 
| 
       11 
     | 
    
         
            -
            from typing import Any, Dict, Iterator, List, Mapping, Optional 
     | 
| 
      
 10 
     | 
    
         
            +
            from typing import Any, Dict, Iterator, List, Mapping, Optional
         
     | 
| 
      
 11 
     | 
    
         
            +
            from packaging.version import Version, InvalidVersion
         
     | 
| 
       12 
12 
     | 
    
         | 
| 
       13 
13 
     | 
    
         
             
            import yaml
         
     | 
| 
       14 
14 
     | 
    
         
             
            from airbyte_cdk.models import (
         
     | 
| 
         @@ -245,45 +245,54 @@ class ManifestDeclarativeSource(DeclarativeSource): 
     | 
|
| 
       245 
245 
     | 
    
         
             
                            "Validation against json schema defined in declarative_component_schema.yaml schema failed"
         
     | 
| 
       246 
246 
     | 
    
         
             
                        ) from e
         
     | 
| 
       247 
247 
     | 
    
         | 
| 
       248 
     | 
    
         
            -
                     
     | 
| 
       249 
     | 
    
         
            -
                     
     | 
| 
       250 
     | 
    
         
            -
                     
     | 
| 
       251 
     | 
    
         
            -
                    if  
     | 
| 
      
 248 
     | 
    
         
            +
                    cdk_version_str = metadata.version("airbyte_cdk")
         
     | 
| 
      
 249 
     | 
    
         
            +
                    cdk_version = self._parse_version(cdk_version_str, "airbyte-cdk")
         
     | 
| 
      
 250 
     | 
    
         
            +
                    manifest_version_str = self._source_config.get("version")
         
     | 
| 
      
 251 
     | 
    
         
            +
                    if manifest_version_str is None:
         
     | 
| 
       252 
252 
     | 
    
         
             
                        raise RuntimeError(
         
     | 
| 
       253 
253 
     | 
    
         
             
                            "Manifest version is not defined in the manifest. This is unexpected since it should be a required field. Please contact support."
         
     | 
| 
       254 
254 
     | 
    
         
             
                        )
         
     | 
| 
       255 
     | 
    
         
            -
                     
     | 
| 
       256 
     | 
    
         
            -
                        manifest_version, "manifest"
         
     | 
| 
       257 
     | 
    
         
            -
                    )
         
     | 
| 
      
 255 
     | 
    
         
            +
                    manifest_version = self._parse_version(manifest_version_str, "manifest")
         
     | 
| 
       258 
256 
     | 
    
         | 
| 
       259 
     | 
    
         
            -
                    if cdk_version. 
     | 
| 
      
 257 
     | 
    
         
            +
                    if (cdk_version.major, cdk_version.minor, cdk_version.micro) == (0, 0, 0):
         
     | 
| 
       260 
258 
     | 
    
         
             
                        # Skipping version compatibility check on unreleased dev branch
         
     | 
| 
       261 
259 
     | 
    
         
             
                        pass
         
     | 
| 
       262 
     | 
    
         
            -
                    elif  
     | 
| 
       263 
     | 
    
         
            -
                         
     | 
| 
      
 260 
     | 
    
         
            +
                    elif (cdk_version.major, cdk_version.minor) < (
         
     | 
| 
      
 261 
     | 
    
         
            +
                        manifest_version.major,
         
     | 
| 
      
 262 
     | 
    
         
            +
                        manifest_version.minor,
         
     | 
| 
       264 
263 
     | 
    
         
             
                    ):
         
     | 
| 
       265 
264 
     | 
    
         
             
                        raise ValidationError(
         
     | 
| 
       266 
     | 
    
         
            -
                            f"The manifest version {manifest_version} is greater than the airbyte-cdk package version ({cdk_version}). Your "
         
     | 
| 
      
 265 
     | 
    
         
            +
                            f"The manifest version {manifest_version!s} is greater than the airbyte-cdk package version ({cdk_version!s}). Your "
         
     | 
| 
       267 
266 
     | 
    
         
             
                            f"manifest may contain features that are not in the current CDK version."
         
     | 
| 
       268 
267 
     | 
    
         
             
                        )
         
     | 
| 
       269 
     | 
    
         
            -
                    elif  
     | 
| 
      
 268 
     | 
    
         
            +
                    elif (manifest_version.major, manifest_version.minor) < (0, 29):
         
     | 
| 
       270 
269 
     | 
    
         
             
                        raise ValidationError(
         
     | 
| 
       271 
270 
     | 
    
         
             
                            f"The low-code framework was promoted to Beta in airbyte-cdk version 0.29.0 and contains many breaking changes to the "
         
     | 
| 
       272 
     | 
    
         
            -
                            f"language. The manifest version {manifest_version} is incompatible with the airbyte-cdk package version "
         
     | 
| 
       273 
     | 
    
         
            -
                            f"{cdk_version} which contains these breaking changes."
         
     | 
| 
      
 271 
     | 
    
         
            +
                            f"language. The manifest version {manifest_version!s} is incompatible with the airbyte-cdk package version "
         
     | 
| 
      
 272 
     | 
    
         
            +
                            f"{cdk_version!s} which contains these breaking changes."
         
     | 
| 
       274 
273 
     | 
    
         
             
                        )
         
     | 
| 
       275 
274 
     | 
    
         | 
| 
       276 
275 
     | 
    
         
             
                @staticmethod
         
     | 
| 
       277 
     | 
    
         
            -
                def  
     | 
| 
       278 
     | 
    
         
            -
                     
     | 
| 
       279 
     | 
    
         
            -
                     
     | 
| 
      
 276 
     | 
    
         
            +
                def _parse_version(
         
     | 
| 
      
 277 
     | 
    
         
            +
                    version: str,
         
     | 
| 
      
 278 
     | 
    
         
            +
                    version_type: str,
         
     | 
| 
      
 279 
     | 
    
         
            +
                ) -> Version:
         
     | 
| 
      
 280 
     | 
    
         
            +
                    """Takes a semantic version represented as a string and splits it into a tuple.
         
     | 
| 
      
 281 
     | 
    
         
            +
             
     | 
| 
      
 282 
     | 
    
         
            +
                    The fourth part (prerelease) is not returned in the tuple.
         
     | 
| 
      
 283 
     | 
    
         
            +
             
     | 
| 
      
 284 
     | 
    
         
            +
                    Returns:
         
     | 
| 
      
 285 
     | 
    
         
            +
                        Version: the parsed version object
         
     | 
| 
       280 
286 
     | 
    
         
             
                    """
         
     | 
| 
       281 
     | 
    
         
            -
                     
     | 
| 
       282 
     | 
    
         
            -
             
     | 
| 
      
 287 
     | 
    
         
            +
                    try:
         
     | 
| 
      
 288 
     | 
    
         
            +
                        parsed_version = Version(version)
         
     | 
| 
      
 289 
     | 
    
         
            +
                    except InvalidVersion as ex:
         
     | 
| 
       283 
290 
     | 
    
         
             
                        raise ValidationError(
         
     | 
| 
       284 
     | 
    
         
            -
                            f"The {version_type} version {version}  
     | 
| 
       285 
     | 
    
         
            -
                        )
         
     | 
| 
       286 
     | 
    
         
            -
                     
     | 
| 
      
 291 
     | 
    
         
            +
                            f"The {version_type} version '{version}' is not a valid version format."
         
     | 
| 
      
 292 
     | 
    
         
            +
                        ) from ex
         
     | 
| 
      
 293 
     | 
    
         
            +
                    else:
         
     | 
| 
      
 294 
     | 
    
         
            +
                        # No exception
         
     | 
| 
      
 295 
     | 
    
         
            +
                        return parsed_version
         
     | 
| 
       287 
296 
     | 
    
         | 
| 
       288 
297 
     | 
    
         
             
                def _stream_configs(self, manifest: Mapping[str, Any]) -> List[Dict[str, Any]]:
         
     | 
| 
       289 
298 
     | 
    
         
             
                    # This has a warning flag for static, but after we finish part 4 we'll replace manifest with self._source_config
         
     | 
| 
         @@ -29,16 +29,25 @@ from airbyte_cdk.sources.file_based.schema_helpers import SchemaType 
     | 
|
| 
       29 
29 
     | 
    
         
             
            from airbyte_cdk.utils import is_cloud_environment
         
     | 
| 
       30 
30 
     | 
    
         
             
            from airbyte_cdk.utils.traced_exception import AirbyteTracedException
         
     | 
| 
       31 
31 
     | 
    
         
             
            from unstructured.file_utils.filetype import (
         
     | 
| 
      
 32 
     | 
    
         
            +
                EXT_TO_FILETYPE,
         
     | 
| 
       32 
33 
     | 
    
         
             
                FILETYPE_TO_MIMETYPE,
         
     | 
| 
       33 
34 
     | 
    
         
             
                STR_TO_FILETYPE,
         
     | 
| 
       34 
35 
     | 
    
         
             
                FileType,
         
     | 
| 
       35 
36 
     | 
    
         
             
                detect_filetype,
         
     | 
| 
       36 
37 
     | 
    
         
             
            )
         
     | 
| 
      
 38 
     | 
    
         
            +
            import nltk
         
     | 
| 
       37 
39 
     | 
    
         | 
| 
       38 
40 
     | 
    
         
             
            unstructured_partition_pdf = None
         
     | 
| 
       39 
41 
     | 
    
         
             
            unstructured_partition_docx = None
         
     | 
| 
       40 
42 
     | 
    
         
             
            unstructured_partition_pptx = None
         
     | 
| 
       41 
43 
     | 
    
         | 
| 
      
 44 
     | 
    
         
            +
            try:
         
     | 
| 
      
 45 
     | 
    
         
            +
                nltk.data.find("tokenizers/punkt.zip")
         
     | 
| 
      
 46 
     | 
    
         
            +
                nltk.data.find("tokenizers/punkt_tab.zip")
         
     | 
| 
      
 47 
     | 
    
         
            +
            except LookupError:
         
     | 
| 
      
 48 
     | 
    
         
            +
                nltk.download("punkt")
         
     | 
| 
      
 49 
     | 
    
         
            +
                nltk.download("punkt_tab")
         
     | 
| 
      
 50 
     | 
    
         
            +
             
     | 
| 
       42 
51 
     | 
    
         | 
| 
       43 
52 
     | 
    
         
             
            def optional_decode(contents: Union[str, bytes]) -> str:
         
     | 
| 
       44 
53 
     | 
    
         
             
                if isinstance(contents, bytes):
         
     | 
| 
         @@ -108,9 +117,11 @@ class UnstructuredParser(FileTypeParser): 
     | 
|
| 
       108 
117 
     | 
    
         
             
                    format = _extract_format(config)
         
     | 
| 
       109 
118 
     | 
    
         
             
                    with stream_reader.open_file(file, self.file_read_mode, None, logger) as file_handle:
         
     | 
| 
       110 
119 
     | 
    
         
             
                        filetype = self._get_filetype(file_handle, file)
         
     | 
| 
       111 
     | 
    
         
            -
             
     | 
| 
       112 
120 
     | 
    
         
             
                        if filetype not in self._supported_file_types() and not format.skip_unprocessable_files:
         
     | 
| 
       113 
     | 
    
         
            -
                            raise self._create_parse_error( 
     | 
| 
      
 121 
     | 
    
         
            +
                            raise self._create_parse_error(
         
     | 
| 
      
 122 
     | 
    
         
            +
                                file,
         
     | 
| 
      
 123 
     | 
    
         
            +
                                self._get_file_type_error_message(filetype),
         
     | 
| 
      
 124 
     | 
    
         
            +
                            )
         
     | 
| 
       114 
125 
     | 
    
         | 
| 
       115 
126 
     | 
    
         
             
                        return {
         
     | 
| 
       116 
127 
     | 
    
         
             
                            "content": {
         
     | 
| 
         @@ -159,6 +170,10 @@ class UnstructuredParser(FileTypeParser): 
     | 
|
| 
       159 
170 
     | 
    
         
             
                                logger.warn(f"File {file.uri} cannot be parsed. Skipping it.")
         
     | 
| 
       160 
171 
     | 
    
         
             
                            else:
         
     | 
| 
       161 
172 
     | 
    
         
             
                                raise e
         
     | 
| 
      
 173 
     | 
    
         
            +
                        except Exception as e:
         
     | 
| 
      
 174 
     | 
    
         
            +
                            exception_str = str(e)
         
     | 
| 
      
 175 
     | 
    
         
            +
                            logger.error(f"File {file.uri} caused an error during parsing: {exception_str}.")
         
     | 
| 
      
 176 
     | 
    
         
            +
                            raise e
         
     | 
| 
       162 
177 
     | 
    
         | 
| 
       163 
178 
     | 
    
         
             
                def _read_file(
         
     | 
| 
       164 
179 
     | 
    
         
             
                    self,
         
     | 
| 
         @@ -176,20 +191,32 @@ class UnstructuredParser(FileTypeParser): 
     | 
|
| 
       176 
191 
     | 
    
         
             
                        # check whether unstructured library is actually available for better error message and to ensure proper typing (can't be None after this point)
         
     | 
| 
       177 
192 
     | 
    
         
             
                        raise Exception("unstructured library is not available")
         
     | 
| 
       178 
193 
     | 
    
         | 
| 
       179 
     | 
    
         
            -
                    filetype = self._get_filetype(file_handle, remote_file)
         
     | 
| 
      
 194 
     | 
    
         
            +
                    filetype: FileType | None = self._get_filetype(file_handle, remote_file)
         
     | 
| 
       180 
195 
     | 
    
         | 
| 
       181 
     | 
    
         
            -
                    if filetype  
     | 
| 
      
 196 
     | 
    
         
            +
                    if filetype is None or filetype not in self._supported_file_types():
         
     | 
| 
      
 197 
     | 
    
         
            +
                        raise self._create_parse_error(
         
     | 
| 
      
 198 
     | 
    
         
            +
                            remote_file,
         
     | 
| 
      
 199 
     | 
    
         
            +
                            self._get_file_type_error_message(filetype),
         
     | 
| 
      
 200 
     | 
    
         
            +
                        )
         
     | 
| 
      
 201 
     | 
    
         
            +
                    if filetype in {FileType.MD, FileType.TXT}:
         
     | 
| 
       182 
202 
     | 
    
         
             
                        file_content: bytes = file_handle.read()
         
     | 
| 
       183 
203 
     | 
    
         
             
                        decoded_content: str = optional_decode(file_content)
         
     | 
| 
       184 
204 
     | 
    
         
             
                        return decoded_content
         
     | 
| 
       185 
     | 
    
         
            -
                    if filetype not in self._supported_file_types():
         
     | 
| 
       186 
     | 
    
         
            -
                        raise self._create_parse_error(remote_file, self._get_file_type_error_message(filetype))
         
     | 
| 
       187 
205 
     | 
    
         
             
                    if format.processing.mode == "local":
         
     | 
| 
       188 
     | 
    
         
            -
                        return self._read_file_locally( 
     | 
| 
      
 206 
     | 
    
         
            +
                        return self._read_file_locally(
         
     | 
| 
      
 207 
     | 
    
         
            +
                            file_handle,
         
     | 
| 
      
 208 
     | 
    
         
            +
                            filetype,
         
     | 
| 
      
 209 
     | 
    
         
            +
                            format.strategy,
         
     | 
| 
      
 210 
     | 
    
         
            +
                            remote_file,
         
     | 
| 
      
 211 
     | 
    
         
            +
                        )
         
     | 
| 
       189 
212 
     | 
    
         
             
                    elif format.processing.mode == "api":
         
     | 
| 
       190 
213 
     | 
    
         
             
                        try:
         
     | 
| 
       191 
214 
     | 
    
         
             
                            result: str = self._read_file_remotely_with_retries(
         
     | 
| 
       192 
     | 
    
         
            -
                                file_handle, 
     | 
| 
      
 215 
     | 
    
         
            +
                                file_handle,
         
     | 
| 
      
 216 
     | 
    
         
            +
                                format.processing,
         
     | 
| 
      
 217 
     | 
    
         
            +
                                filetype,
         
     | 
| 
      
 218 
     | 
    
         
            +
                                format.strategy,
         
     | 
| 
      
 219 
     | 
    
         
            +
                                remote_file,
         
     | 
| 
       193 
220 
     | 
    
         
             
                            )
         
     | 
| 
       194 
221 
     | 
    
         
             
                        except Exception as e:
         
     | 
| 
       195 
222 
     | 
    
         
             
                            # If a parser error happens during remotely processing the file, this means the file is corrupted. This case is handled by the parse_records method, so just rethrow.
         
     | 
| 
         @@ -336,7 +363,11 @@ class UnstructuredParser(FileTypeParser): 
     | 
|
| 
       336 
363 
     | 
    
         | 
| 
       337 
364 
     | 
    
         
             
                    return self._render_markdown([element.to_dict() for element in elements])
         
     | 
| 
       338 
365 
     | 
    
         | 
| 
       339 
     | 
    
         
            -
                def _create_parse_error( 
     | 
| 
      
 366 
     | 
    
         
            +
                def _create_parse_error(
         
     | 
| 
      
 367 
     | 
    
         
            +
                    self,
         
     | 
| 
      
 368 
     | 
    
         
            +
                    remote_file: RemoteFile,
         
     | 
| 
      
 369 
     | 
    
         
            +
                    message: str,
         
     | 
| 
      
 370 
     | 
    
         
            +
                ) -> RecordParseError:
         
     | 
| 
       340 
371 
     | 
    
         
             
                    return RecordParseError(
         
     | 
| 
       341 
372 
     | 
    
         
             
                        FileBasedSourceError.ERROR_PARSING_RECORD, filename=remote_file.uri, message=message
         
     | 
| 
       342 
373 
     | 
    
         
             
                    )
         
     | 
| 
         @@ -360,32 +391,51 @@ class UnstructuredParser(FileTypeParser): 
     | 
|
| 
       360 
391 
     | 
    
         
             
                    # detect_filetype is either using the file name or file content
         
     | 
| 
       361 
392 
     | 
    
         
             
                    # if possible, try to leverage the file name to detect the file type
         
     | 
| 
       362 
393 
     | 
    
         
             
                    # if the file name is not available, use the file content
         
     | 
| 
       363 
     | 
    
         
            -
                    file_type =  
     | 
| 
       364 
     | 
    
         
            -
             
     | 
| 
       365 
     | 
    
         
            -
             
     | 
| 
       366 
     | 
    
         
            -
             
     | 
| 
      
 394 
     | 
    
         
            +
                    file_type: FileType | None = None
         
     | 
| 
      
 395 
     | 
    
         
            +
                    try:
         
     | 
| 
      
 396 
     | 
    
         
            +
                        file_type = detect_filetype(
         
     | 
| 
      
 397 
     | 
    
         
            +
                            filename=remote_file.uri,
         
     | 
| 
      
 398 
     | 
    
         
            +
                        )
         
     | 
| 
      
 399 
     | 
    
         
            +
                    except Exception:
         
     | 
| 
      
 400 
     | 
    
         
            +
                        # Path doesn't exist locally. Try something else...
         
     | 
| 
      
 401 
     | 
    
         
            +
                        pass
         
     | 
| 
      
 402 
     | 
    
         
            +
             
     | 
| 
      
 403 
     | 
    
         
            +
                    if file_type and file_type != FileType.UNK:
         
     | 
| 
       367 
404 
     | 
    
         
             
                        return file_type
         
     | 
| 
       368 
405 
     | 
    
         | 
| 
       369 
406 
     | 
    
         
             
                    type_based_on_content = detect_filetype(file=file)
         
     | 
| 
      
 407 
     | 
    
         
            +
                    file.seek(0)  # detect_filetype is reading to read the file content, so we need to reset
         
     | 
| 
       370 
408 
     | 
    
         | 
| 
       371 
     | 
    
         
            -
                     
     | 
| 
       372 
     | 
    
         
            -
             
     | 
| 
      
 409 
     | 
    
         
            +
                    if type_based_on_content and type_based_on_content != FileType.UNK:
         
     | 
| 
      
 410 
     | 
    
         
            +
                        return type_based_on_content
         
     | 
| 
       373 
411 
     | 
    
         | 
| 
       374 
     | 
    
         
            -
                     
     | 
| 
      
 412 
     | 
    
         
            +
                    extension = "." + remote_file.uri.split(".")[-1].lower()
         
     | 
| 
      
 413 
     | 
    
         
            +
                    if extension in EXT_TO_FILETYPE:
         
     | 
| 
      
 414 
     | 
    
         
            +
                        return EXT_TO_FILETYPE[extension]
         
     | 
| 
      
 415 
     | 
    
         
            +
             
     | 
| 
      
 416 
     | 
    
         
            +
                    return None
         
     | 
| 
       375 
417 
     | 
    
         | 
| 
       376 
418 
     | 
    
         
             
                def _supported_file_types(self) -> List[Any]:
         
     | 
| 
       377 
419 
     | 
    
         
             
                    return [FileType.MD, FileType.PDF, FileType.DOCX, FileType.PPTX, FileType.TXT]
         
     | 
| 
       378 
420 
     | 
    
         | 
| 
       379 
     | 
    
         
            -
                def _get_file_type_error_message( 
     | 
| 
      
 421 
     | 
    
         
            +
                def _get_file_type_error_message(
         
     | 
| 
      
 422 
     | 
    
         
            +
                    self,
         
     | 
| 
      
 423 
     | 
    
         
            +
                    file_type: FileType | None,
         
     | 
| 
      
 424 
     | 
    
         
            +
                ) -> str:
         
     | 
| 
       380 
425 
     | 
    
         
             
                    supported_file_types = ", ".join([str(type) for type in self._supported_file_types()])
         
     | 
| 
       381 
     | 
    
         
            -
                    return f"File type {file_type} is not supported. Supported file types are {supported_file_types}"
         
     | 
| 
      
 426 
     | 
    
         
            +
                    return f"File type {file_type or 'None'!s} is not supported. Supported file types are {supported_file_types}"
         
     | 
| 
       382 
427 
     | 
    
         | 
| 
       383 
428 
     | 
    
         
             
                def _render_markdown(self, elements: List[Any]) -> str:
         
     | 
| 
       384 
429 
     | 
    
         
             
                    return "\n\n".join((self._convert_to_markdown(el) for el in elements))
         
     | 
| 
       385 
430 
     | 
    
         | 
| 
       386 
431 
     | 
    
         
             
                def _convert_to_markdown(self, el: Dict[str, Any]) -> str:
         
     | 
| 
       387 
432 
     | 
    
         
             
                    if dpath.get(el, "type") == "Title":
         
     | 
| 
       388 
     | 
    
         
            -
                         
     | 
| 
      
 433 
     | 
    
         
            +
                        category_depth = dpath.get(el, "metadata/category_depth", default=1) or 1
         
     | 
| 
      
 434 
     | 
    
         
            +
                        if not isinstance(category_depth, int):
         
     | 
| 
      
 435 
     | 
    
         
            +
                            category_depth = (
         
     | 
| 
      
 436 
     | 
    
         
            +
                                int(category_depth) if isinstance(category_depth, (str, float)) else 1
         
     | 
| 
      
 437 
     | 
    
         
            +
                            )
         
     | 
| 
      
 438 
     | 
    
         
            +
                        heading_str = "#" * category_depth
         
     | 
| 
       389 
439 
     | 
    
         
             
                        return f"{heading_str} {dpath.get(el, 'text')}"
         
     | 
| 
       390 
440 
     | 
    
         
             
                    elif dpath.get(el, "type") == "ListItem":
         
     | 
| 
       391 
441 
     | 
    
         
             
                        return f"- {dpath.get(el, 'text')}"
         
     | 
| 
         @@ -1,13 +1,13 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            Metadata-Version: 2.1
         
     | 
| 
       2 
2 
     | 
    
         
             
            Name: airbyte-cdk
         
     | 
| 
       3 
     | 
    
         
            -
            Version: 6.7. 
     | 
| 
      
 3 
     | 
    
         
            +
            Version: 6.7.0rc2
         
     | 
| 
       4 
4 
     | 
    
         
             
            Summary: A framework for writing Airbyte Connectors.
         
     | 
| 
       5 
5 
     | 
    
         
             
            Home-page: https://airbyte.com
         
     | 
| 
       6 
6 
     | 
    
         
             
            License: MIT
         
     | 
| 
       7 
7 
     | 
    
         
             
            Keywords: airbyte,connector-development-kit,cdk
         
     | 
| 
       8 
8 
     | 
    
         
             
            Author: Airbyte
         
     | 
| 
       9 
9 
     | 
    
         
             
            Author-email: contact@airbyte.io
         
     | 
| 
       10 
     | 
    
         
            -
            Requires-Python: >=3.10,< 
     | 
| 
      
 10 
     | 
    
         
            +
            Requires-Python: >=3.10,<3.13
         
     | 
| 
       11 
11 
     | 
    
         
             
            Classifier: Development Status :: 3 - Alpha
         
     | 
| 
       12 
12 
     | 
    
         
             
            Classifier: Intended Audience :: Developers
         
     | 
| 
       13 
13 
     | 
    
         
             
            Classifier: License :: OSI Approved :: MIT License
         
     | 
| 
         @@ -15,7 +15,6 @@ Classifier: Programming Language :: Python :: 3 
     | 
|
| 
       15 
15 
     | 
    
         
             
            Classifier: Programming Language :: Python :: 3.10
         
     | 
| 
       16 
16 
     | 
    
         
             
            Classifier: Programming Language :: Python :: 3.11
         
     | 
| 
       17 
17 
     | 
    
         
             
            Classifier: Programming Language :: Python :: 3.12
         
     | 
| 
       18 
     | 
    
         
            -
            Classifier: Programming Language :: Python :: 3.13
         
     | 
| 
       19 
18 
     | 
    
         
             
            Classifier: Topic :: Scientific/Engineering
         
     | 
| 
       20 
19 
     | 
    
         
             
            Classifier: Topic :: Software Development :: Libraries :: Python Modules
         
     | 
| 
       21 
20 
     | 
    
         
             
            Provides-Extra: file-based
         
     | 
| 
         @@ -42,7 +41,7 @@ Requires-Dist: jsonschema (>=3.2.0,<3.3.0) 
     | 
|
| 
       42 
41 
     | 
    
         
             
            Requires-Dist: langchain (==0.1.16) ; extra == "vector-db-based"
         
     | 
| 
       43 
42 
     | 
    
         
             
            Requires-Dist: langchain_core (==0.1.42)
         
     | 
| 
       44 
43 
     | 
    
         
             
            Requires-Dist: markdown ; extra == "file-based"
         
     | 
| 
       45 
     | 
    
         
            -
            Requires-Dist: nltk (==3. 
     | 
| 
      
 44 
     | 
    
         
            +
            Requires-Dist: nltk (==3.9.1)
         
     | 
| 
       46 
45 
     | 
    
         
             
            Requires-Dist: numpy (<2)
         
     | 
| 
       47 
46 
     | 
    
         
             
            Requires-Dist: openai[embeddings] (==0.27.9) ; extra == "vector-db-based"
         
     | 
| 
       48 
47 
     | 
    
         
             
            Requires-Dist: orjson (>=3.10.7,<4.0.0)
         
     | 
| 
         @@ -95,7 +95,7 @@ airbyte_cdk/sources/declarative/interpolation/interpolated_string.py,sha256=LYEZ 
     | 
|
| 
       95 
95 
     | 
    
         
             
            airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=-V5UddGm69UKEB6o_O1EIES9kfY8FV_X4Ji8w1yOuSA,981
         
     | 
| 
       96 
96 
     | 
    
         
             
            airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=KwTd0oagnZI4tARxnJZlQiDHn1IXqS7dbnRT0rKRAj8,6626
         
     | 
| 
       97 
97 
     | 
    
         
             
            airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=QgIfSVPHx_MMUCgbQdm-NMpUlp_cpk0OQhoRDFtkrxE,4040
         
     | 
| 
       98 
     | 
    
         
            -
            airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256= 
     | 
| 
      
 98 
     | 
    
         
            +
            airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=SR5s8gbLmuQwCd4mgiNyk1tttocMC1bJbkA582TzbYA,12918
         
     | 
| 
       99 
99 
     | 
    
         
             
            airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
         
     | 
| 
       100 
100 
     | 
    
         
             
            airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iNsF3jWCaZAmJYArmDQg0MJgZikk6frh3IfhcMBR_Qc,3924
         
     | 
| 
       101 
101 
     | 
    
         
             
            airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
         
     | 
| 
         @@ -202,7 +202,7 @@ airbyte_cdk/sources/file_based/file_types/file_transfer.py,sha256=HyGRihJxcb_lEs 
     | 
|
| 
       202 
202 
     | 
    
         
             
            airbyte_cdk/sources/file_based/file_types/file_type_parser.py,sha256=JgpH21PrbRqwK92BJklZWvh2TndA6xZ-eP1LPMo44oQ,2832
         
     | 
| 
       203 
203 
     | 
    
         
             
            airbyte_cdk/sources/file_based/file_types/jsonl_parser.py,sha256=k1ri7TtwrN8oYZpCl1bNNeAQmwBbwLjmOmIz8-tKflY,5897
         
     | 
| 
       204 
204 
     | 
    
         
             
            airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=0B4RYehU4z4dys3Tu-O98B0Uw7JO_LzStRwmNxKh6Xk,10486
         
     | 
| 
       205 
     | 
    
         
            -
            airbyte_cdk/sources/file_based/file_types/unstructured_parser.py,sha256= 
     | 
| 
      
 205 
     | 
    
         
            +
            airbyte_cdk/sources/file_based/file_types/unstructured_parser.py,sha256=3dlhzKq4Vuc-E01uG5TLPAn0E8Be2SrBNukeP22RNFM,18601
         
     | 
| 
       206 
206 
     | 
    
         
             
            airbyte_cdk/sources/file_based/remote_file.py,sha256=yqRz93vPe8PBXLIMJ5W5u2JRlZRhg6sBrAjn3pPjJ8A,315
         
     | 
| 
       207 
207 
     | 
    
         
             
            airbyte_cdk/sources/file_based/schema_helpers.py,sha256=Cf8FH1bDFP0qCDDfEYir_WjP4exXUnikz8hZ40y1Ek0,9601
         
     | 
| 
       208 
208 
     | 
    
         
             
            airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py,sha256=sEVnRhZ8x9f7PNjo6lewxid9z0PI8eSj7gSoFC3MH1Y,527
         
     | 
| 
         @@ -327,7 +327,7 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EemcgcQlI8-LPYOPlYv4Qkdjyho79XVLWaUHF5X 
     | 
|
| 
       327 
327 
     | 
    
         
             
            airbyte_cdk/utils/spec_schema_transformations.py,sha256=LVc9KbtMeV_z99jWo0Ou8u4l6eBJ0BWNhxj4zrrGKRs,763
         
     | 
| 
       328 
328 
     | 
    
         
             
            airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
         
     | 
| 
       329 
329 
     | 
    
         
             
            airbyte_cdk/utils/traced_exception.py,sha256=89TQdFuYZ1NJgmFpqLzY_T_T_64TpJYmVqs119Bp43g,6164
         
     | 
| 
       330 
     | 
    
         
            -
            airbyte_cdk-6.7. 
     | 
| 
       331 
     | 
    
         
            -
            airbyte_cdk-6.7. 
     | 
| 
       332 
     | 
    
         
            -
            airbyte_cdk-6.7. 
     | 
| 
       333 
     | 
    
         
            -
            airbyte_cdk-6.7. 
     | 
| 
      
 330 
     | 
    
         
            +
            airbyte_cdk-6.7.0rc2.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
         
     | 
| 
      
 331 
     | 
    
         
            +
            airbyte_cdk-6.7.0rc2.dist-info/METADATA,sha256=0dmVuMrkaui3uUXZp0tg1CfHzY58AHUPJGAGQhk4VRw,13300
         
     | 
| 
      
 332 
     | 
    
         
            +
            airbyte_cdk-6.7.0rc2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
         
     | 
| 
      
 333 
     | 
    
         
            +
            airbyte_cdk-6.7.0rc2.dist-info/RECORD,,
         
     | 
| 
         
            File without changes
         
     | 
| 
         
            File without changes
         
     |