PyPI - airbyte-cdk - Versions diffs - 6.5.3rc2__py3-none-any.whl → 6.6.0__py3-none-any.whl - Mend

airbyte-cdk 6.5.3rc2__py3-none-any.whl → 6.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (200) hide show

airbyte_cdk/destinations/vector_db_based/config.py CHANGED Viewed

@@ -17,7 +17,11 @@ class SeparatorSplitterConfigModel(BaseModel):
         title="Separators",
         description='List of separator strings to split text fields by. The separator itself needs to be wrapped in double quotes, e.g. to split by the dot character, use ".". To split by a newline, use "\\n".',
     )
-    keep_separator: bool = Field(default=False, title="Keep separator", description="Whether to keep the separator in the resulting chunks")
+    keep_separator: bool = Field(
+        default=False,
+        title="Keep separator",
+        description="Whether to keep the separator in the resulting chunks",
+    )
     class Config(OneOfOptionConfig):
         title = "By Separator"
@@ -68,18 +72,20 @@ class CodeSplitterConfigModel(BaseModel):
     class Config(OneOfOptionConfig):
         title = "By Programming Language"
-        description = (
-            "Split the text by suitable delimiters based on the programming language. This is useful for splitting code into chunks."
-        )
+        description = "Split the text by suitable delimiters based on the programming language. This is useful for splitting code into chunks."
         discriminator = "mode"
-TextSplitterConfigModel = Union[SeparatorSplitterConfigModel, MarkdownHeaderSplitterConfigModel, CodeSplitterConfigModel]
+TextSplitterConfigModel = Union[
+    SeparatorSplitterConfigModel, MarkdownHeaderSplitterConfigModel, CodeSplitterConfigModel
+]
 class FieldNameMappingConfigModel(BaseModel):
     from_field: str = Field(title="From field name", description="The field name in the source")
-    to_field: str = Field(title="To field name", description="The field name to use in the destination")
+    to_field: str = Field(
+        title="To field name", description="The field name to use in the destination"
+    )
 class ProcessingConfigModel(BaseModel):
@@ -132,9 +138,7 @@ class OpenAIEmbeddingConfigModel(BaseModel):
     class Config(OneOfOptionConfig):
         title = "OpenAI"
-        description = (
-            "Use the OpenAI API to embed text. This option is using the text-embedding-ada-002 model with 1536 embedding dimensions."
-        )
+        description = "Use the OpenAI API to embed text. This option is using the text-embedding-ada-002 model with 1536 embedding dimensions."
         discriminator = "mode"
@@ -142,7 +146,10 @@ class OpenAICompatibleEmbeddingConfigModel(BaseModel):
     mode: Literal["openai_compatible"] = Field("openai_compatible", const=True)
     api_key: str = Field(title="API key", default="", airbyte_secret=True)
     base_url: str = Field(
-        ..., title="Base URL", description="The base URL for your OpenAI-compatible service", examples=["https://your-service-name.com"]
+        ...,
+        title="Base URL",
+        description="The base URL for your OpenAI-compatible service",
+        examples=["https://your-service-name.com"],
     )
     model_name: str = Field(
         title="Model name",
@@ -151,7 +158,9 @@ class OpenAICompatibleEmbeddingConfigModel(BaseModel):
         examples=["text-embedding-ada-002"],
     )
     dimensions: int = Field(
-        title="Embedding dimensions", description="The number of dimensions the embedding model is generating", examples=[1536, 384]
+        title="Embedding dimensions",
+        description="The number of dimensions the embedding model is generating",
+        examples=[1536, 384],
     )
     class Config(OneOfOptionConfig):
@@ -199,10 +208,16 @@ class FakeEmbeddingConfigModel(BaseModel):
 class FromFieldEmbeddingConfigModel(BaseModel):
     mode: Literal["from_field"] = Field("from_field", const=True)
     field_name: str = Field(
-        ..., title="Field name", description="Name of the field in the record that contains the embedding", examples=["embedding", "vector"]
+        ...,
+        title="Field name",
+        description="Name of the field in the record that contains the embedding",
+        examples=["embedding", "vector"],
     )
     dimensions: int = Field(
-        ..., title="Embedding dimensions", description="The number of dimensions the embedding model is generating", examples=[1536, 384]
+        ...,
+        title="Embedding dimensions",
+        description="The number of dimensions the embedding model is generating",
+        examples=[1536, 384],
     )
     class Config(OneOfOptionConfig):
@@ -241,7 +256,14 @@ class VectorDBConfigModel(BaseModel):
         FakeEmbeddingConfigModel,
         AzureOpenAIEmbeddingConfigModel,
         OpenAICompatibleEmbeddingConfigModel,
-    ] = Field(..., title="Embedding", description="Embedding configuration", discriminator="mode", group="embedding", type="object")
+    ] = Field(
+        ...,
+        title="Embedding",
+        description="Embedding configuration",
+        discriminator="mode",
+        group="embedding",
+        type="object",
+    )
     processing: ProcessingConfigModel
     omit_raw_text: bool = Field(
         default=False,

airbyte_cdk/destinations/vector_db_based/document_processor.py CHANGED Viewed

@@ -8,9 +8,18 @@ from dataclasses import dataclass
 from typing import Any, Dict, List, Mapping, Optional, Tuple
 import dpath
-from airbyte_cdk.destinations.vector_db_based.config import ProcessingConfigModel, SeparatorSplitterConfigModel, TextSplitterConfigModel
+from airbyte_cdk.destinations.vector_db_based.config import (
+    ProcessingConfigModel,
+    SeparatorSplitterConfigModel,
+    TextSplitterConfigModel,
+)
 from airbyte_cdk.destinations.vector_db_based.utils import create_stream_identifier
-from airbyte_cdk.models import AirbyteRecordMessage, ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, DestinationSyncMode
+from airbyte_cdk.models import (
+    AirbyteRecordMessage,
+    ConfiguredAirbyteCatalog,
+    ConfiguredAirbyteStream,
+    DestinationSyncMode,
+)
 from airbyte_cdk.utils.traced_exception import AirbyteTracedException, FailureType
 from langchain.text_splitter import Language, RecursiveCharacterTextSplitter
 from langchain.utils import stringify_dict
@@ -30,7 +39,14 @@ class Chunk:
     embedding: Optional[List[float]] = None
-headers_to_split_on = ["(?:^|\n)# ", "(?:^|\n)## ", "(?:^|\n)### ", "(?:^|\n)#### ", "(?:^|\n)##### ", "(?:^|\n)###### "]
+headers_to_split_on = [
+    "(?:^|\n)# ",
+    "(?:^|\n)## ",
+    "(?:^|\n)### ",
+    "(?:^|\n)#### ",
+    "(?:^|\n)##### ",
+    "(?:^|\n)###### ",
+]
 class DocumentProcessor:
@@ -64,7 +80,10 @@ class DocumentProcessor:
         return None
     def _get_text_splitter(
-        self, chunk_size: int, chunk_overlap: int, splitter_config: Optional[TextSplitterConfigModel]
+        self,
+        chunk_size: int,
+        chunk_overlap: int,
+        splitter_config: Optional[TextSplitterConfigModel],
     ) -> RecursiveCharacterTextSplitter:
         if splitter_config is None:
             splitter_config = SeparatorSplitterConfigModel(mode="separator")
@@ -89,14 +108,20 @@ class DocumentProcessor:
             return RecursiveCharacterTextSplitter.from_tiktoken_encoder(
                 chunk_size=chunk_size,
                 chunk_overlap=chunk_overlap,
-                separators=RecursiveCharacterTextSplitter.get_separators_for_language(Language(splitter_config.language)),
+                separators=RecursiveCharacterTextSplitter.get_separators_for_language(
+                    Language(splitter_config.language)
+                ),
                 disallowed_special=(),
             )
     def __init__(self, config: ProcessingConfigModel, catalog: ConfiguredAirbyteCatalog):
-        self.streams = {create_stream_identifier(stream.stream): stream for stream in catalog.streams}
+        self.streams = {
+            create_stream_identifier(stream.stream): stream for stream in catalog.streams
+        }
-        self.splitter = self._get_text_splitter(config.chunk_size, config.chunk_overlap, config.text_splitter)
+        self.splitter = self._get_text_splitter(
+            config.chunk_size, config.chunk_overlap, config.text_splitter
+        )
         self.text_fields = config.text_fields
         self.metadata_fields = config.metadata_fields
         self.field_name_mappings = config.field_name_mappings
@@ -119,10 +144,18 @@ class DocumentProcessor:
                 failure_type=FailureType.config_error,
             )
         chunks = [
-            Chunk(page_content=chunk_document.page_content, metadata=chunk_document.metadata, record=record)
+            Chunk(
+                page_content=chunk_document.page_content,
+                metadata=chunk_document.metadata,
+                record=record,
+            )
             for chunk_document in self._split_document(doc)
         ]
-        id_to_delete = doc.metadata[METADATA_RECORD_ID_FIELD] if METADATA_RECORD_ID_FIELD in doc.metadata else None
+        id_to_delete = (
+            doc.metadata[METADATA_RECORD_ID_FIELD]
+            if METADATA_RECORD_ID_FIELD in doc.metadata
+            else None
+        )
         return chunks, id_to_delete
     def _generate_document(self, record: AirbyteRecordMessage) -> Optional[Document]:
@@ -133,7 +166,9 @@ class DocumentProcessor:
         metadata = self._extract_metadata(record)
         return Document(page_content=text, metadata=metadata)
-    def _extract_relevant_fields(self, record: AirbyteRecordMessage, fields: Optional[List[str]]) -> Dict[str, Any]:
+    def _extract_relevant_fields(
+        self, record: AirbyteRecordMessage, fields: Optional[List[str]]
+    ) -> Dict[str, Any]:
         relevant_fields = {}
         if fields and len(fields) > 0:
             for field in fields:
@@ -156,7 +191,10 @@ class DocumentProcessor:
         stream_identifier = create_stream_identifier(record)
         current_stream: ConfiguredAirbyteStream = self.streams[stream_identifier]
         # if the sync mode is deduping, use the primary key to upsert existing records instead of appending new ones
-        if not current_stream.primary_key or current_stream.destination_sync_mode != DestinationSyncMode.append_dedup:
+        if (
+            not current_stream.primary_key
+            or current_stream.destination_sync_mode != DestinationSyncMode.append_dedup
+        ):
             return None
         primary_key = []

airbyte_cdk/destinations/vector_db_based/embedder.py CHANGED Viewed

@@ -92,7 +92,9 @@ class BaseOpenAIEmbedder(Embedder):
         batches = create_chunks(documents, batch_size=embedding_batch_size)
         embeddings: List[Optional[List[float]]] = []
         for batch in batches:
-            embeddings.extend(self.embeddings.embed_documents([chunk.page_content for chunk in batch]))
+            embeddings.extend(
+                self.embeddings.embed_documents([chunk.page_content for chunk in batch])
+            )
         return embeddings
     @property
@@ -103,13 +105,30 @@ class BaseOpenAIEmbedder(Embedder):
 class OpenAIEmbedder(BaseOpenAIEmbedder):
     def __init__(self, config: OpenAIEmbeddingConfigModel, chunk_size: int):
-        super().__init__(OpenAIEmbeddings(openai_api_key=config.openai_key, max_retries=15, disallowed_special=()), chunk_size)  # type: ignore
+        super().__init__(
+            OpenAIEmbeddings(
+                openai_api_key=config.openai_key, max_retries=15, disallowed_special=()
+            ),
+            chunk_size,
+        )  # type: ignore
 class AzureOpenAIEmbedder(BaseOpenAIEmbedder):
     def __init__(self, config: AzureOpenAIEmbeddingConfigModel, chunk_size: int):
         # Azure OpenAI API has — as of 20230927 — a limit of 16 documents per request
-        super().__init__(OpenAIEmbeddings(openai_api_key=config.openai_key, chunk_size=16, max_retries=15, openai_api_type="azure", openai_api_version="2023-05-15", openai_api_base=config.api_base, deployment=config.deployment, disallowed_special=()), chunk_size)  # type: ignore
+        super().__init__(
+            OpenAIEmbeddings(
+                openai_api_key=config.openai_key,
+                chunk_size=16,
+                max_retries=15,
+                openai_api_type="azure",
+                openai_api_version="2023-05-15",
+                openai_api_base=config.api_base,
+                deployment=config.deployment,
+                disallowed_special=(),
+            ),
+            chunk_size,
+        )  # type: ignore
 COHERE_VECTOR_SIZE = 1024
@@ -119,7 +138,9 @@ class CohereEmbedder(Embedder):
     def __init__(self, config: CohereEmbeddingConfigModel):
         super().__init__()
         # Client is set internally
-        self.embeddings = CohereEmbeddings(cohere_api_key=config.cohere_key, model="embed-english-light-v2.0")  # type: ignore
+        self.embeddings = CohereEmbeddings(
+            cohere_api_key=config.cohere_key, model="embed-english-light-v2.0"
+        )  # type: ignore
     def check(self) -> Optional[str]:
         try:
@@ -129,7 +150,10 @@ class CohereEmbedder(Embedder):
         return None
     def embed_documents(self, documents: List[Document]) -> List[Optional[List[float]]]:
-        return cast(List[Optional[List[float]]], self.embeddings.embed_documents([document.page_content for document in documents]))
+        return cast(
+            List[Optional[List[float]]],
+            self.embeddings.embed_documents([document.page_content for document in documents]),
+        )
     @property
     def embedding_dimensions(self) -> int:
@@ -150,7 +174,10 @@ class FakeEmbedder(Embedder):
         return None
     def embed_documents(self, documents: List[Document]) -> List[Optional[List[float]]]:
-        return cast(List[Optional[List[float]]], self.embeddings.embed_documents([document.page_content for document in documents]))
+        return cast(
+            List[Optional[List[float]]],
+            self.embeddings.embed_documents([document.page_content for document in documents]),
+        )
     @property
     def embedding_dimensions(self) -> int:
@@ -167,11 +194,20 @@ class OpenAICompatibleEmbedder(Embedder):
         self.config = config
         # Client is set internally
         # Always set an API key even if there is none defined in the config because the validator will fail otherwise. Embedding APIs that don't require an API key don't fail if one is provided, so this is not breaking usage.
-        self.embeddings = LocalAIEmbeddings(model=config.model_name, openai_api_key=config.api_key or "dummy-api-key", openai_api_base=config.base_url, max_retries=15, disallowed_special=())  # type: ignore
+        self.embeddings = LocalAIEmbeddings(
+            model=config.model_name,
+            openai_api_key=config.api_key or "dummy-api-key",
+            openai_api_base=config.base_url,
+            max_retries=15,
+            disallowed_special=(),
+        )  # type: ignore
     def check(self) -> Optional[str]:
         deployment_mode = os.environ.get("DEPLOYMENT_MODE", "")
-        if deployment_mode.casefold() == CLOUD_DEPLOYMENT_MODE and not self.config.base_url.startswith("https://"):
+        if (
+            deployment_mode.casefold() == CLOUD_DEPLOYMENT_MODE
+            and not self.config.base_url.startswith("https://")
+        ):
             return "Base URL must start with https://"
         try:
@@ -181,7 +217,10 @@ class OpenAICompatibleEmbedder(Embedder):
         return None
     def embed_documents(self, documents: List[Document]) -> List[Optional[List[float]]]:
-        return cast(List[Optional[List[float]]], self.embeddings.embed_documents([document.page_content for document in documents]))
+        return cast(
+            List[Optional[List[float]]],
+            self.embeddings.embed_documents([document.page_content for document in documents]),
+        )
     @property
     def embedding_dimensions(self) -> int:
@@ -254,8 +293,10 @@ def create_from_config(
     ],
     processing_config: ProcessingConfigModel,
 ) -> Embedder:
     if embedding_config.mode == "azure_openai" or embedding_config.mode == "openai":
-        return cast(Embedder, embedder_map[embedding_config.mode](embedding_config, processing_config.chunk_size))
+        return cast(
+            Embedder,
+            embedder_map[embedding_config.mode](embedding_config, processing_config.chunk_size),
+        )
     else:
         return cast(Embedder, embedder_map[embedding_config.mode](embedding_config))

airbyte_cdk/destinations/vector_db_based/test_utils.py CHANGED Viewed

@@ -26,12 +26,19 @@ class BaseIntegrationTest(unittest.TestCase):
     It provides helper methods to create Airbyte catalogs, records and state messages.
     """
-    def _get_configured_catalog(self, destination_mode: DestinationSyncMode) -> ConfiguredAirbyteCatalog:
-        stream_schema = {"type": "object", "properties": {"str_col": {"type": "str"}, "int_col": {"type": "integer"}}}
+    def _get_configured_catalog(
+        self, destination_mode: DestinationSyncMode
+    ) -> ConfiguredAirbyteCatalog:
+        stream_schema = {
+            "type": "object",
+            "properties": {"str_col": {"type": "str"}, "int_col": {"type": "integer"}},
+        }
         overwrite_stream = ConfiguredAirbyteStream(
             stream=AirbyteStream(
-                name="mystream", json_schema=stream_schema, supported_sync_modes=[SyncMode.incremental, SyncMode.full_refresh]
+                name="mystream",
+                json_schema=stream_schema,
+                supported_sync_modes=[SyncMode.incremental, SyncMode.full_refresh],
             ),
             primary_key=[["int_col"]],
             sync_mode=SyncMode.incremental,
@@ -45,7 +52,10 @@ class BaseIntegrationTest(unittest.TestCase):
     def _record(self, stream: str, str_value: str, int_value: int) -> AirbyteMessage:
         return AirbyteMessage(
-            type=Type.RECORD, record=AirbyteRecordMessage(stream=stream, data={"str_col": str_value, "int_col": int_value}, emitted_at=0)
+            type=Type.RECORD,
+            record=AirbyteRecordMessage(
+                stream=stream, data={"str_col": str_value, "int_col": int_value}, emitted_at=0
+            ),
         )
     def setUp(self) -> None:

airbyte_cdk/destinations/vector_db_based/utils.py CHANGED Viewed

@@ -10,7 +10,11 @@ from airbyte_cdk.models import AirbyteRecordMessage, AirbyteStream
 def format_exception(exception: Exception) -> str:
-    return str(exception) + "\n" + "".join(traceback.TracebackException.from_exception(exception).format())
+    return (
+        str(exception)
+        + "\n"
+        + "".join(traceback.TracebackException.from_exception(exception).format())
+    )
 def create_chunks(iterable: Iterable[Any], batch_size: int) -> Iterator[Tuple[Any, ...]]:
@@ -26,4 +30,6 @@ def create_stream_identifier(stream: Union[AirbyteStream, AirbyteRecordMessage])
     if isinstance(stream, AirbyteStream):
         return str(stream.name if stream.namespace is None else f"{stream.namespace}_{stream.name}")
     else:
-        return str(stream.stream if stream.namespace is None else f"{stream.namespace}_{stream.stream}")
+        return str(
+            stream.stream if stream.namespace is None else f"{stream.namespace}_{stream.stream}"
+        )

airbyte_cdk/destinations/vector_db_based/writer.py CHANGED Viewed

@@ -27,7 +27,12 @@ class Writer:
     """
     def __init__(
-        self, processing_config: ProcessingConfigModel, indexer: Indexer, embedder: Embedder, batch_size: int, omit_raw_text: bool
+        self,
+        processing_config: ProcessingConfigModel,
+        indexer: Indexer,
+        embedder: Embedder,
+        batch_size: int,
+        omit_raw_text: bool,
     ) -> None:
         self.processing_config = processing_config
         self.indexer = indexer
@@ -54,7 +59,9 @@ class Writer:
             self.indexer.delete(ids, namespace, stream)
         for (namespace, stream), chunks in self.chunks.items():
-            embeddings = self.embedder.embed_documents([self._convert_to_document(chunk) for chunk in chunks])
+            embeddings = self.embedder.embed_documents(
+                [self._convert_to_document(chunk) for chunk in chunks]
+            )
             for i, document in enumerate(chunks):
                 document.embedding = embeddings[i]
                 if self.omit_raw_text:
@@ -63,7 +70,9 @@ class Writer:
         self._init_batch()
-    def write(self, configured_catalog: ConfiguredAirbyteCatalog, input_messages: Iterable[AirbyteMessage]) -> Iterable[AirbyteMessage]:
+    def write(
+        self, configured_catalog: ConfiguredAirbyteCatalog, input_messages: Iterable[AirbyteMessage]
+    ) -> Iterable[AirbyteMessage]:
         self.processor = DocumentProcessor(self.processing_config, configured_catalog)
         self.indexer.pre_sync(configured_catalog)
         for message in input_messages:
@@ -76,7 +85,9 @@ class Writer:
                 record_chunks, record_id_to_delete = self.processor.process(message.record)
                 self.chunks[(message.record.namespace, message.record.stream)].extend(record_chunks)
                 if record_id_to_delete is not None:
-                    self.ids_to_delete[(message.record.namespace, message.record.stream)].append(record_id_to_delete)
+                    self.ids_to_delete[(message.record.namespace, message.record.stream)].append(
+                        record_id_to_delete
+                    )
                 self.number_of_chunks += len(record_chunks)
                 if self.number_of_chunks >= self.batch_size:
                     self._process_batch()

airbyte_cdk/entrypoint.py CHANGED Viewed

@@ -62,33 +62,54 @@ class AirbyteEntrypoint(object):
     def parse_args(args: List[str]) -> argparse.Namespace:
         # set up parent parsers
         parent_parser = argparse.ArgumentParser(add_help=False)
-        parent_parser.add_argument("--debug", action="store_true", help="enables detailed debug logs related to the sync")
+        parent_parser.add_argument(
+            "--debug", action="store_true", help="enables detailed debug logs related to the sync"
+        )
         main_parser = argparse.ArgumentParser()
         subparsers = main_parser.add_subparsers(title="commands", dest="command")
         # spec
-        subparsers.add_parser("spec", help="outputs the json configuration specification", parents=[parent_parser])
+        subparsers.add_parser(
+            "spec", help="outputs the json configuration specification", parents=[parent_parser]
+        )
         # check
-        check_parser = subparsers.add_parser("check", help="checks the config can be used to connect", parents=[parent_parser])
+        check_parser = subparsers.add_parser(
+            "check", help="checks the config can be used to connect", parents=[parent_parser]
+        )
         required_check_parser = check_parser.add_argument_group("required named arguments")
-        required_check_parser.add_argument("--config", type=str, required=True, help="path to the json configuration file")
+        required_check_parser.add_argument(
+            "--config", type=str, required=True, help="path to the json configuration file"
+        )
         # discover
         discover_parser = subparsers.add_parser(
-            "discover", help="outputs a catalog describing the source's schema", parents=[parent_parser]
+            "discover",
+            help="outputs a catalog describing the source's schema",
+            parents=[parent_parser],
         )
         required_discover_parser = discover_parser.add_argument_group("required named arguments")
-        required_discover_parser.add_argument("--config", type=str, required=True, help="path to the json configuration file")
+        required_discover_parser.add_argument(
+            "--config", type=str, required=True, help="path to the json configuration file"
+        )
         # read
-        read_parser = subparsers.add_parser("read", help="reads the source and outputs messages to STDOUT", parents=[parent_parser])
+        read_parser = subparsers.add_parser(
+            "read", help="reads the source and outputs messages to STDOUT", parents=[parent_parser]
+        )
-        read_parser.add_argument("--state", type=str, required=False, help="path to the json-encoded state file")
+        read_parser.add_argument(
+            "--state", type=str, required=False, help="path to the json-encoded state file"
+        )
         required_read_parser = read_parser.add_argument_group("required named arguments")
-        required_read_parser.add_argument("--config", type=str, required=True, help="path to the json configuration file")
         required_read_parser.add_argument(
-            "--catalog", type=str, required=True, help="path to the catalog used to determine which data to read"
+            "--config", type=str, required=True, help="path to the json configuration file"
+        )
+        required_read_parser.add_argument(
+            "--catalog",
+            type=str,
+            required=True,
+            help="path to the catalog used to determine which data to read",
         )
         return main_parser.parse_args(args)
@@ -108,11 +129,14 @@ class AirbyteEntrypoint(object):
         source_spec: ConnectorSpecification = self.source.spec(self.logger)
         try:
             with tempfile.TemporaryDirectory() as temp_dir:
-                os.environ[ENV_REQUEST_CACHE_PATH] = temp_dir  # set this as default directory for request_cache to store *.sqlite files
+                os.environ[ENV_REQUEST_CACHE_PATH] = (
+                    temp_dir  # set this as default directory for request_cache to store *.sqlite files
+                )
                 if cmd == "spec":
                     message = AirbyteMessage(type=Type.SPEC, spec=source_spec)
                     yield from [
-                        self.airbyte_message_to_string(queued_message) for queued_message in self._emit_queued_messages(self.source)
+                        self.airbyte_message_to_string(queued_message)
+                        for queued_message in self._emit_queued_messages(self.source)
                     ]
                     yield self.airbyte_message_to_string(message)
                 else:
@@ -120,23 +144,38 @@ class AirbyteEntrypoint(object):
                     config = self.source.configure(raw_config, temp_dir)
                     yield from [
-                        self.airbyte_message_to_string(queued_message) for queued_message in self._emit_queued_messages(self.source)
+                        self.airbyte_message_to_string(queued_message)
+                        for queued_message in self._emit_queued_messages(self.source)
                     ]
                     if cmd == "check":
-                        yield from map(AirbyteEntrypoint.airbyte_message_to_string, self.check(source_spec, config))
+                        yield from map(
+                            AirbyteEntrypoint.airbyte_message_to_string,
+                            self.check(source_spec, config),
+                        )
                     elif cmd == "discover":
-                        yield from map(AirbyteEntrypoint.airbyte_message_to_string, self.discover(source_spec, config))
+                        yield from map(
+                            AirbyteEntrypoint.airbyte_message_to_string,
+                            self.discover(source_spec, config),
+                        )
                     elif cmd == "read":
                         config_catalog = self.source.read_catalog(parsed_args.catalog)
                         state = self.source.read_state(parsed_args.state)
-                        yield from map(AirbyteEntrypoint.airbyte_message_to_string, self.read(source_spec, config, config_catalog, state))
+                        yield from map(
+                            AirbyteEntrypoint.airbyte_message_to_string,
+                            self.read(source_spec, config, config_catalog, state),
+                        )
                     else:
                         raise Exception("Unexpected command " + cmd)
         finally:
-            yield from [self.airbyte_message_to_string(queued_message) for queued_message in self._emit_queued_messages(self.source)]
-    def check(self, source_spec: ConnectorSpecification, config: TConfig) -> Iterable[AirbyteMessage]:
+            yield from [
+                self.airbyte_message_to_string(queued_message)
+                for queued_message in self._emit_queued_messages(self.source)
+            ]
+    def check(
+        self, source_spec: ConnectorSpecification, config: TConfig
+    ) -> Iterable[AirbyteMessage]:
         self.set_up_secret_filter(config, source_spec.connectionSpecification)
         try:
             self.validate_connection(source_spec, config)
@@ -161,7 +200,10 @@ class AirbyteEntrypoint(object):
                 raise traced_exc
             else:
                 yield AirbyteMessage(
-                    type=Type.CONNECTION_STATUS, connectionStatus=AirbyteConnectionStatus(status=Status.FAILED, message=traced_exc.message)
+                    type=Type.CONNECTION_STATUS,
+                    connectionStatus=AirbyteConnectionStatus(
+                        status=Status.FAILED, message=traced_exc.message
+                    ),
                 )
                 return
         if check_result.status == Status.SUCCEEDED:
@@ -172,7 +214,9 @@ class AirbyteEntrypoint(object):
         yield from self._emit_queued_messages(self.source)
         yield AirbyteMessage(type=Type.CONNECTION_STATUS, connectionStatus=check_result)
-    def discover(self, source_spec: ConnectorSpecification, config: TConfig) -> Iterable[AirbyteMessage]:
+    def discover(
+        self, source_spec: ConnectorSpecification, config: TConfig
+    ) -> Iterable[AirbyteMessage]:
         self.set_up_secret_filter(config, source_spec.connectionSpecification)
         if self.source.check_config_against_spec:
             self.validate_connection(source_spec, config)
@@ -181,7 +225,9 @@ class AirbyteEntrypoint(object):
         yield from self._emit_queued_messages(self.source)
         yield AirbyteMessage(type=Type.CATALOG, catalog=catalog)
-    def read(self, source_spec: ConnectorSpecification, config: TConfig, catalog: Any, state: list[Any]) -> Iterable[AirbyteMessage]:
+    def read(
+        self, source_spec: ConnectorSpecification, config: TConfig, catalog: Any, state: list[Any]
+    ) -> Iterable[AirbyteMessage]:
         self.set_up_secret_filter(config, source_spec.connectionSpecification)
         if self.source.check_config_against_spec:
             self.validate_connection(source_spec, config)
@@ -194,16 +240,24 @@ class AirbyteEntrypoint(object):
             yield self.handle_record_counts(message, stream_message_counter)
     @staticmethod
-    def handle_record_counts(message: AirbyteMessage, stream_message_count: DefaultDict[HashableStreamDescriptor, float]) -> AirbyteMessage:
+    def handle_record_counts(
+        message: AirbyteMessage, stream_message_count: DefaultDict[HashableStreamDescriptor, float]
+    ) -> AirbyteMessage:
         match message.type:
             case Type.RECORD:
-                stream_message_count[HashableStreamDescriptor(name=message.record.stream, namespace=message.record.namespace)] += 1.0  # type: ignore[union-attr] # record has `stream` and `namespace`
+                stream_message_count[
+                    HashableStreamDescriptor(
+                        name=message.record.stream, namespace=message.record.namespace
+                    )
+                ] += 1.0  # type: ignore[union-attr] # record has `stream` and `namespace`
             case Type.STATE:
                 stream_descriptor = message_utils.get_stream_descriptor(message)
                 # Set record count from the counter onto the state message
                 message.state.sourceStats = message.state.sourceStats or AirbyteStateStats()  # type: ignore[union-attr] # state has `sourceStats`
-                message.state.sourceStats.recordCount = stream_message_count.get(stream_descriptor, 0.0)  # type: ignore[union-attr] # state has `sourceStats`
+                message.state.sourceStats.recordCount = stream_message_count.get(
+                    stream_descriptor, 0.0
+                )  # type: ignore[union-attr] # state has `sourceStats`
                 # Reset the counter
                 stream_message_count[stream_descriptor] = 0.0
@@ -283,7 +337,9 @@ def _init_internal_request_filter() -> None:
             )
         if not parsed_url.hostname:
-            raise requests.exceptions.InvalidURL("Invalid URL specified: The endpoint that data is being requested from is not a valid URL")
+            raise requests.exceptions.InvalidURL(
+                "Invalid URL specified: The endpoint that data is being requested from is not a valid URL"
+            )
         try:
             is_private = _is_private_url(parsed_url.hostname, parsed_url.port)  # type: ignore [arg-type]

airbyte-cdk 6.5.3rc2__py3-none-any.whl → 6.6.0__py3-none-any.whl

airbyte-cdk 6.5.3rc2__py3-none-any.whl → 6.6.0__py3-none-any.whl