PyPI - airbyte-cdk - Versions diffs - 6.54.11__py3-none-any.whl → 6.55.1__py3-none-any.whl - Mend

airbyte-cdk 6.54.11__py3-none-any.whl → 6.55.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

airbyte_cdk/test/entrypoint_wrapper.py CHANGED Viewed

@@ -19,9 +19,11 @@ import logging
 import re
 import tempfile
 import traceback
+from collections import deque
+from collections.abc import Generator, Mapping
 from io import StringIO
 from pathlib import Path
-from typing import Any, List, Mapping, Optional, Union
+from typing import Any, List, Literal, Optional, Union, final, overload
 import orjson
 from pydantic import ValidationError as V2ValidationError
@@ -36,6 +38,7 @@ from airbyte_cdk.models import (
     AirbyteMessageSerializer,
     AirbyteStateMessage,
     AirbyteStateMessageSerializer,
+    AirbyteStreamState,
     AirbyteStreamStatus,
     ConfiguredAirbyteCatalog,
     ConfiguredAirbyteCatalogSerializer,
@@ -48,13 +51,41 @@ from airbyte_cdk.test.models.scenario import ExpectedOutcome
 class EntrypointOutput:
-    def __init__(self, messages: List[str], uncaught_exception: Optional[BaseException] = None):
-        try:
-            self._messages = [self._parse_message(message) for message in messages]
-        except V2ValidationError as exception:
-            raise ValueError("All messages are expected to be AirbyteMessage") from exception
+    """A class to encapsulate the output of an Airbyte connector's execution.
+    This class can be initialized with a list of messages or a file containing messages.
+    It provides methods to access different types of messages produced during the execution
+    of an Airbyte connector, including both successful messages and error messages.
+    When working with records and state messages, it provides both a list and an iterator
+    implementation. Lists are easier to work with, but generators are better suited to handle
+    large volumes of messages without overflowing the available memory.
+    """
+    def __init__(
+        self,
+        messages: list[str] | None = None,
+        uncaught_exception: Optional[BaseException] = None,
+        *,
+        message_file: Path | None = None,
+    ) -> None:
+        if messages is None and message_file is None:
+            raise ValueError("Either messages or message_file must be provided")
+        if messages is not None and message_file is not None:
+            raise ValueError("Only one of messages or message_file can be provided")
+        self._messages: list[AirbyteMessage] | None = None
+        self._message_file: Path | None = message_file
+        if messages:
+            try:
+                self._messages = [self._parse_message(message) for message in messages]
+            except V2ValidationError as exception:
+                raise ValueError("All messages are expected to be AirbyteMessage") from exception
         if uncaught_exception:
+            if self._messages is None:
+                self._messages = []
             self._messages.append(
                 assemble_uncaught_exception(
                     type(uncaught_exception), uncaught_exception
@@ -72,39 +103,76 @@ class EntrypointOutput:
             )
     @property
-    def records_and_state_messages(self) -> List[AirbyteMessage]:
-        return self._get_message_by_types([Type.RECORD, Type.STATE])
+    def records_and_state_messages(
+        self,
+    ) -> list[AirbyteMessage]:
+        return self.get_message_by_types(
+            message_types=[Type.RECORD, Type.STATE],
+            safe_iterator=False,
+        )
+    def records_and_state_messages_iterator(
+        self,
+    ) -> Generator[AirbyteMessage, None, None]:
+        """Returns a generator that yields record and state messages one by one.
+        Use this instead of `records_and_state_messages` when the volume of messages could be large
+        enough to overload available memory.
+        """
+        return self.get_message_by_types(
+            message_types=[Type.RECORD, Type.STATE],
+            safe_iterator=True,
+        )
     @property
     def records(self) -> List[AirbyteMessage]:
-        return self._get_message_by_types([Type.RECORD])
+        return self.get_message_by_types([Type.RECORD])
+    @property
+    def records_iterator(self) -> Generator[AirbyteMessage, None, None]:
+        """Returns a generator that yields record messages one by one.
+        Use this instead of `records` when the volume of records could be large
+        enough to overload available memory.
+        """
+        return self.get_message_by_types([Type.RECORD], safe_iterator=True)
     @property
     def state_messages(self) -> List[AirbyteMessage]:
-        return self._get_message_by_types([Type.STATE])
+        return self.get_message_by_types([Type.STATE])
     @property
     def spec_messages(self) -> List[AirbyteMessage]:
-        return self._get_message_by_types([Type.SPEC])
+        return self.get_message_by_types([Type.SPEC])
     @property
     def connection_status_messages(self) -> List[AirbyteMessage]:
-        return self._get_message_by_types([Type.CONNECTION_STATUS])
+        return self.get_message_by_types([Type.CONNECTION_STATUS])
     @property
-    def most_recent_state(self) -> Any:
-        state_messages = self._get_message_by_types([Type.STATE])
-        if not state_messages:
-            raise ValueError("Can't provide most recent state as there are no state messages")
-        return state_messages[-1].state.stream  # type: ignore[union-attr] # state has `stream`
+    def most_recent_state(self) -> AirbyteStreamState | None:
+        state_message_iterator = self.get_message_by_types(
+            [Type.STATE],
+            safe_iterator=True,
+        )
+        # Use a deque with maxlen=1 to efficiently get the last state message
+        double_ended_queue = deque(state_message_iterator, maxlen=1)
+        try:
+            final_state_message: AirbyteMessage = double_ended_queue.pop()
+        except IndexError:
+            raise ValueError(
+                "Can't provide most recent state as there are no state messages."
+            ) from None
+        return final_state_message.state.stream  # type: ignore[union-attr] # state has `stream`
     @property
     def logs(self) -> List[AirbyteMessage]:
-        return self._get_message_by_types([Type.LOG])
+        return self.get_message_by_types([Type.LOG])
     @property
     def trace_messages(self) -> List[AirbyteMessage]:
-        return self._get_message_by_types([Type.TRACE])
+        return self.get_message_by_types([Type.TRACE])
     @property
     def analytics_messages(self) -> List[AirbyteMessage]:
@@ -116,7 +184,7 @@ class EntrypointOutput:
     @property
     def catalog(self) -> AirbyteMessage:
-        catalog = self._get_message_by_types([Type.CATALOG])
+        catalog = self.get_message_by_types([Type.CATALOG])
         if len(catalog) != 1:
             raise ValueError(f"Expected exactly one catalog but got {len(catalog)}")
         return catalog[0]
@@ -131,13 +199,80 @@ class EntrypointOutput:
         )
         return list(status_messages)
-    def _get_message_by_types(self, message_types: List[Type]) -> List[AirbyteMessage]:
-        return [message for message in self._messages if message.type in message_types]
+    def get_message_iterator(self) -> Generator[AirbyteMessage, None, None]:
+        """Creates a generator which yields messages one by one.
+        This will iterate over all messages in the output file (if provided) or the messages
+        provided during initialization. File results are provided first, followed by any
+        messages that were passed in directly.
+        """
+        if self._message_file:
+            try:
+                with open(self._message_file, "r", encoding="utf-8") as file:
+                    for line in file:
+                        if not line.strip():
+                            # Skip empty lines
+                            continue
+                        yield self._parse_message(line.strip())
+            except FileNotFoundError:
+                raise ValueError(f"Message file {self._message_file} not found")
+        if self._messages is not None:
+            yield from self._messages
+    # Overloads to provide proper type hints for different usages of `get_message_by_types`.
+    @overload
+    def get_message_by_types(
+        self,
+        message_types: list[Type],
+    ) -> list[AirbyteMessage]: ...
+    @overload
+    def get_message_by_types(
+        self,
+        message_types: list[Type],
+        *,
+        safe_iterator: Literal[False],
+    ) -> list[AirbyteMessage]: ...
+    @overload
+    def get_message_by_types(
+        self,
+        message_types: list[Type],
+        *,
+        safe_iterator: Literal[True],
+    ) -> Generator[AirbyteMessage, None, None]: ...
+    def get_message_by_types(
+        self,
+        message_types: list[Type],
+        *,
+        safe_iterator: bool = False,
+    ) -> list[AirbyteMessage] | Generator[AirbyteMessage, None, None]:
+        """Get messages of specific types.
+        If `safe_iterator` is True, returns a generator that yields messages one by one.
+        If `safe_iterator` is False, returns a list of messages.
+        Use `safe_iterator=True` when the volume of messages could overload the available
+        memory.
+        """
+        message_generator = self.get_message_iterator()
+        if safe_iterator:
+            return (message for message in message_generator if message.type in message_types)
+        return [message for message in message_generator if message.type in message_types]
     def _get_trace_message_by_trace_type(self, trace_type: TraceType) -> List[AirbyteMessage]:
         return [
             message
-            for message in self._get_message_by_types([Type.TRACE])
+            for message in self.get_message_by_types(
+                [Type.TRACE],
+                safe_iterator=True,
+            )
             if message.trace.type == trace_type  # type: ignore[union-attr] # trace has `type`
         ]
@@ -184,7 +319,7 @@ def _run_command(
     parsed_args = AirbyteEntrypoint.parse_args(args)
     source_entrypoint = AirbyteEntrypoint(source)
-    messages = []
+    messages: list[str] = []
     uncaught_exception = None
     try:
         for message in source_entrypoint.run(parsed_args):
@@ -199,8 +334,10 @@ def _run_command(
     captured_logs = log_capture_buffer.getvalue().split("\n")[:-1]
     parent_logger.removeHandler(stream_handler)
-    return EntrypointOutput(messages + captured_logs, uncaught_exception=uncaught_exception)
+    return EntrypointOutput(
+        messages=messages + captured_logs,
+        uncaught_exception=uncaught_exception,
+    )
 def discover(

airbyte_cdk/test/models/scenario.py CHANGED Viewed

@@ -9,14 +9,20 @@ up iteration cycles.
 from __future__ import annotations
+import json
+import tempfile
+from contextlib import contextmanager, suppress
 from pathlib import Path  # noqa: TC003  # Pydantic needs this (don't move to 'if typing' block)
-from typing import Any, Literal, cast
+from typing import TYPE_CHECKING, Any, Literal, cast
 import yaml
 from pydantic import BaseModel, ConfigDict
 from airbyte_cdk.test.models.outcome import ExpectedOutcome
+if TYPE_CHECKING:
+    from collections.abc import Generator
 class ConnectorTestScenario(BaseModel):
     """Acceptance test scenario, as a Pydantic model.
@@ -41,13 +47,13 @@ class ConnectorTestScenario(BaseModel):
     config_path: Path | None = None
     config_dict: dict[str, Any] | None = None
-    id: str | None = None
+    _id: str | None = None  # Used to override the default ID generation
     configured_catalog_path: Path | None = None
     timeout_seconds: int | None = None
     expect_records: AcceptanceTestExpectRecords | None = None
     file_types: AcceptanceTestFileTypes | None = None
-    status: Literal["succeed", "failed"] | None = None
+    status: Literal["succeed", "failed", "exception"] | None = None
     def get_config_dict(
         self,
@@ -93,16 +99,49 @@ class ConnectorTestScenario(BaseModel):
         return ExpectedOutcome.from_status_str(self.status)
     @property
-    def instance_name(self) -> str:
-        return self.config_path.stem if self.config_path else "Unnamed Scenario"
+    def id(self) -> str:
+        """Return a unique identifier for the test scenario.
+        This is used by PyTest to identify the test scenario.
+        """
+        if self._id:
+            return self._id
-    def __str__(self) -> str:
-        if self.id:
-            return f"'{self.id}' Test Scenario"
         if self.config_path:
-            return f"'{self.config_path.name}' Test Scenario"
+            return self.config_path.stem
+        return str(hash(self))
-        return f"'{hash(self)}' Test Scenario"
+    def __str__(self) -> str:
+        return f"'{self.id}' Test Scenario"
+    @contextmanager
+    def with_temp_config_file(
+        self,
+        connector_root: Path,
+    ) -> Generator[Path, None, None]:
+        """Yield a temporary JSON file path containing the config dict and delete it on exit."""
+        config = self.get_config_dict(
+            empty_if_missing=True,
+            connector_root=connector_root,
+        )
+        with tempfile.NamedTemporaryFile(
+            prefix="config-",
+            suffix=".json",
+            mode="w",
+            delete=False,  # Don't fail if cannot delete the file on exit
+            encoding="utf-8",
+        ) as temp_file:
+            temp_file.write(json.dumps(config))
+            temp_file.flush()
+            # Allow the file to be read by other processes
+            temp_path = Path(temp_file.name)
+            temp_path.chmod(temp_path.stat().st_mode | 0o444)
+            yield temp_path
+        # attempt cleanup, ignore errors
+        with suppress(OSError):
+            temp_path.unlink()
     def without_expected_outcome(self) -> ConnectorTestScenario:
         """Return a copy of the scenario that does not expect failure or success.

airbyte_cdk/test/standard_tests/__init__.py CHANGED Viewed

@@ -27,10 +27,8 @@ Available test suites base classes:
 '''
-from airbyte_cdk.test.standard_tests.connector_base import (
-    ConnectorTestScenario,
-    ConnectorTestSuiteBase,
-)
+from airbyte_cdk.test.models.scenario import ConnectorTestScenario
+from airbyte_cdk.test.standard_tests.connector_base import ConnectorTestSuiteBase
 from airbyte_cdk.test.standard_tests.declarative_sources import (
     DeclarativeSourceTestSuite,
 )

airbyte_cdk/test/standard_tests/connector_base.py CHANGED Viewed

@@ -3,16 +3,11 @@
 from __future__ import annotations
-import abc
 import importlib
-import inspect
 import os
-import sys
-from collections.abc import Callable
 from pathlib import Path
-from typing import cast
+from typing import TYPE_CHECKING, cast
-import yaml
 from boltons.typeutils import classproperty
 from airbyte_cdk.models import (
@@ -24,14 +19,20 @@ from airbyte_cdk.test.models import (
     ConnectorTestScenario,
 )
 from airbyte_cdk.test.standard_tests._job_runner import IConnector, run_test_job
+from airbyte_cdk.test.standard_tests.docker_base import DockerConnectorTestSuite
 from airbyte_cdk.utils.connector_paths import (
     ACCEPTANCE_TEST_CONFIG,
     find_connector_root,
 )
+if TYPE_CHECKING:
+    from collections.abc import Callable
-class ConnectorTestSuiteBase(abc.ABC):
-    """Base class for connector test suites."""
+    from airbyte_cdk.test import entrypoint_wrapper
+class ConnectorTestSuiteBase(DockerConnectorTestSuite):
+    """Base class for Python connector test suites."""
     connector: type[IConnector] | Callable[[], IConnector] | None  # type: ignore [reportRedeclaration]
     """The connector class or a factory function that returns an scenario of IConnector."""
@@ -79,13 +80,6 @@ class ConnectorTestSuiteBase(abc.ABC):
                 ) from e
             return cast(type[IConnector], getattr(module, matching_class_name))
-    @classmethod
-    def get_test_class_dir(cls) -> Path:
-        """Get the file path that contains the class."""
-        module = sys.modules[cls.__module__]
-        # Get the directory containing the test file
-        return Path(inspect.getfile(module)).parent
     @classmethod
     def create_connector(
         cls,
@@ -118,69 +112,7 @@ class ConnectorTestSuiteBase(abc.ABC):
             test_scenario=scenario,
             connector_root=self.get_connector_root_dir(),
         )
-        conn_status_messages: list[AirbyteMessage] = [
-            msg for msg in result._messages if msg.type == Type.CONNECTION_STATUS
-        ]  # noqa: SLF001  # Non-public API
-        assert len(conn_status_messages) == 1, (
-            f"Expected exactly one CONNECTION_STATUS message. Got: {result._messages}"
+        assert len(result.connection_status_messages) == 1, (
+            f"Expected exactly one CONNECTION_STATUS message. "
+            "Got: {result.connection_status_messages!s}"
         )
-    @classmethod
-    def get_connector_root_dir(cls) -> Path:
-        """Get the root directory of the connector."""
-        return find_connector_root([cls.get_test_class_dir(), Path.cwd()])
-    @classproperty
-    def acceptance_test_config_path(cls) -> Path:
-        """Get the path to the acceptance test config file."""
-        result = cls.get_connector_root_dir() / ACCEPTANCE_TEST_CONFIG
-        if result.exists():
-            return result
-        raise FileNotFoundError(f"Acceptance test config file not found at: {str(result)}")
-    @classmethod
-    def get_scenarios(
-        cls,
-    ) -> list[ConnectorTestScenario]:
-        """Get acceptance tests for a given category.
-        This has to be a separate function because pytest does not allow
-        parametrization of fixtures with arguments from the test class itself.
-        """
-        categories = ["connection", "spec"]
-        all_tests_config = yaml.safe_load(cls.acceptance_test_config_path.read_text())
-        if "acceptance_tests" not in all_tests_config:
-            raise ValueError(
-                f"Acceptance tests config not found in {cls.acceptance_test_config_path}."
-                f" Found only: {str(all_tests_config)}."
-            )
-        test_scenarios: list[ConnectorTestScenario] = []
-        for category in categories:
-            if (
-                category not in all_tests_config["acceptance_tests"]
-                or "tests" not in all_tests_config["acceptance_tests"][category]
-            ):
-                continue
-            for test in all_tests_config["acceptance_tests"][category]["tests"]:
-                if "config_path" not in test:
-                    # Skip tests without a config_path
-                    continue
-                if "iam_role" in test["config_path"]:
-                    # We skip iam_role tests for now, as they are not supported in the test suite.
-                    continue
-                scenario = ConnectorTestScenario.model_validate(test)
-                if scenario.config_path and scenario.config_path in [
-                    s.config_path for s in test_scenarios
-                ]:
-                    # Skip duplicate scenarios based on config_path
-                    continue
-                test_scenarios.append(scenario)
-        return test_scenarios

airbyte-cdk 6.54.11__py3-none-any.whl → 6.55.1__py3-none-any.whl

airbyte-cdk 6.54.11__py3-none-any.whl → 6.55.1__py3-none-any.whl