PyPI - airbyte-internal-ops - Versions diffs - 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl - Mend

airbyte-internal-ops 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

airbyte_ops_mcp/live_tests/_connection_retriever/retrieval.py ADDED Viewed

@@ -0,0 +1,391 @@
+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+"""Core retrieval logic for vendored connection-retriever.
+Vendored from: airbyte-platform-internal/tools/connection-retriever/src/connection_retriever/retrieval.py
+This is a minimal subset focused on retrieving unmasked source config.
+For testing candidate discovery, see issue #91.
+"""
+from __future__ import annotations
+import logging
+import uuid
+from dataclasses import dataclass
+from typing import Any, Mapping
+import requests
+import sqlalchemy
+from google.cloud import secretmanager
+from airbyte_ops_mcp.live_tests._connection_retriever.audit_logging import (
+    audit,
+)
+from airbyte_ops_mcp.live_tests._connection_retriever.consts import (
+    CLOUD_REGISTRY_URL,
+    ConnectionObject,
+)
+from airbyte_ops_mcp.live_tests._connection_retriever.db_access import (
+    get_pool,
+)
+from airbyte_ops_mcp.live_tests._connection_retriever.secrets_resolution import (
+    get_resolved_config,
+)
+# Module-level logger named after this module.
+LOGGER = logging.getLogger(__name__)
+# SQL Queries
+# Every query below takes its inputs as named bind parameters (":name").
+#
+# Selects source_id, destination_id, source_catalog_id and catalog for one
+# connection. The joins walk connection -> source actor -> workspace ->
+# dataplane_group, and the WHERE clause drops rows whose dataplane group is
+# named 'EU', so an EU connection yields no row at all.
+SELECT_ON_CONNECTION_NOT_EU = sqlalchemy.text(
+    """
+    SELECT
+        source_id,
+        destination_id,
+        source_catalog_id,
+        catalog
+    FROM
+        connection
+    JOIN
+        actor ON connection.source_id = actor.id
+    JOIN
+        workspace ON actor.workspace_id = workspace.id
+    JOIN
+        dataplane_group ON workspace.dataplane_group_id = dataplane_group.id
+    WHERE
+        connection.id = :connection_id
+        AND dataplane_group.name != 'EU'
+    """
+)
+# Returns a single boolean column, is_eu, for one connection: TRUE when the
+# dataplane group reached through the source actor's workspace is named 'EU'.
+# No row is returned when the joins find nothing for the connection id.
+SELECT_ON_CONNECTION_DATAPLANE_GROUP_IS_EU = sqlalchemy.text(
+    """
+    SELECT
+        CASE WHEN dataplane_group.name = 'EU' THEN TRUE ELSE FALSE END as is_eu
+    FROM
+        connection
+    JOIN
+        actor ON connection.source_id = actor.id
+    JOIN
+        workspace ON actor.workspace_id = workspace.id
+    JOIN
+        dataplane_group ON workspace.dataplane_group_id = dataplane_group.id
+    WHERE
+        connection.id = :connection_id
+    """
+)
+# Selects organization_id, workspace_id, actor_definition_id and configuration
+# for one actor, joined with the workspace that owns it.
+SELECT_ON_ACTOR_WITH_ORGANIZATION = sqlalchemy.text(
+    """
+    SELECT
+        organization_id,
+        workspace_id,
+        actor_definition_id,
+        configuration
+    FROM
+        actor
+    JOIN
+        workspace ON workspace.id = actor.workspace_id
+    WHERE
+        actor.id = :actor_id
+    """
+)
+# Selects every OAuth parameter row (organization_id, workspace_id,
+# configuration) registered for an actor definition, oldest row first.
+SELECT_ON_OAUTH_PARAMETER = sqlalchemy.text(
+    """
+    SELECT
+        organization_id,
+        workspace_id,
+        configuration
+    FROM
+        actor_oauth_parameter
+    WHERE
+        actor_definition_id = :actor_definition_id
+    ORDER BY created_at ASC;
+    """
+)
+@dataclass
+class RetrievalMetadata:
+    """Metadata about a retrieval operation for audit logging."""
+    # Id of the connection whose object is being retrieved.
+    connection_id: str
+    # Which kind of connection object is being retrieved.
+    connection_object: ConnectionObject
+    # Caller-supplied justification for the retrieval.
+    retrieval_reason: str
+@dataclass
+class TestingCandidate:
+    """Connection considered for testing, together with the objects retrieved for it.
+    Only ``connection_id`` is mandatory. Every other field starts as ``None``
+    and can be filled in later, for instance through :meth:`update`.
+    """
+    connection_id: str
+    connection_url: str | None = None
+    stream_count: int | None = None
+    last_attempt_duration_in_microseconds: int | None = None
+    is_internal: bool | None = None
+    streams_with_data: list[str] | None = None
+    # ConnectionObject fields
+    connection: str | None = None
+    source_id: str | None = None
+    destination_id: str | None = None
+    destination_config: Mapping | None = None
+    source_config: Mapping | None = None
+    catalog: Mapping | None = None
+    configured_catalog: Mapping | None = None
+    state: list[Mapping] | None = None
+    workspace_id: str | None = None
+    destination_docker_image: str | None = None
+    source_docker_image: str | None = None
+    def update(self, **kwargs: Any) -> None:
+        """Assign each keyword argument to the attribute of the same name.
+        Keywords are applied in the order given.
+        Raises:
+            AttributeError: If a keyword does not name an existing attribute.
+                Attributes assigned before the offending keyword keep their
+                new values.
+        """
+        for name, new_value in kwargs.items():
+            if not hasattr(self, name):
+                raise AttributeError(
+                    f"{name} is not a valid field of {self.__class__.__name__}"
+                )
+            setattr(self, name, new_value)
+class ConnectionNotFoundError(Exception):
+    """Signals that no row could be found for the requested connection id."""
+@audit
+def get_connection(
+    connection_id: str,
+    db_conn: sqlalchemy.Connection,
+) -> Mapping | None:
+    """Look up one connection and return its key columns as a mapping.
+    The lookup runs SELECT_ON_CONNECTION_NOT_EU, so a connection whose
+    dataplane group is named 'EU' is reported as not found.
+    Args:
+        connection_id: Id of the connection to look up.
+        db_conn: Open database connection used to run the query.
+    Returns:
+        A mapping with the keys ``source_id``, ``destination_id``,
+        ``source_catalog_id`` and ``catalog``.
+    Raises:
+        ValueError: If the query returns no row for ``connection_id``.
+    """
+    # NOTE(review): retrieve_object calls this with a RetrievalMetadata as an
+    # extra leading argument; presumably the @audit decorator consumes it.
+    # Confirm in audit_logging.
+    row = db_conn.execute(
+        SELECT_ON_CONNECTION_NOT_EU, parameters={"connection_id": connection_id}
+    ).first()
+    if row is None:
+        raise ValueError(f"Could not find connection {connection_id}.")
+    # Column order mirrors the SELECT list of the query.
+    column_names = ("source_id", "destination_id", "source_catalog_id", "catalog")
+    return {name: row[position] for position, name in enumerate(column_names)}
+def get_actor_config(
+    actor_id: str,
+    db_conn: sqlalchemy.Connection,
+    secret_manager_client: secretmanager.SecretManagerServiceClient,
+) -> Mapping | None:
+    """Build the fully resolved configuration of an actor.
+    Loads the actor row, fetches the connector spec for its definition, picks
+    the applicable OAuth parameter override and hands all three to
+    get_resolved_config.
+    Args:
+        actor_id: Id of the actor whose configuration is wanted.
+        db_conn: Open database connection used for the lookups.
+        secret_manager_client: Client passed on for secret resolution.
+    Returns:
+        Whatever get_resolved_config returns for this actor.
+    Raises:
+        ValueError: If no actor row exists for ``actor_id``.
+    """
+    actor_row = db_conn.execute(
+        SELECT_ON_ACTOR_WITH_ORGANIZATION, parameters={"actor_id": actor_id}
+    ).first()
+    if actor_row is None:
+        raise ValueError(f"Could not find actor configuration for actor {actor_id}.")
+    # Unpacking order mirrors the SELECT list of the query.
+    organization_id, workspace_id, actor_definition_id, stored_configuration = (
+        actor_row
+    )
+    connector_spec = get_spec(actor_definition_id)
+    oauth_overrides = _get_oauth_parameters_overrides(
+        db_conn=db_conn,
+        actor_definition_id=actor_definition_id,
+        actor_organization_id=organization_id,
+        actor_workspace_id=workspace_id,
+    )
+    return get_resolved_config(
+        secret_manager_client=secret_manager_client,
+        actor_configuration=stored_configuration,
+        actor_oauth_parameter=oauth_overrides,
+        spec=connector_spec,
+    )
+def _get_oauth_parameters_overrides(
+    db_conn: sqlalchemy.Connection,
+    actor_definition_id: str,
+    actor_organization_id: str,
+    actor_workspace_id: str,
+) -> dict:
+    """Pick the OAuth parameter configuration that applies to an actor.
+    All OAuth parameter rows of the actor definition are scanned in the order
+    the query returns them (oldest first) and matched with this priority:
+    1. Row with the actor's organization and the actor's workspace
+       (returned as soon as it is seen)
+    2. Row with the actor's workspace
+    3. Row with the actor's organization and no workspace
+    4. Row with neither organization nor workspace (the default; a later
+       default row replaces an earlier one)
+    Returns:
+        The selected configuration, or an empty dict when nothing applies.
+    Raises:
+        ValueError: If more than one organization-level row, or more than one
+            workspace-level row, matches the actor.
+    """
+    parameter_rows = db_conn.execute(
+        SELECT_ON_OAUTH_PARAMETER,
+        parameters={"actor_definition_id": actor_definition_id},
+    ).fetchall()
+    if not parameter_rows:
+        return {}
+    for_workspace = None
+    for_organization = None
+    fallback = None
+    for row_organization_id, row_workspace_id, row_configuration in parameter_rows:
+        same_organization = row_organization_id == actor_organization_id
+        same_workspace = row_workspace_id == actor_workspace_id
+        if same_organization and same_workspace:
+            # Exact match wins outright; no need to look at the other rows.
+            return row_configuration
+        if same_organization and row_workspace_id is None:
+            if for_organization is not None:
+                raise ValueError(
+                    "Multiple oauth parameters overrides for this actor_definition_id "
+                    "for this organization"
+                )
+            for_organization = row_configuration
+        elif same_workspace:
+            if for_workspace is not None:
+                raise ValueError(
+                    "Multiple oauth parameters overrides for this actor_definition_id "
+                    "for this workspace"
+                )
+            for_workspace = row_configuration
+        elif row_organization_id is None and row_workspace_id is None:
+            fallback = row_configuration
+    # Most specific remaining candidate first.
+    for chosen in (for_workspace, for_organization, fallback):
+        if chosen is not None:
+            return chosen
+    return {}
+@audit
+def get_source_config(
+    source_id: str,
+    db_conn: sqlalchemy.Connection,
+    secret_manager_client: secretmanager.SecretManagerServiceClient,
+) -> Mapping | None:
+    """Get resolved source configuration with secrets.
+    Delegates to get_actor_config, passing the source id as the actor id.
+    Args:
+        source_id: Id of the source actor.
+        db_conn: Open database connection used for the lookups.
+        secret_manager_client: Client passed on for secret resolution.
+    Returns:
+        The value returned by get_actor_config.
+    """
+    # NOTE(review): retrieve_object calls this with a RetrievalMetadata as an
+    # extra leading argument; presumably the @audit decorator consumes it.
+    # Confirm in audit_logging.
+    return get_actor_config(source_id, db_conn, secret_manager_client)
+def get_registry_entries(timeout: float = 30.0) -> list[dict]:
+    """Fetch connector entries from the cloud registry.
+    Args:
+        timeout: Seconds to wait for the registry server before giving up.
+            Without a timeout ``requests`` may block indefinitely on an
+            unresponsive server.
+    Returns:
+        The registry's ``sources`` entries followed by its ``destinations``
+        entries.
+    Raises:
+        requests.HTTPError: If the registry answers with an error status.
+        requests.Timeout: If the registry does not answer within ``timeout``.
+    """
+    registry_response = requests.get(CLOUD_REGISTRY_URL, timeout=timeout)
+    registry_response.raise_for_status()
+    registry = registry_response.json()
+    return registry["sources"] + registry["destinations"]
+def get_spec(actor_definition_id: uuid.UUID) -> dict:
+    """Return the connector spec registered for an actor definition.
+    The registry is fetched on every call and searched for the first entry
+    whose ``sourceDefinitionId`` or ``destinationDefinitionId`` equals the
+    string form of ``actor_definition_id``.
+    Raises:
+        ValueError: If no registry entry matches the definition id.
+    """
+    registry_entries = get_registry_entries()
+    wanted_id = str(actor_definition_id)
+    matching_specs = (
+        entry["spec"]
+        for entry in registry_entries
+        if wanted_id
+        in (entry.get("sourceDefinitionId"), entry.get("destinationDefinitionId"))
+    )
+    try:
+        return next(matching_specs)
+    except StopIteration as err:
+        raise ValueError(
+            f"Could not find spec for actor definition {actor_definition_id}."
+        ) from err
+def retrieve_objects(
+    connection_objects: list[ConnectionObject],
+    retrieval_reason: str,
+    connection_id: str,
+) -> list[TestingCandidate]:
+    """Retrieve the requested connection objects for one connection id.
+    Simplified variant that only supports lookup by connection_id.
+    For testing candidate discovery by docker image, see issue #91.
+    Args:
+        connection_objects: ConnectionObject kinds to retrieve
+        retrieval_reason: Reason for retrieval (for audit logging)
+        connection_id: The connection ID to retrieve objects for
+    Returns:
+        A list with the single populated TestingCandidate, or an empty list
+        when the connection's dataplane group is the EU one.
+    Raises:
+        ConnectionNotFoundError: If the residency query returns no row for
+            the connection.
+    """
+    connection_candidates = [TestingCandidate(connection_id=connection_id)]
+    secret_manager_client = secretmanager.SecretManagerServiceClient()
+    connection_pool = get_pool(secret_manager_client)
+    with connection_pool.connect() as db_conn:
+        # Walk a snapshot: EU candidates are removed from the live list below.
+        for candidate in list(connection_candidates):
+            residency_row = db_conn.execute(
+                SELECT_ON_CONNECTION_DATAPLANE_GROUP_IS_EU,
+                parameters={"connection_id": candidate.connection_id},
+            ).first()
+            if residency_row is None:
+                raise ConnectionNotFoundError(
+                    f"Credentials were not found for connection ID {candidate.connection_id}."
+                )
+            if residency_row[0] is True:
+                connection_candidates.remove(candidate)
+                LOGGER.warning(
+                    f"Credential retrieval not permitted; the data residency for "
+                    f"connection ID {candidate.connection_id} is within the EU. "
+                    f"Candidate will be removed from the list"
+                )
+                continue
+            retrieved_fields = {}
+            for connection_object in connection_objects:
+                # Enum values use dashes; candidate attributes use underscores.
+                field_name = connection_object.value.replace("-", "_")
+                retrieved_fields[field_name] = retrieve_object(
+                    candidate.connection_id,
+                    connection_object,
+                    retrieval_reason,
+                    db_conn,
+                    secret_manager_client,
+                )
+            candidate.update(**retrieved_fields)
+    return connection_candidates
+def retrieve_object(
+    connection_id: str,
+    connection_object: ConnectionObject,
+    retrieval_reason: str,
+    db_conn: sqlalchemy.Connection,
+    secret_manager_client: secretmanager.SecretManagerServiceClient,
+) -> Mapping | list[Mapping] | str | None:
+    """Retrieve one kind of object for a connection.
+    The connection row is always loaded first; the requested object is then
+    either read from that row or, for SOURCE_CONFIG, resolved through
+    get_source_config.
+    Raises:
+        NotImplementedError: If ``connection_object`` is not one of
+            SOURCE_ID, DESTINATION_ID, SOURCE_CONFIG or CONFIGURED_CATALOG.
+    """
+    retrieval_metadata = RetrievalMetadata(
+        connection_id=connection_id,
+        connection_object=connection_object,
+        retrieval_reason=retrieval_reason,
+    )
+    connection = get_connection(retrieval_metadata, connection_id, db_conn)
+    if connection_object == ConnectionObject.SOURCE_ID:
+        return connection["source_id"]
+    if connection_object == ConnectionObject.DESTINATION_ID:
+        return connection["destination_id"]
+    if connection_object == ConnectionObject.SOURCE_CONFIG:
+        return get_source_config(
+            retrieval_metadata,
+            connection["source_id"],
+            db_conn,
+            secret_manager_client,
+        )
+    if connection_object == ConnectionObject.CONFIGURED_CATALOG:
+        return connection["catalog"]
+    raise NotImplementedError(
+        f"Connection object {connection_object} not implemented in vendored version. "
+        f"Only SOURCE_CONFIG, SOURCE_ID, DESTINATION_ID, and CONFIGURED_CATALOG are supported."
+    )

airbyte_ops_mcp/live_tests/_connection_retriever/secrets_resolution.py ADDED Viewed

@@ -0,0 +1,130 @@
+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+"""Secret resolution for vendored connection-retriever.
+Vendored from: airbyte-platform-internal/tools/connection-retriever/src/connection_retriever/secrets_resolution.py
+"""
+from __future__ import annotations
+from typing import Any
+import dpath
+from google.cloud import secretmanager
+from airbyte_ops_mcp.live_tests._connection_retriever.consts import (
+    GCP_PROJECT_NAME,
+)
+def get_secret_value(
+    secret_manager_client: secretmanager.SecretManagerServiceClient, secret_id: str
+) -> str:
+    """Read the payload of the first enabled version of a secret.
+    Args:
+        secret_manager_client: The secret manager client
+        secret_id: Resource name of the secret, used as the listing parent
+    Returns:
+        The payload of the first listed enabled version, decoded as UTF-8
+    Raises:
+        ValueError: If the secret has no enabled version
+    """
+    listing = secret_manager_client.list_secret_versions(
+        request={"parent": secret_id, "filter": "state:ENABLED"}
+    )
+    enabled_versions = listing.versions
+    if len(enabled_versions) == 0:
+        raise ValueError(f"No enabled version of secret {secret_id} found")
+    first_enabled = enabled_versions[0]
+    accessed = secret_manager_client.access_secret_version(name=first_enabled.name)
+    return accessed.payload.data.decode("UTF-8")
+def is_secret(value: Any) -> bool:
+    """Tell whether a value is a secret reference.
+    A secret reference is a dict whose ``_secret`` entry is present and not
+    ``None``.
+    Args:
+        value: The value to check
+    Returns:
+        True if the value is a secret reference, False otherwise
+    """
+    if not isinstance(value, dict):
+        return False
+    return value.get("_secret") is not None
+def resolve_secrets_in_config(
+    secret_manager_client: secretmanager.SecretManagerServiceClient,
+    connector_config: dict,
+) -> dict:
+    """Recursively resolve secrets in the connector_config.
+    Every value that is a secret reference (see is_secret) is replaced by the
+    secret's value. Nested dicts are walked recursively, and so are lists, so
+    secret references stored inside array-valued settings are resolved too.
+    The config is modified in place.
+    Args:
+        secret_manager_client: The secret manager client
+        connector_config: The connector_config to resolve secrets in
+    Returns:
+        The same connector_config object, with secrets resolved
+    """
+    def _resolve(node: Any) -> Any:
+        """Return ``node`` with every secret reference inside it resolved."""
+        if is_secret(node):
+            secret_id = f"projects/{GCP_PROJECT_NAME}/secrets/{node['_secret']}"
+            return get_secret_value(secret_manager_client, secret_id)
+        if isinstance(node, dict):
+            return resolve_secrets_in_config(secret_manager_client, node)
+        if isinstance(node, list):
+            # Update the list in place, like dicts, so aliases stay in sync.
+            node[:] = [_resolve(item) for item in node]
+        return node
+    # Only values are reassigned, so iterating over the dict's keys is safe.
+    for key in connector_config:
+        connector_config[key] = _resolve(connector_config[key])
+    return connector_config
+def merge_dicts_non_destructive(a: dict, b: dict) -> dict:
+    """Return a new dict holding ``a`` overlaid with ``b``.
+    When both sides hold a dict under the same key the two are merged
+    recursively; in every other case the value from ``b`` wins. Neither input
+    is modified.
+    """
+    merged = a.copy()
+    for key, incoming in b.items():
+        both_are_dicts = isinstance(merged.get(key), dict) and isinstance(
+            incoming, dict
+        )
+        merged[key] = (
+            merge_dicts_non_destructive(merged[key], incoming)
+            if both_are_dicts
+            else incoming
+        )
+    return merged
+def get_resolved_config(
+    secret_manager_client: secretmanager.SecretManagerServiceClient,
+    actor_configuration: dict,
+    actor_oauth_parameter: dict,
+    spec: dict,
+) -> dict:
+    """Resolve an actor's secrets and, for OAuth actors, overlay the OAuth parameters.
+    Args:
+        secret_manager_client: The secret manager client
+        actor_configuration: The actor configuration (resolved in place)
+        actor_oauth_parameter: The actor oauth parameter
+        spec: The connector spec
+    Returns:
+        The resolved configuration
+    """
+    resolved_configuration = resolve_secrets_in_config(
+        secret_manager_client, actor_configuration
+    )
+    # Without advanced_auth in the spec there is no OAuth parameter to merge.
+    if "advanced_auth" not in spec:
+        return resolved_configuration
+    try:
+        predicate_path = "/".join(spec["advanced_auth"]["predicate_key"])
+        configured_value = dpath.get(actor_configuration, predicate_path)
+        is_using_oauth = configured_value == spec["advanced_auth"]["predicate_value"]
+    except KeyError:
+        # When no predicate_key is defined but we have advanced_auth in spec
+        # we can assume that the connector is only using OAuth.
+        is_using_oauth = True
+    if not is_using_oauth:
+        return resolved_configuration
+    resolved_oauth_parameter = resolve_secrets_in_config(
+        secret_manager_client, actor_oauth_parameter
+    )
+    return merge_dicts_non_destructive(
+        resolved_configuration, resolved_oauth_parameter
+    )

airbyte_ops_mcp/live_tests/config.py ADDED Viewed

@@ -0,0 +1,190 @@
+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+"""Configuration options for live tests.
+This module provides configuration classes and enums for controlling
+live test behavior, including connection filtering, stream selection,
+and test modes.
+Based on airbyte-ci implementation:
+https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/live-tests/src/live_tests/commons/models.py
+https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/live-tests/src/live_tests/commons/connection_objects_retrieval.py
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from enum import Enum
+from pathlib import Path
+class ConnectionSubset(Enum):
+    """Pool of connections that live tests may draw from.
+    SANDBOXES: Only use Airbyte sandbox connections (safer, limited data)
+    ALL: Use all available connections on Cloud (more coverage, real data)
+    """
+    SANDBOXES = "sandboxes"
+    ALL = "all"
+    @classmethod
+    def from_string(
+        cls,
+        value: str,
+    ) -> ConnectionSubset:
+        """Return the member whose value equals ``value``, ignoring case.
+        Raises:
+            ValueError: If ``value`` names no member.
+        """
+        normalized = value.lower()
+        for member in cls:
+            if member.value == normalized:
+                return member
+        raise ValueError(
+            f"Unknown connection subset: {value}. Must be 'sandboxes' or 'all'."
+        )
+class TargetOrControl(Enum):
+    """Identifies whether a connector is the target or control version."""
+    # Member values are the lowercase role names.
+    TARGET = "target"
+    CONTROL = "control"
+class ActorType(Enum):
+    """Type of connector actor."""
+    # Member values are the lowercase actor kind names.
+    SOURCE = "source"
+    DESTINATION = "destination"
+@dataclass
+class LiveTestConfig:
+    """Options that control one live test run.
+    Groups connection filtering, stream selection, custom input paths and
+    descriptive metadata in a single object. Naming an explicit connection
+    and asking for automatic selection at the same time is rejected on
+    construction.
+    """
+    # Connection filtering
+    connection_id: str | None = None
+    connection_subset: ConnectionSubset = ConnectionSubset.SANDBOXES
+    max_connections: int | None = None
+    auto_select_connections: bool = False
+    # Stream filtering
+    selected_streams: set[str] | None = None
+    # Custom paths for local testing
+    custom_config_path: Path | None = None
+    custom_catalog_path: Path | None = None
+    custom_state_path: Path | None = None
+    # Test behavior
+    test_description: str | None = None
+    retrieval_reason: str | None = None
+    def __post_init__(self) -> None:
+        """Reject mutually exclusive options.
+        Raises:
+            ValueError: If ``connection_id`` is set while
+                ``auto_select_connections`` is enabled.
+        """
+        has_explicit_connection = bool(self.connection_id)
+        if has_explicit_connection and self.auto_select_connections:
+            raise ValueError(
+                "Cannot set both connection_id and auto_select_connections"
+            )
+@dataclass
+class StreamFilter:
+    """Include/exclude rule set for choosing which streams to test.
+    An unset or empty ``include_streams`` means every stream is included;
+    an unset or empty ``exclude_streams`` means nothing is excluded.
+    """
+    include_streams: set[str] | None = None
+    exclude_streams: set[str] | None = None
+    def filter_streams(
+        self,
+        available_streams: set[str],
+    ) -> set[str]:
+        """Return the streams of ``available_streams`` that pass the filter.
+        The result is always a new set: restricted to ``include_streams``
+        when that is set, then stripped of ``exclude_streams``.
+        """
+        selected = (
+            available_streams & self.include_streams
+            if self.include_streams
+            else available_streams.copy()
+        )
+        if self.exclude_streams:
+            selected = selected - self.exclude_streams
+        return selected
+    def matches(
+        self,
+        stream_name: str,
+    ) -> bool:
+        """Tell whether a single stream name passes the filter."""
+        if self.include_streams and stream_name not in self.include_streams:
+            return False
+        if self.exclude_streams and stream_name in self.exclude_streams:
+            return False
+        return True
+@dataclass
+class ConnectionCandidate:
+    """Represents a candidate connection for testing.
+    Used when auto-selecting connections to test based on stream coverage
+    and sync duration.
+    """
+    # Id of the candidate connection.
+    connection_id: str
+    # Workspace id, when known.
+    workspace_id: str | None = None
+    # Stream names for this connection; each instance gets its own list.
+    streams_with_data: list[str] = field(default_factory=list)
+    # Duration of the last sync in seconds; None when unknown.
+    last_sync_duration_seconds: float | None = None
+    @property
+    def stream_count(self) -> int:
+        """Number of entries in ``streams_with_data``."""
+        return len(self.streams_with_data)
+def select_best_connection_candidates(
+    candidates: list[ConnectionCandidate],
+    max_connections: int | None = None,
+) -> list[tuple[ConnectionCandidate, list[str]]]:
+    """Select the best subset of connection candidates for testing.
+    Candidates are visited from the shortest to the longest last sync
+    duration; a candidate without a known duration (``None``) is visited
+    last, while a duration of ``0`` counts as the fastest possible. Each
+    candidate is assigned the streams that no earlier candidate already
+    covers, and candidates that add no new stream are dropped. The result
+    is ordered by the number of assigned streams, largest first.
+    Based on airbyte-ci implementation:
+    https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/live-tests/src/live_tests/commons/connection_objects_retrieval.py#L201-L220
+    Args:
+        candidates: Candidates to choose from; the list is not modified.
+        max_connections: Upper bound on the number of returned entries.
+            ``None`` or ``0`` means no limit.
+    Returns:
+        ``(candidate, streams_to_test)`` pairs.
+    """
+    def _duration_key(candidate: ConnectionCandidate) -> float:
+        """Sort key: known durations as-is, unknown durations last."""
+        duration = candidate.last_sync_duration_seconds
+        # An explicit None check keeps a genuine 0.0 duration at the front.
+        return float("inf") if duration is None else duration
+    fastest_first = sorted(candidates, key=_duration_key)
+    covered_streams: set[str] = set()
+    selected: list[tuple[ConnectionCandidate, list[str]]] = []
+    for candidate in fastest_first:
+        streams_to_test = []
+        for stream in candidate.streams_with_data:
+            if stream not in covered_streams:
+                streams_to_test.append(stream)
+                covered_streams.add(stream)
+        if streams_to_test:
+            selected.append((candidate, streams_to_test))
+    # Most streams first; the sort is stable, so ties keep fastest-first order.
+    selected.sort(key=lambda pair: len(pair[1]), reverse=True)
+    if max_connections:
+        selected = selected[:max_connections]
+    return selected

airbyte-internal-ops 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

airbyte-internal-ops 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl