airbyte-internal-ops 0.1.2.post2.dev20080805740__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- {airbyte_internal_ops-0.1.2.post2.dev20080805740.dist-info → airbyte_internal_ops-0.1.4.dist-info}/METADATA +8 -5
- {airbyte_internal_ops-0.1.2.post2.dev20080805740.dist-info → airbyte_internal_ops-0.1.4.dist-info}/RECORD +31 -11
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_pipelines/airbyte_ci/connectors/test/steps/common.py +1 -1
- airbyte_ops_mcp/cli/cloud.py +309 -38
- airbyte_ops_mcp/cloud_admin/connection_config.py +131 -0
- airbyte_ops_mcp/live_tests/__init__.py +16 -0
- airbyte_ops_mcp/live_tests/_connection_retriever/__init__.py +35 -0
- airbyte_ops_mcp/live_tests/_connection_retriever/audit_logging.py +88 -0
- airbyte_ops_mcp/live_tests/_connection_retriever/consts.py +33 -0
- airbyte_ops_mcp/live_tests/_connection_retriever/db_access.py +82 -0
- airbyte_ops_mcp/live_tests/_connection_retriever/retrieval.py +391 -0
- airbyte_ops_mcp/live_tests/_connection_retriever/secrets_resolution.py +130 -0
- airbyte_ops_mcp/live_tests/config.py +190 -0
- airbyte_ops_mcp/live_tests/connection_fetcher.py +159 -2
- airbyte_ops_mcp/live_tests/connection_secret_retriever.py +173 -0
- airbyte_ops_mcp/live_tests/evaluation_modes.py +45 -0
- airbyte_ops_mcp/live_tests/http_metrics.py +81 -0
- airbyte_ops_mcp/live_tests/message_cache/__init__.py +15 -0
- airbyte_ops_mcp/live_tests/message_cache/duckdb_cache.py +415 -0
- airbyte_ops_mcp/live_tests/obfuscation.py +126 -0
- airbyte_ops_mcp/live_tests/regression/__init__.py +29 -0
- airbyte_ops_mcp/live_tests/regression/comparators.py +466 -0
- airbyte_ops_mcp/live_tests/schema_generation.py +154 -0
- airbyte_ops_mcp/live_tests/validation/__init__.py +43 -0
- airbyte_ops_mcp/live_tests/validation/catalog_validators.py +389 -0
- airbyte_ops_mcp/live_tests/validation/record_validators.py +227 -0
- airbyte_ops_mcp/mcp/_mcp_utils.py +3 -0
- airbyte_ops_mcp/mcp/live_tests.py +500 -0
- airbyte_ops_mcp/mcp/server.py +3 -0
- {airbyte_internal_ops-0.1.2.post2.dev20080805740.dist-info → airbyte_internal_ops-0.1.4.dist-info}/WHEEL +0 -0
- {airbyte_internal_ops-0.1.2.post2.dev20080805740.dist-info → airbyte_internal_ops-0.1.4.dist-info}/entry_points.txt +0 -0
--- a/airbyte_ops_mcp/live_tests/connection_fetcher.py
+++ b/airbyte_ops_mcp/live_tests/connection_fetcher.py
@@ -9,12 +9,13 @@ from __future__ import annotations
 
 import json
 import os
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any
 
 import requests
 from airbyte import constants
+from airbyte.cloud import CloudWorkspace
 from airbyte.exceptions import PyAirbyteInputError
 
 
@@ -29,6 +30,16 @@ class ConnectionData:
     config: dict[str, Any]
     catalog: dict[str, Any]
     stream_names: list[str]
+    docker_repository: str | None = None
+    docker_image_tag: str | None = None
+    state: list[dict[str, Any]] | None = field(default=None)
+
+    @property
+    def connector_image(self) -> str | None:
+        """Get the full connector image name with tag."""
+        if self.docker_repository and self.docker_image_tag:
+            return f"{self.docker_repository}:{self.docker_image_tag}"
+        return None
 
 
 def _get_access_token(
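
Note on the new connector_image property: it only composes a full image name when both docker fields are set. A minimal sketch of the behavior, using made-up values for every field (the field list matches the diff above):

    from airbyte_ops_mcp.live_tests.connection_fetcher import ConnectionData

    data = ConnectionData(
        connection_id="conn-123",
        source_id="src-456",
        source_name="Example Source",
        source_definition_id="def-789",
        config={},
        catalog={"streams": []},
        stream_names=["users"],
        docker_repository="airbyte/source-example",
        docker_image_tag="1.2.3",
    )
    assert data.connector_image == "airbyte/source-example:1.2.3"
    # With either docker field missing, the property returns None
    # rather than a partial image name.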
@@ -121,6 +132,28 @@ def fetch_connection_data(
     )
 
     source_data = source_response.json()
+    source_definition_id = source_data.get("definitionId", "")
+
+    # Try to get docker repository and image tag from source definition version
+    docker_repository = None
+    docker_image_tag = None
+    if source_definition_id:
+        try:
+            # Use the Config API to get version info for the source
+            config_api_root = constants.CLOUD_CONFIG_API_ROOT
+            version_response = requests.post(
+                f"{config_api_root}/actor_definition_versions/get_for_source",
+                json={"sourceId": source_id},
+                headers=headers,
+                timeout=30,
+            )
+            if version_response.status_code == 200:
+                version_data = version_response.json()
+                docker_repository = version_data.get("dockerRepository")
+                docker_image_tag = version_data.get("dockerImageTag")
+        except Exception:
+            # Non-fatal: we can still proceed without docker info
+            pass
 
     # Build configured catalog from connection streams
     streams_config = conn_data.get("configurations", {}).get("streams", [])
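
Since the version lookup is wrapped in a broad try/except, a Config API failure degrades gracefully: docker_repository and docker_image_tag simply stay None. A hypothetical illustration of the downstream effect, using a placeholder connection ID:

    from airbyte_ops_mcp.live_tests.connection_fetcher import fetch_connection_data

    data = fetch_connection_data("<connection-id>")
    # If the actor_definition_versions lookup failed, both docker fields are
    # None and the connector_image property (added above) resolves to None.
    image = data.connector_image or "<unknown image>"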
@@ -133,10 +166,12 @@ def fetch_connection_data(
         connection_id=connection_id,
         source_id=source_id,
         source_name=source_data.get("name", ""),
-        source_definition_id=
+        source_definition_id=source_definition_id,
         config=source_data.get("configuration", {}),
         catalog=catalog,
         stream_names=stream_names,
+        docker_repository=docker_repository,
+        docker_image_tag=docker_image_tag,
     )
 
 
@@ -214,3 +249,125 @@ def save_connection_data_to_files(
     catalog_path.write_text(json.dumps(connection_data.catalog, indent=2))
 
     return config_path, catalog_path
+
+
+def fetch_connection_artifacts(
+    connection_id: str,
+    workspace_id: str | None = None,
+    client_id: str | None = None,
+    client_secret: str | None = None,
+) -> tuple[dict[str, Any] | None, list[dict[str, Any]] | None]:
+    """Fetch catalog and state artifacts using PyAirbyte's CloudConnection.
+
+    This uses the Config API endpoints via PyAirbyte to get the actual
+    configured catalog (with full schemas) and state artifacts without
+    requiring direct database access.
+
+    Args:
+        connection_id: The connection ID to fetch artifacts for.
+        workspace_id: Airbyte Cloud workspace ID (defaults to env var).
+        client_id: Airbyte Cloud client ID (defaults to env var).
+        client_secret: Airbyte Cloud client secret (defaults to env var).
+
+    Returns:
+        Tuple of (catalog, state) where:
+        - catalog: The configured catalog dict with full schemas, or None
+        - state: List of state dicts for each stream, or None if no state
+    """
+    workspace_id = workspace_id or os.getenv("AIRBYTE_CLOUD_WORKSPACE_ID")
+    client_id = client_id or os.getenv("AIRBYTE_CLOUD_CLIENT_ID")
+    client_secret = client_secret or os.getenv("AIRBYTE_CLOUD_CLIENT_SECRET")
+
+    if not workspace_id:
+        raise PyAirbyteInputError(
+            message="Missing Airbyte Cloud workspace ID",
+            context={"hint": "Set AIRBYTE_CLOUD_WORKSPACE_ID env var"},
+        )
+
+    workspace = CloudWorkspace(
+        workspace_id=workspace_id,
+        client_id=client_id,
+        client_secret=client_secret,
+    )
+    connection = workspace.get_connection(connection_id)
+
+    catalog = connection.get_catalog_artifact()
+    state = connection.get_state_artifacts()
+
+    return catalog, state
+
+
+def enrich_connection_data_with_artifacts(
+    connection_data: ConnectionData,
+    workspace_id: str | None = None,
+    client_id: str | None = None,
+    client_secret: str | None = None,
+) -> ConnectionData:
+    """Enrich ConnectionData with full catalog and state from PyAirbyte.
+
+    This replaces the minimal catalog (with empty schemas) with the actual
+    configured catalog from the Config API, and adds state artifacts.
+
+    Args:
+        connection_data: The connection data to enrich.
+        workspace_id: Airbyte Cloud workspace ID (defaults to env var).
+        client_id: Airbyte Cloud client ID (defaults to env var).
+        client_secret: Airbyte Cloud client secret (defaults to env var).
+
+    Returns:
+        ConnectionData with enriched catalog and state.
+    """
+    catalog, state = fetch_connection_artifacts(
+        connection_id=connection_data.connection_id,
+        workspace_id=workspace_id,
+        client_id=client_id,
+        client_secret=client_secret,
+    )
+
+    if catalog is not None:
+        # Convert syncCatalog format to ConfiguredAirbyteCatalog format
+        connection_data.catalog = _convert_sync_catalog_to_configured(catalog)
+
+    connection_data.state = state
+    return connection_data
+
+
+def _convert_sync_catalog_to_configured(sync_catalog: dict[str, Any]) -> dict[str, Any]:
+    """Convert syncCatalog format to ConfiguredAirbyteCatalog format.
+
+    The Config API returns syncCatalog in a slightly different format than
+    the Airbyte protocol's ConfiguredAirbyteCatalog. This function converts
+    between the two formats.
+    """
+    configured_streams = []
+
+    for stream_config in sync_catalog.get("streams", []):
+        stream_info = stream_config.get("stream", {})
+        config_info = stream_config.get("config", {})
+
+        configured_stream = {
+            "stream": {
+                "name": stream_info.get("name", ""),
+                "json_schema": stream_info.get("jsonSchema", {}),
+                "supported_sync_modes": stream_info.get("supportedSyncModes", []),
+                "source_defined_cursor": stream_info.get("sourceDefinedCursor", False),
+                "default_cursor_field": stream_info.get("defaultCursorField", []),
+                "source_defined_primary_key": stream_info.get(
+                    "sourceDefinedPrimaryKey", []
+                ),
+            },
+            "sync_mode": config_info.get("syncMode", "full_refresh"),
+            "destination_sync_mode": config_info.get("destinationSyncMode", "append"),
+        }
+
+        cursor_field = config_info.get("cursorField")
+        if cursor_field:
+            configured_stream["cursor_field"] = cursor_field
+
+        primary_key = config_info.get("primaryKey")
+        if primary_key:
+            configured_stream["primary_key"] = primary_key
+
+        configured_streams.append(configured_stream)
+
+    return {"streams": configured_streams}
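
A sketch of the intended call pattern for the two new helpers, with a placeholder connection ID and credentials supplied via the environment variables named in the docstrings:

    from airbyte_ops_mcp.live_tests.connection_fetcher import (
        enrich_connection_data_with_artifacts,
        fetch_connection_data,
    )

    # Requires AIRBYTE_CLOUD_WORKSPACE_ID, AIRBYTE_CLOUD_CLIENT_ID, and
    # AIRBYTE_CLOUD_CLIENT_SECRET to be set in the environment.
    connection_data = fetch_connection_data("<connection-id>")
    connection_data = enrich_connection_data_with_artifacts(connection_data)

    # The minimal catalog is replaced with full schemas; state is populated.
    print(len(connection_data.catalog["streams"]), connection_data.state)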
--- /dev/null
+++ b/airbyte_ops_mcp/live_tests/connection_secret_retriever.py
@@ -0,0 +1,173 @@
+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+"""Retrieve unmasked connection secrets via vendored connection-retriever.
+
+This module provides a focused utility for enriching connection config with
+unmasked secrets from the vendored connection-retriever code. It is designed
+to work alongside the existing connection_fetcher module, which handles all
+other connection data via the public Cloud API.
+
+The secret retriever requires:
+- GCP credentials with appropriate permissions
+- Cloud SQL Proxy running to internal Postgres (or CI environment)
+
+Usage:
+    from airbyte_ops_mcp.live_tests.connection_fetcher import fetch_connection_data
+    from airbyte_ops_mcp.live_tests.connection_secret_retriever import (
+        enrich_config_with_secrets,
+        should_use_secret_retriever,
+    )
+
+    # Fetch connection data via public API (config will have masked secrets)
+    connection_data = fetch_connection_data(connection_id)
+
+    # Enrich with unmasked secrets if enabled
+    if should_use_secret_retriever():
+        connection_data = enrich_config_with_secrets(
+            connection_data,
+            retrieval_reason="MCP live test",
+        )
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+from dataclasses import replace
+from typing import TYPE_CHECKING
+
+from airbyte_ops_mcp.live_tests._connection_retriever import (
+    ConnectionObject,
+    retrieve_objects,
+)
+
+if TYPE_CHECKING:
+    from airbyte_ops_mcp.live_tests.connection_fetcher import ConnectionData
+
+logger = logging.getLogger(__name__)
+
+# Environment variable to enable secret retrieval
+ENV_USE_SECRET_RETRIEVER = "USE_CONNECTION_SECRET_RETRIEVER"
+
+# GCP credential environment variables
+ENV_GOOGLE_APPLICATION_CREDENTIALS = "GOOGLE_APPLICATION_CREDENTIALS"
+ENV_GCP_PROD_DB_ACCESS_CREDENTIALS = "GCP_PROD_DB_ACCESS_CREDENTIALS"
+
+
+def _ensure_gcp_credentials_env() -> None:
+    """Ensure GCP credentials are available via standard env var.
+
+    If GOOGLE_APPLICATION_CREDENTIALS is not set but GCP_PROD_DB_ACCESS_CREDENTIALS is,
+    copy the value to GOOGLE_APPLICATION_CREDENTIALS. This provides a fallback
+    for internal employees who use GCP_PROD_DB_ACCESS_CREDENTIALS as their standard
+    credential path for prod database access.
+
+    This function is idempotent and safe to call multiple times.
+    """
+    if ENV_GOOGLE_APPLICATION_CREDENTIALS not in os.environ:
+        gsm_creds = os.getenv(ENV_GCP_PROD_DB_ACCESS_CREDENTIALS)
+        if gsm_creds:
+            os.environ[ENV_GOOGLE_APPLICATION_CREDENTIALS] = gsm_creds
+            logger.debug(
+                f"Using {ENV_GCP_PROD_DB_ACCESS_CREDENTIALS} as fallback for "
+                f"{ENV_GOOGLE_APPLICATION_CREDENTIALS}"
+            )
+
+
+def is_secret_retriever_enabled() -> bool:
+    """Check if secret retrieval is enabled via environment variable.
+
+    Returns:
+        True if USE_CONNECTION_SECRET_RETRIEVER is set to a truthy value.
+    """
+    value = os.getenv(ENV_USE_SECRET_RETRIEVER, "").lower()
+    return value in ("true", "1", "yes")
+
+
+def should_use_secret_retriever() -> bool:
+    """Check if secret retrieval should be used.
+
+    Returns:
+        True if USE_CONNECTION_SECRET_RETRIEVER env var is set to a truthy value.
+    """
+    return is_secret_retriever_enabled()
+
+
+def retrieve_unmasked_config(
+    connection_id: str,
+    retrieval_reason: str = "MCP live tests",
+) -> dict | None:
+    """Retrieve unmasked source config from vendored connection-retriever.
+
+    This function directly queries the internal Postgres database to get
+    the source configuration with unmasked secrets.
+
+    Args:
+        connection_id: The Airbyte Cloud connection ID.
+        retrieval_reason: Reason for retrieval (for audit logging).
+
+    Returns:
+        The unmasked source config dict, or None if retrieval fails.
+    """
+    # Ensure GCP credentials are available (supports GCP_PROD_DB_ACCESS_CREDENTIALS fallback)
+    _ensure_gcp_credentials_env()
+
+    # Only request the source config - that's all we need for secrets
+    requested_objects = [ConnectionObject.SOURCE_CONFIG]
+
+    candidates = retrieve_objects(
+        connection_objects=requested_objects,
+        retrieval_reason=retrieval_reason,
+        connection_id=connection_id,
+    )
+
+    if not candidates:
+        logger.warning(
+            f"No connection data found for connection ID {connection_id} "
+            "via connection-retriever"
+        )
+        return None
+
+    candidate = candidates[0]
+    if candidate.source_config:
+        return dict(candidate.source_config)
+
+    return None
+
+
+def enrich_config_with_secrets(
+    connection_data: ConnectionData,
+    retrieval_reason: str = "MCP live tests",
+) -> ConnectionData:
+    """Enrich connection data with unmasked secrets from internal retriever.
+
+    This function takes a ConnectionData object (typically from the public
+    Cloud API with masked secrets) and replaces the config with unmasked
+    secrets from the internal connection-retriever.
+
+    Args:
+        connection_data: The connection data to enrich.
+        retrieval_reason: Reason for retrieval (for audit logging).
+
+    Returns:
+        A new ConnectionData with unmasked config, or the original if
+        retrieval fails or is not available.
+    """
+    unmasked_config = retrieve_unmasked_config(
+        connection_id=connection_data.connection_id,
+        retrieval_reason=retrieval_reason,
+    )
+
+    if unmasked_config is None:
+        logger.info(
+            f"Could not retrieve unmasked config for {connection_data.connection_id}, "
+            "using masked config from Cloud API"
+        )
+        return connection_data
+
+    logger.info(
+        f"Successfully enriched config with unmasked secrets for "
+        f"{connection_data.connection_id}"
+    )
+
+    # Return a new ConnectionData with the unmasked config
+    return replace(connection_data, config=unmasked_config)
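
Worth noting: enrich_config_with_secrets builds its result with dataclasses.replace, so on success the caller gets a new ConnectionData while the masked original stays untouched; on failure the original object is returned unchanged. A short sketch with a placeholder connection ID:

    from airbyte_ops_mcp.live_tests.connection_fetcher import fetch_connection_data
    from airbyte_ops_mcp.live_tests.connection_secret_retriever import (
        enrich_config_with_secrets,
        should_use_secret_retriever,
    )

    masked = fetch_connection_data("<connection-id>")
    if should_use_secret_retriever():
        unmasked = enrich_config_with_secrets(
            masked,
            retrieval_reason="debugging a failed sync",  # audit-logged
        )
        # masked.config is unchanged; unmasked.config carries real secrets
        # only if the internal retrieval succeeded.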
--- /dev/null
+++ b/airbyte_ops_mcp/live_tests/evaluation_modes.py
@@ -0,0 +1,45 @@
+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+"""Test evaluation modes for live tests.
+
+This module provides evaluation modes that control how test failures are handled.
+
+Based on airbyte-ci implementation:
+https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/live-tests/src/live_tests/commons/evaluation_modes.py
+"""
+
+from __future__ import annotations
+
+from enum import Enum
+
+
+class TestEvaluationMode(Enum):
+    """Test evaluation modes.
+
+    Tests may be run in "diagnostic" mode or "strict" mode.
+
+    When run in "diagnostic" mode, validation failures won't fail the overall
+    test run, but errors will still be surfaced in the test report.
+
+    In "strict" mode, tests pass/fail as usual.
+
+    Diagnostic mode is useful for tests that don't affect the overall
+    functionality of the connector but test an ideal state.
+    """
+
+    DIAGNOSTIC = "diagnostic"
+    STRICT = "strict"
+
+    @classmethod
+    def from_string(
+        cls,
+        value: str,
+    ) -> TestEvaluationMode:
+        """Parse evaluation mode from string."""
+        value_lower = value.lower()
+        if value_lower == "diagnostic":
+            return cls.DIAGNOSTIC
+        if value_lower == "strict":
+            return cls.STRICT
+        raise ValueError(
+            f"Unknown evaluation mode: {value}. Must be 'diagnostic' or 'strict'."
+        )
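
A minimal sketch of the parser's behavior: matching is case-insensitive, and any other value raises:

    from airbyte_ops_mcp.live_tests.evaluation_modes import TestEvaluationMode

    assert TestEvaluationMode.from_string("STRICT") is TestEvaluationMode.STRICT
    assert TestEvaluationMode.from_string("diagnostic") is TestEvaluationMode.DIAGNOSTIC
    TestEvaluationMode.from_string("lenient")  # raises ValueError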
--- a/airbyte_ops_mcp/live_tests/http_metrics.py
+++ b/airbyte_ops_mcp/live_tests/http_metrics.py
@@ -21,11 +21,13 @@ from typing import Iterator
 try:
     from mitmproxy import http as mitmproxy_http
     from mitmproxy import io as mitmproxy_io
+    from mitmproxy.addons.savehar import SaveHar
 
     MITMPROXY_AVAILABLE = True
 except ImportError:
     mitmproxy_http = None  # type: ignore[assignment]
     mitmproxy_io = None  # type: ignore[assignment]
+    SaveHar = None  # type: ignore[assignment, misc]
     MITMPROXY_AVAILABLE = False
 
 logger = logging.getLogger(__name__)
@@ -317,3 +319,82 @@ def compute_http_metrics_comparison(
         },
         "difference": target_metrics.flow_count - control_metrics.flow_count,
     }
+
+
+def get_http_flows_from_mitm_dump(
+    mitm_dump_path: Path,
+) -> list[mitmproxy_http.HTTPFlow]:  # type: ignore[name-defined]
+    """Get HTTP flows from a mitmproxy dump file.
+
+    Based on airbyte-ci implementation:
+    https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/live-tests/src/live_tests/commons/utils.py#L129-L139
+
+    Args:
+        mitm_dump_path: Path to the mitmproxy dump file.
+
+    Returns:
+        List of HTTP flows from the dump file.
+    """
+    if not MITMPROXY_AVAILABLE:
+        logger.warning("mitmproxy Python package not installed")
+        return []
+
+    if not mitm_dump_path.exists():
+        logger.warning(f"Mitmproxy dump file not found: {mitm_dump_path}")
+        return []
+
+    with open(mitm_dump_path, "rb") as dump_file:
+        return [
+            f
+            for f in mitmproxy_io.FlowReader(dump_file).stream()
+            if isinstance(f, mitmproxy_http.HTTPFlow)
+        ]
+
+
+def mitm_http_stream_to_har(
+    mitm_http_stream_path: Path,
+    har_file_path: Path,
+) -> Path:
+    """Convert a mitmproxy HTTP stream file to a HAR file.
+
+    Based on airbyte-ci implementation:
+    https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/live-tests/src/live_tests/commons/utils.py#L142-L154
+
+    HAR (HTTP Archive) is a standard JSON format for recording HTTP transactions.
+    This allows HTTP traffic captured by mitmproxy to be viewed in browser dev tools
+    or other HAR viewers.
+
+    Args:
+        mitm_http_stream_path: Path to the mitmproxy HTTP stream file (.mitm).
+        har_file_path: Path where the HAR file will be saved.
+
+    Returns:
+        Path to the generated HAR file.
+
+    Raises:
+        RuntimeError: If mitmproxy is not available.
+    """
+    if not MITMPROXY_AVAILABLE or SaveHar is None:
+        raise RuntimeError(
+            "mitmproxy Python package not installed; cannot convert to HAR"
+        )
+
+    flows = get_http_flows_from_mitm_dump(mitm_http_stream_path)
+    if not flows:
+        logger.warning(f"No HTTP flows found in {mitm_http_stream_path}")
+        return har_file_path
+
+    har_file_path.parent.mkdir(parents=True, exist_ok=True)
+    try:
+        SaveHar().export_har(flows, str(har_file_path))
+    except Exception as e:
+        logger.error(f"Failed to export HAR file to {har_file_path}: {e}")
+        raise
+
+    if har_file_path.exists() and har_file_path.stat().st_size > 0:
+        logger.info(f"Generated HAR file at {har_file_path}")
+    else:
+        logger.error(f"Failed to generate valid HAR file at {har_file_path}")
+        raise RuntimeError(f"Failed to generate valid HAR file at {har_file_path}")
+
+    return har_file_path
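
A sketch of converting a captured .mitm stream into a HAR file with the new helper; the paths are placeholders, and the mitmproxy package must be installed:

    from pathlib import Path

    from airbyte_ops_mcp.live_tests.http_metrics import mitm_http_stream_to_har

    har_path = mitm_http_stream_to_har(
        mitm_http_stream_path=Path("artifacts/http_traffic.mitm"),
        har_file_path=Path("artifacts/http_traffic.har"),
    )
    # Raises RuntimeError if mitmproxy is not installed or the HAR file
    # cannot be generated; returns the HAR path on success.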
--- /dev/null
+++ b/airbyte_ops_mcp/live_tests/message_cache/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+"""Message cache for storing Airbyte messages from connector executions.
+
+This module provides a DuckDB-based message cache for persisting and querying
+Airbyte messages produced during connector test runs.
+
+Based on airbyte-ci implementation:
+https://github.com/airbytehq/airbyte/tree/master/airbyte-ci/connectors/live-tests/src/live_tests/commons/backends
+"""
+
+from airbyte_ops_mcp.live_tests.message_cache.duckdb_cache import DuckDbMessageCache
+
+__all__ = [
+    "DuckDbMessageCache",
+]