airbyte-internal-ops 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as published.
@@ -10,6 +10,9 @@ from airbyte.exceptions import PyAirbyteInputError
 MCP_SERVER_NAME = "airbyte-internal-ops"
 """The name of the MCP server."""
 
+USER_AGENT = "Airbyte-Internal-Ops Python client"
+"""User-Agent string for HTTP requests to Airbyte Cloud APIs."""
+
 # Environment variable names for internal admin authentication
 ENV_AIRBYTE_INTERNAL_ADMIN_FLAG = "AIRBYTE_INTERNAL_ADMIN_FLAG"
 ENV_AIRBYTE_INTERNAL_ADMIN_USER = "AIRBYTE_INTERNAL_ADMIN_USER"
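
For context on how the new constant is meant to be used: a User-Agent string like this is normally attached as a request header by the HTTP client. A minimal sketch (the endpoint URL and the use of httpx are illustrative assumptions, not taken from this diff):

    import httpx

    from airbyte_ops_mcp.constants import USER_AGENT

    # Hypothetical call site: identify this client on requests to Airbyte Cloud APIs.
    response = httpx.get(
        "https://cloud.airbyte.com/api/v1/health",  # placeholder endpoint
        headers={"User-Agent": USER_AGENT},
    )
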
@@ -0,0 +1,18 @@
+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+"""GCP Cloud Logging utilities for fetching error details by error ID."""
+
+from airbyte_ops_mcp.gcp_logs.error_lookup import (
+    GCPLogEntry,
+    GCPLogPayload,
+    GCPLogSearchResult,
+    GCPSeverity,
+    fetch_error_logs,
+)
+
+__all__ = [
+    "GCPLogEntry",
+    "GCPLogPayload",
+    "GCPLogSearchResult",
+    "GCPSeverity",
+    "fetch_error_logs",
+]
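
The re-exports above flatten the package surface, so downstream code can import everything from airbyte_ops_mcp.gcp_logs in one statement. For example:

    from airbyte_ops_mcp.gcp_logs import GCPSeverity, fetch_error_logs

    # Same lookup as the module docstring below, restricted to ERROR and above.
    result = fetch_error_logs(
        error_id="3173452e-8f22-4286-a1ec-b0f16c1e078a",
        min_severity_filter=GCPSeverity.ERROR,
    )
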
@@ -0,0 +1,383 @@
+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+"""Fetch full stack traces from Google Cloud Logs by error ID.
+
+This module provides functionality to look up error details from GCP Cloud Logging
+using an error ID (UUID). This is useful for debugging API errors that return
+only an error ID in the response.
+
+Example:
+    from airbyte_ops_mcp.gcp_logs import fetch_error_logs
+
+    result = fetch_error_logs(
+        error_id="3173452e-8f22-4286-a1ec-b0f16c1e078a",
+        project="prod-ab-cloud-proj",
+        lookback_days=7,
+    )
+    for payload in result.payloads:
+        print(payload.message)
+"""
+
+from __future__ import annotations
+
+import re
+from datetime import UTC, datetime, timedelta
+from enum import StrEnum
+from typing import Any
+
+from google.cloud import logging
+from google.cloud.logging_v2 import entries
+from pydantic import BaseModel, Field
+
+# Default GCP project for Airbyte Cloud
+DEFAULT_GCP_PROJECT = "prod-ab-cloud-proj"
+
+
+class GCPSeverity(StrEnum):
+    """Valid GCP Cloud Logging severity levels."""
+
+    DEBUG = "DEBUG"
+    INFO = "INFO"
+    NOTICE = "NOTICE"
+    WARNING = "WARNING"
+    ERROR = "ERROR"
+    CRITICAL = "CRITICAL"
+    ALERT = "ALERT"
+    EMERGENCY = "EMERGENCY"
+
+
+class GCPLogResourceLabels(BaseModel):
+    """Resource labels from a GCP log entry."""
+
+    pod_name: str | None = Field(default=None, description="Kubernetes pod name")
+    container_name: str | None = Field(
+        default=None, description="Container name within the pod"
+    )
+    namespace_name: str | None = Field(default=None, description="Kubernetes namespace")
+    cluster_name: str | None = Field(default=None, description="GKE cluster name")
+
+
+class GCPLogResource(BaseModel):
+    """Resource information from a GCP log entry."""
+
+    type: str | None = Field(default=None, description="Resource type")
+    labels: GCPLogResourceLabels = Field(
+        default_factory=GCPLogResourceLabels, description="Resource labels"
+    )
+
+
+class GCPLogSourceLocation(BaseModel):
+    """Source location information from a GCP log entry."""
+
+    file: str | None = Field(default=None, description="Source file path")
+    line: int | None = Field(default=None, description="Line number")
+    function: str | None = Field(default=None, description="Function name")
+
+
+class GCPLogEntry(BaseModel):
+    """A single log entry from GCP Cloud Logging."""
+
+    timestamp: datetime | None = Field(
+        default=None, description="When the log entry was created"
+    )
+    severity: str | None = Field(
+        default=None, description="Log severity (DEBUG, INFO, WARNING, ERROR, etc.)"
+    )
+    log_name: str | None = Field(default=None, description="Full log name path")
+    insert_id: str | None = Field(
+        default=None, description="Unique identifier for the log entry"
+    )
+    trace: str | None = Field(
+        default=None, description="Trace ID for distributed tracing"
+    )
+    span_id: str | None = Field(default=None, description="Span ID within the trace")
+    payload: Any = Field(default=None, description="Log entry payload (text or struct)")
+    payload_type: str | None = Field(
+        default=None, description="Type of payload (text, struct, protobuf)"
+    )
+    resource: GCPLogResource = Field(
+        default_factory=GCPLogResource, description="Resource information"
+    )
+    source_location: GCPLogSourceLocation | None = Field(
+        default=None, description="Source code location"
+    )
+    labels: dict[str, str] = Field(
+        default_factory=dict, description="User-defined labels"
+    )
+
+
+class GCPLogPayload(BaseModel):
+    """Extracted and combined payload from grouped log entries."""
+
+    timestamp: datetime | None = Field(
+        default=None, description="Timestamp of the first entry in the group"
+    )
+    severity: str | None = Field(default=None, description="Severity of the log group")
+    resource: GCPLogResource = Field(
+        default_factory=GCPLogResource, description="Resource information"
+    )
+    num_log_lines: int = Field(
+        default=0, description="Number of log lines combined into this payload"
+    )
+    message: str = Field(default="", description="Combined message from all log lines")
+
+
+class GCPLogSearchResult(BaseModel):
+    """Result of searching GCP Cloud Logging for an error ID."""
+
+    error_id: str = Field(description="The error ID that was searched for")
+    project: str = Field(description="GCP project that was searched")
+    lookback_days_searched: int = Field(
+        description="Number of lookback days that were searched"
+    )
+    total_entries_found: int = Field(
+        description="Total number of log entries found (including related entries)"
+    )
+    entries: list[GCPLogEntry] = Field(
+        default_factory=list, description="Raw log entries found"
+    )
+    payloads: list[GCPLogPayload] = Field(
+        default_factory=list,
+        description="Extracted and grouped payloads (reconstructed stack traces)",
+    )
+
+
+def _build_filter(
+    error_id: str,
+    lookback_days: int,
+    min_severity_filter: GCPSeverity | None,
+) -> str:
+    """Build the Cloud Logging filter query."""
+    filter_parts = [f'"{error_id}"']
+
+    start_time = datetime.now(UTC) - timedelta(days=lookback_days)
+    filter_parts.append(f'timestamp >= "{start_time.isoformat()}"')
+
+    if min_severity_filter:
+        filter_parts.append(f"severity>={min_severity_filter}")
+
+    return " AND ".join(filter_parts)
+
+
+def _entry_to_model(
+    entry: entries.StructEntry | entries.TextEntry | entries.ProtobufEntry,
+) -> GCPLogEntry:
+    """Convert a GCP log entry to a Pydantic model."""
+    resource_labels = {}
+    if entry.resource and entry.resource.labels:
+        resource_labels = dict(entry.resource.labels)
+
+    resource = GCPLogResource(
+        type=entry.resource.type if entry.resource else None,
+        labels=GCPLogResourceLabels(
+            pod_name=resource_labels.get("pod_name"),
+            container_name=resource_labels.get("container_name"),
+            namespace_name=resource_labels.get("namespace_name"),
+            cluster_name=resource_labels.get("cluster_name"),
+        ),
+    )
+
+    source_location = None
+    if entry.source_location:
+        source_location = GCPLogSourceLocation(
+            file=entry.source_location.get("file"),
+            line=entry.source_location.get("line"),
+            function=entry.source_location.get("function"),
+        )
+
+    payload: Any = None
+    payload_type = "unknown"
+    if isinstance(entry, entries.StructEntry):
+        payload = entry.payload
+        payload_type = "struct"
+    elif isinstance(entry, entries.TextEntry):
+        payload = entry.payload
+        payload_type = "text"
+    elif isinstance(entry, entries.ProtobufEntry):
+        payload = str(entry.payload)
+        payload_type = "protobuf"
+
+    return GCPLogEntry(
+        timestamp=entry.timestamp,
+        severity=entry.severity,
+        log_name=entry.log_name,
+        insert_id=entry.insert_id,
+        trace=entry.trace,
+        span_id=entry.span_id,
+        payload=payload,
+        payload_type=payload_type,
+        resource=resource,
+        source_location=source_location,
+        labels=dict(entry.labels) if entry.labels else {},
+    )
+
+
+def _group_entries_by_occurrence(
+    log_entries: list[GCPLogEntry],
+) -> list[list[GCPLogEntry]]:
+    """Group log entries by occurrence (timestamp clusters within 1 second)."""
+    if not log_entries:
+        return []
+
+    sorted_entries = sorted(
+        log_entries, key=lambda x: x.timestamp or datetime.min.replace(tzinfo=UTC)
+    )
+
+    groups: list[list[GCPLogEntry]] = []
+    current_group = [sorted_entries[0]]
+    current_timestamp = sorted_entries[0].timestamp or datetime.min.replace(tzinfo=UTC)
+
+    for entry in sorted_entries[1:]:
+        entry_timestamp = entry.timestamp or datetime.min.replace(tzinfo=UTC)
+        time_diff = abs((entry_timestamp - current_timestamp).total_seconds())
+
+        current_pod = current_group[0].resource.labels.pod_name
+        entry_pod = entry.resource.labels.pod_name
+
+        if time_diff <= 1 and entry_pod == current_pod:
+            current_group.append(entry)
+        else:
+            groups.append(current_group)
+            current_group = [entry]
+            current_timestamp = entry_timestamp
+
+    if current_group:
+        groups.append(current_group)
+
+    return groups
+
+
+def _extract_payloads(log_entries: list[GCPLogEntry]) -> list[GCPLogPayload]:
+    """Extract and group payloads by occurrence."""
+    if not log_entries:
+        return []
+
+    grouped = _group_entries_by_occurrence(log_entries)
+
+    results = []
+    for group in grouped:
+        payloads = []
+        for entry in group:
+            if entry.payload:
+                payload_text = str(entry.payload)
+                payload_text = re.sub(r"\x1b\[[0-9;]*m", "", payload_text)
+                payloads.append(payload_text)
+
+        combined_message = "\n".join(payloads)
+
+        first_entry = group[0]
+        result = GCPLogPayload(
+            timestamp=first_entry.timestamp,
+            severity=first_entry.severity,
+            resource=first_entry.resource,
+            num_log_lines=len(group),
+            message=combined_message,
+        )
+        results.append(result)
+
+    return results
+
+
+def fetch_error_logs(
+    error_id: str,
+    project: str = DEFAULT_GCP_PROJECT,
+    lookback_days: int = 7,
+    min_severity_filter: GCPSeverity | None = None,
+    include_log_envelope_seconds: float = 1.0,
+    max_log_entries: int | None = None,
+) -> GCPLogSearchResult:
+    """Fetch logs from Google Cloud Logging by error ID.
+
+    This function searches GCP Cloud Logging for log entries containing the
+    specified error ID, then fetches related log entries (multi-line stack traces)
+    from the same timestamp and resource.
+    """
+    client_options = {"quota_project_id": project}
+    client = logging.Client(project=project, client_options=client_options)
+
+    filter_str = _build_filter(error_id, lookback_days, min_severity_filter)
+
+    entries_iterator = client.list_entries(
+        filter_=filter_str,
+        order_by=logging.DESCENDING,
+    )
+
+    initial_matches = list(entries_iterator)
+
+    if not initial_matches:
+        return GCPLogSearchResult(
+            error_id=error_id,
+            project=project,
+            lookback_days_searched=lookback_days,
+            total_entries_found=0,
+            entries=[],
+            payloads=[],
+        )
+
+    all_results: list[GCPLogEntry] = []
+    seen_insert_ids: set[str] = set()
+
+    for match in initial_matches:
+        timestamp = match.timestamp
+        resource_type_val = match.resource.type if match.resource else None
+        resource_labels = (
+            dict(match.resource.labels)
+            if match.resource and match.resource.labels
+            else {}
+        )
+        log_name = match.log_name
+
+        start_time = timestamp - timedelta(seconds=include_log_envelope_seconds)
+        end_time = timestamp + timedelta(seconds=include_log_envelope_seconds)
+
+        related_filter_parts = [
+            f'timestamp >= "{start_time.isoformat()}"',
+            f'timestamp <= "{end_time.isoformat()}"',
+        ]
+
+        if log_name:
+            related_filter_parts.append(f'logName="{log_name}"')
+
+        if resource_type_val:
+            related_filter_parts.append(f'resource.type="{resource_type_val}"')
+
+        if "pod_name" in resource_labels:
+            related_filter_parts.append(
+                f'resource.labels.pod_name="{resource_labels["pod_name"]}"'
+            )
+        if "container_name" in resource_labels:
+            related_filter_parts.append(
+                f'resource.labels.container_name="{resource_labels["container_name"]}"'
+            )
+
+        # Note: resource_type_val is extracted from the matched entry, and
+        # min_severity_filter is already applied in the initial search filter
+
+        related_filter = " AND ".join(related_filter_parts)
+
+        related_entries = client.list_entries(
+            filter_=related_filter,
+            order_by=logging.ASCENDING,
+        )
+
+        for entry in related_entries:
+            if entry.insert_id and entry.insert_id not in seen_insert_ids:
+                seen_insert_ids.add(entry.insert_id)
+                all_results.append(_entry_to_model(entry))
+
+    all_results.sort(
+        key=lambda x: x.timestamp or datetime.min.replace(tzinfo=UTC), reverse=True
+    )
+
+    if max_log_entries:
+        all_results = all_results[:max_log_entries]
+
+    payloads = _extract_payloads(all_results)
+
+    return GCPLogSearchResult(
+        error_id=error_id,
+        project=project,
+        lookback_days_searched=lookback_days,
+        total_entries_found=len(all_results),
+        entries=all_results,
+        payloads=payloads,
+    )
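
To make the query shape concrete, this sketch prints the filter that _build_filter assembles (it calls a private helper purely for illustration; the timestamp clause depends on the current time, so the value shown is abbreviated):

    from airbyte_ops_mcp.gcp_logs import GCPSeverity
    from airbyte_ops_mcp.gcp_logs.error_lookup import _build_filter

    print(
        _build_filter(
            error_id="3173452e-8f22-4286-a1ec-b0f16c1e078a",
            lookback_days=7,
            min_severity_filter=GCPSeverity.ERROR,
        )
    )
    # "3173452e-8f22-4286-a1ec-b0f16c1e078a" AND timestamp >= "2025-...+00:00" AND severity>=ERROR
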
@@ -121,6 +121,7 @@ def get_cloud_connector_version(
 
         # Use vendored API client instead of connector.get_connector_version()
         # Use Config API root for version management operations
+        # Pass workspace_id to get detailed scoped configuration context
         version_data = api_client.get_connector_version(
            connector_id=actor_id,
            connector_type=actor_type,
@@ -128,13 +129,31 @@ def get_cloud_connector_version(
            client_id=auth.client_id,
            client_secret=auth.client_secret,
            bearer_token=auth.bearer_token,
+           workspace_id=workspace_id,
+       )
+
+       # Determine if version is pinned from scoped config context (more reliable)
+       # The API's isVersionOverrideApplied only returns true for USER-created pins,
+       # not system-generated pins (e.g., breaking_change origin). Check scopedConfigs
+       # for a more accurate picture of whether ANY pin exists.
+       scoped_configs = version_data.get("scopedConfigs", {})
+       has_any_pin = (
+           any(config is not None for config in scoped_configs.values())
+           if scoped_configs
+           else False
+       )
+
+       # Use scoped config existence as the source of truth for "is pinned"
+       # Fall back to API's isVersionOverrideApplied if no scoped config data
+       is_pinned = (
+           has_any_pin if scoped_configs else version_data["isVersionOverrideApplied"]
        )
 
        return ConnectorVersionInfo(
            connector_id=actor_id,
            connector_type=actor_type,
            version=version_data["dockerImageTag"],
-           is_version_pinned=version_data["isVersionOverrideApplied"],
+           is_version_pinned=is_pinned,
        )
    except CloudAuthError:
        raise
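
A worked example of the new pinning decision, with hypothetical API payloads (only the scopedConfigs and isVersionOverrideApplied keys come from the hunk above; the inner dict shapes are invented for illustration):

    def is_pinned(version_data: dict) -> bool:
        # Mirrors the logic added above: prefer scopedConfigs when present.
        scoped_configs = version_data.get("scopedConfigs", {})
        if scoped_configs:
            return any(config is not None for config in scoped_configs.values())
        return version_data["isVersionOverrideApplied"]

    # User-created pin: both signals agree.
    assert is_pinned(
        {"isVersionOverrideApplied": True, "scopedConfigs": {"workspace": {"origin": "user"}}}
    )
    # System-generated pin (e.g., breaking_change): the API flag alone would miss it.
    assert is_pinned(
        {"isVersionOverrideApplied": False, "scopedConfigs": {"workspace": {"origin": "breaking_change"}}}
    )
    # No scoped config data at all: fall back to the API flag.
    assert not is_pinned({"isVersionOverrideApplied": False})
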
@@ -0,0 +1,92 @@
+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+"""MCP tools for GCP Cloud Logging operations.
+
+This module provides MCP tools for querying GCP Cloud Logging,
+particularly for looking up error details by error ID.
+"""
+
+from __future__ import annotations
+
+from typing import Annotated
+
+from fastmcp import FastMCP
+from pydantic import Field
+
+from airbyte_ops_mcp.gcp_logs import (
+    GCPLogSearchResult,
+    GCPSeverity,
+    fetch_error_logs,
+)
+from airbyte_ops_mcp.gcp_logs.error_lookup import DEFAULT_GCP_PROJECT
+from airbyte_ops_mcp.mcp._mcp_utils import mcp_tool, register_mcp_tools
+
+
+@mcp_tool(
+    read_only=True,
+    idempotent=True,
+)
+def lookup_cloud_backend_error(
+    error_id: Annotated[
+        str,
+        Field(
+            description=(
+                "The error ID (UUID) to search for. This is typically returned "
+                "in API error responses as {'errorId': '...'}"
+            )
+        ),
+    ],
+    project: Annotated[
+        str,
+        Field(
+            default=DEFAULT_GCP_PROJECT,
+            description=(
+                "GCP project ID to search in. Defaults to 'prod-ab-cloud-proj' "
+                "(Airbyte Cloud production)."
+            ),
+        ),
+    ],
+    lookback_days: Annotated[
+        int,
+        Field(
+            default=7,
+            description="Number of days to look back in logs. Defaults to 7.",
+        ),
+    ],
+    min_severity_filter: Annotated[
+        GCPSeverity | None,
+        Field(
+            default=None,
+            description="Optional minimum severity level to filter logs.",
+        ),
+    ],
+    max_log_entries: Annotated[
+        int,
+        Field(
+            default=200,
+            description="Maximum number of log entries to return. Defaults to 200.",
+        ),
+    ],
+) -> GCPLogSearchResult:
+    """Look up error details from GCP Cloud Logging by error ID.
+
+    When an Airbyte Cloud API returns an error response with only an error ID
+    (e.g., {"errorId": "3173452e-8f22-4286-a1ec-b0f16c1e078a"}), this tool
+    fetches the full stack trace and error details from GCP Cloud Logging.
+
+    The tool searches for log entries containing the error ID and fetches
+    related entries (multi-line stack traces) from the same timestamp and pod.
+
+    Requires GCP credentials with Logs Viewer role on the target project.
+    """
+    return fetch_error_logs(
+        error_id=error_id,
+        project=project,
+        lookback_days=lookback_days,
+        min_severity_filter=min_severity_filter,
+        max_log_entries=max_log_entries,
+    )
+
+
+def register_gcp_logs_tools(app: FastMCP) -> None:
+    """Register GCP logs tools with the FastMCP app."""
+    register_mcp_tools(app)
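
And a minimal way to exercise the new tool module end to end: stand up a FastMCP server exposing just this tool (a sketch; it assumes GCP Application Default Credentials with the Logs Viewer role, per the docstring above, and the demo server name is arbitrary):

    from fastmcp import FastMCP

    from airbyte_ops_mcp.mcp.gcp_logs import register_gcp_logs_tools

    app = FastMCP("airbyte-internal-ops-demo")
    register_gcp_logs_tools(app)

    if __name__ == "__main__":
        app.run()  # FastMCP defaults to the stdio transport
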
@@ -24,6 +24,7 @@ from airbyte_ops_mcp.constants import MCP_SERVER_NAME
 from airbyte_ops_mcp.mcp.cloud_connector_versions import (
     register_cloud_connector_version_tools,
 )
+from airbyte_ops_mcp.mcp.gcp_logs import register_gcp_logs_tools
 from airbyte_ops_mcp.mcp.github import register_github_tools
 from airbyte_ops_mcp.mcp.github_repo_ops import register_github_repo_ops_tools
 from airbyte_ops_mcp.mcp.prerelease import register_prerelease_tools
@@ -62,6 +63,7 @@ def register_server_assets(app: FastMCP) -> None:
     register_prerelease_tools(app)
     register_cloud_connector_version_tools(app)
     register_prod_db_query_tools(app)
+    register_gcp_logs_tools(app)
     register_prompts(app)
     register_regression_tests_tools(app)