PyPI - airbyte-internal-ops - Versions diffs - 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl - Mend

airbyte-internal-ops 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41)

airbyte_ops_mcp/mcp/github.py CHANGED Viewed

@@ -14,12 +14,27 @@ import requests
 from fastmcp import FastMCP
 from pydantic import BaseModel, Field
-from airbyte_ops_mcp.github_actions import GITHUB_API_BASE, resolve_github_token
+from airbyte_ops_mcp.github_actions import (
+    GITHUB_API_BASE,
+    get_workflow_jobs,
+    resolve_github_token,
+)
 from airbyte_ops_mcp.mcp._mcp_utils import mcp_tool, register_mcp_tools
 DOCKERHUB_API_BASE = "https://hub.docker.com/v2"
+class JobInfo(BaseModel):
+    """Information about a single job in a workflow run."""
+    job_id: int
+    name: str
+    status: str
+    conclusion: str | None = None
+    started_at: str | None = None
+    completed_at: str | None = None
 class WorkflowRunStatus(BaseModel):
     """Response model for check_workflow_status MCP tool."""
@@ -34,6 +49,7 @@ class WorkflowRunStatus(BaseModel):
     updated_at: str
     run_started_at: str | None = None
     jobs_url: str
+    jobs: list[JobInfo] = []
 def _parse_workflow_url(url: str) -> tuple[str, str, int]:
@@ -148,6 +164,22 @@ def check_workflow_status(
     # Get workflow run details
     run_data = _get_workflow_run(owner, repo, run_id, token)
+    # Get jobs for the workflow run (uses upstream function that resolves its own token)
+    workflow_jobs = get_workflow_jobs(owner, repo, run_id)
+    # Convert dataclass objects to Pydantic models for the response
+    jobs = [
+        JobInfo(
+            job_id=job.job_id,
+            name=job.name,
+            status=job.status,
+            conclusion=job.conclusion,
+            started_at=job.started_at,
+            completed_at=job.completed_at,
+        )
+        for job in workflow_jobs
+    ]
     return WorkflowRunStatus(
         run_id=run_data["id"],
         status=run_data["status"],
@@ -160,6 +192,7 @@ def check_workflow_status(
         updated_at=run_data["updated_at"],
         run_started_at=run_data.get("run_started_at"),
         jobs_url=run_data["jobs_url"],
+        jobs=jobs,
     )

airbyte_ops_mcp/mcp/prerelease.py CHANGED Viewed

@@ -228,17 +228,17 @@ def publish_connector_to_airbyte_registry(
     # Guard: Check for required token
     token = resolve_github_token(PRERELEASE_TOKEN_ENV_VARS)
-    # Get the PR's head ref and SHA
+    # Get the PR's head SHA for computing the docker image tag
+    # Note: We no longer pass gitref to the workflow - it derives the ref from PR number
     head_info = _get_pr_head_info(
         DEFAULT_REPO_OWNER, DEFAULT_REPO_NAME, pr_number, token
     )
     # Prepare workflow inputs
-    # The workflow expects these inputs from slash-command-dispatch
+    # The workflow uses refs/pull/{pr}/head directly - no gitref needed
     # Note: The workflow auto-detects modified connectors from the PR
     workflow_inputs = {
         "repo": f"{DEFAULT_REPO_OWNER}/{DEFAULT_REPO_NAME}",
-        "gitref": head_info.ref,
         "pr": str(pr_number),
     }

airbyte_ops_mcp/mcp/prod_db_queries.py CHANGED Viewed

@@ -7,28 +7,78 @@ airbyte_ops_mcp.prod_db_access.queries for use by AI agents.
 from __future__ import annotations
+from datetime import datetime
 from typing import Annotated, Any
 import requests
 from airbyte.exceptions import PyAirbyteInputError
 from fastmcp import FastMCP
-from pydantic import Field
+from pydantic import BaseModel, Field
+from airbyte_ops_mcp.constants import OrganizationAliasEnum
 from airbyte_ops_mcp.mcp._mcp_utils import mcp_tool, register_mcp_tools
 from airbyte_ops_mcp.prod_db_access.queries import (
     query_actors_pinned_to_version,
     query_connections_by_connector,
+    query_connections_by_destination_connector,
     query_connector_versions,
     query_dataplanes_list,
     query_failed_sync_attempts_for_connector,
     query_new_connector_releases,
     query_sync_results_for_version,
     query_workspace_info,
+    query_workspaces_by_email_domain,
 )
 # Cloud UI base URL for building connection URLs
 CLOUD_UI_BASE_URL = "https://cloud.airbyte.com"
+# =============================================================================
+# Pydantic Models for MCP Tool Responses
+# =============================================================================
+class WorkspaceInfo(BaseModel):
+    """Information about a workspace found by email domain search."""
+    organization_id: str = Field(description="The organization UUID")
+    workspace_id: str = Field(description="The workspace UUID")
+    workspace_name: str = Field(description="The name of the workspace")
+    slug: str | None = Field(
+        default=None, description="The workspace slug (URL-friendly identifier)"
+    )
+    email: str | None = Field(
+        default=None, description="The email address associated with the workspace"
+    )
+    dataplane_group_id: str | None = Field(
+        default=None, description="The dataplane group UUID (region)"
+    )
+    dataplane_name: str | None = Field(
+        default=None, description="The name of the dataplane (e.g., 'US', 'EU')"
+    )
+    created_at: datetime | None = Field(
+        default=None, description="When the workspace was created"
+    )
+class WorkspacesByEmailDomainResult(BaseModel):
+    """Result of looking up workspaces by email domain."""
+    email_domain: str = Field(
+        description="The email domain that was searched for (e.g., 'motherduck.com')"
+    )
+    total_workspaces_found: int = Field(
+        description="Total number of workspaces matching the email domain"
+    )
+    unique_organization_ids: list[str] = Field(
+        description="List of unique organization IDs found"
+    )
+    workspaces: list[WorkspaceInfo] = Field(
+        description="List of workspaces matching the email domain"
+    )
 # Cloud registry URL for resolving canonical names
 CLOUD_REGISTRY_URL = (
     "https://connectors.airbyte.com/files/registries/v0/cloud_registry.json"
@@ -36,13 +86,18 @@ CLOUD_REGISTRY_URL = (
 def _resolve_canonical_name_to_definition_id(canonical_name: str) -> str:
-    """Resolve a canonical source name to a definition ID.
+    """Resolve a canonical connector name to a definition ID.
+    Auto-detects whether the connector is a source or destination based on the
+    canonical name prefix ("source-" or "destination-"). If no prefix is present,
+    searches both sources and destinations.
     Args:
-        canonical_name: Canonical source name (e.g., 'source-youtube-analytics').
+        canonical_name: Canonical connector name (e.g., 'source-youtube-analytics',
+            'destination-duckdb', 'YouTube Analytics', 'DuckDB').
     Returns:
-        The source definition ID (UUID).
+        The connector definition ID (UUID).
     Raises:
         PyAirbyteInputError: If the canonical name cannot be resolved.
@@ -56,31 +111,65 @@ def _resolve_canonical_name_to_definition_id(canonical_name: str) -> str:
         )
     data = response.json()
-    sources = data.get("sources", [])
-    # Normalize the canonical name for matching
     normalized_input = canonical_name.lower().strip()
-    # Try exact match on name field
-    for source in sources:
-        source_name = source.get("name", "").lower()
-        # The registry returns names like "YouTube Analytics"
-        # So we need to handle both formats
-        if source_name == normalized_input:
-            return source["sourceDefinitionId"]
-        # Also try matching against a slugified version
-        # e.g., "YouTube Analytics" -> "youtube-analytics"
-        slugified = source_name.replace(" ", "-")
-        if slugified == normalized_input or f"source-{slugified}" == normalized_input:
-            return source["sourceDefinitionId"]
+    # Determine which registries to search based on prefix
+    is_source = normalized_input.startswith("source-")
+    is_destination = normalized_input.startswith("destination-")
+    # Search sources if it looks like a source or has no prefix
+    if is_source or not is_destination:
+        sources = data.get("sources", [])
+        for source in sources:
+            source_name = source.get("name", "").lower()
+            if source_name == normalized_input:
+                return source["sourceDefinitionId"]
+            slugified = source_name.replace(" ", "-")
+            if (
+                slugified == normalized_input
+                or f"source-{slugified}" == normalized_input
+            ):
+                return source["sourceDefinitionId"]
+    # Search destinations if it looks like a destination or has no prefix
+    if is_destination or not is_source:
+        destinations = data.get("destinations", [])
+        for destination in destinations:
+            destination_name = destination.get("name", "").lower()
+            if destination_name == normalized_input:
+                return destination["destinationDefinitionId"]
+            slugified = destination_name.replace(" ", "-")
+            if (
+                slugified == normalized_input
+                or f"destination-{slugified}" == normalized_input
+            ):
+                return destination["destinationDefinitionId"]
+    # Build appropriate error message based on what was searched
+    if is_source:
+        connector_type = "source"
+        hint = (
+            "Use the exact canonical name (e.g., 'source-youtube-analytics') "
+            "or display name (e.g., 'YouTube Analytics')."
+        )
+    elif is_destination:
+        connector_type = "destination"
+        hint = (
+            "Use the exact canonical name (e.g., 'destination-duckdb') "
+            "or display name (e.g., 'DuckDB')."
+        )
+    else:
+        connector_type = "connector"
+        hint = (
+            "Use the exact canonical name (e.g., 'source-youtube-analytics', "
+            "'destination-duckdb') or display name (e.g., 'YouTube Analytics', 'DuckDB')."
+        )
     raise PyAirbyteInputError(
-        message=f"Could not find source definition for canonical name: {canonical_name}",
+        message=f"Could not find {connector_type} definition for canonical name: {canonical_name}",
         context={
-            "hint": "Use the exact canonical name (e.g., 'source-youtube-analytics') "
-            "or display name (e.g., 'YouTube Analytics'). "
-            "You can list available sources using the connector registry tools.",
+            "hint": hint
+            + " You can list available connectors using the connector registry tools.",
             "searched_for": canonical_name,
         },
     )
@@ -275,11 +364,12 @@ def query_prod_failed_sync_attempts_for_connector(
         ),
     ] = None,
     organization_id: Annotated[
-        str | None,
+        str | OrganizationAliasEnum | None,
         Field(
             description=(
-                "Optional organization ID (UUID) to filter results. "
-                "If provided, only failed attempts from this organization will be returned."
+                "Optional organization ID (UUID) or alias to filter results. "
+                "If provided, only failed attempts from this organization will be returned. "
+                "Accepts '@airbyte-internal' as an alias for the Airbyte internal org."
             ),
             default=None,
         ),
@@ -327,9 +417,12 @@ def query_prod_failed_sync_attempts_for_connector(
     else:
         resolved_definition_id = source_definition_id  # type: ignore[assignment]
+    # Resolve organization ID alias
+    resolved_organization_id = OrganizationAliasEnum.resolve(organization_id)
     return query_failed_sync_attempts_for_connector(
         connector_definition_id=resolved_definition_id,
-        organization_id=organization_id,
+        organization_id=resolved_organization_id,
         days=days,
         limit=limit,
     )
@@ -346,7 +439,8 @@ def query_prod_connections_by_connector(
         Field(
             description=(
                 "Source connector definition ID (UUID) to search for. "
-                "Exactly one of this or source_canonical_name is required. "
+                "Exactly one of source_definition_id, source_canonical_name, "
+                "destination_definition_id, or destination_canonical_name is required. "
                 "Example: 'afa734e4-3571-11ec-991a-1e0031268139' for YouTube Analytics."
             ),
             default=None,
@@ -357,18 +451,44 @@ def query_prod_connections_by_connector(
         Field(
             description=(
                 "Canonical source connector name to search for. "
-                "Exactly one of this or source_definition_id is required. "
+                "Exactly one of source_definition_id, source_canonical_name, "
+                "destination_definition_id, or destination_canonical_name is required. "
                 "Examples: 'source-youtube-analytics', 'YouTube Analytics'."
             ),
             default=None,
         ),
     ] = None,
-    organization_id: Annotated[
+    destination_definition_id: Annotated[
         str | None,
         Field(
             description=(
-                "Optional organization ID (UUID) to filter results. "
-                "If provided, only connections in this organization will be returned."
+                "Destination connector definition ID (UUID) to search for. "
+                "Exactly one of source_definition_id, source_canonical_name, "
+                "destination_definition_id, or destination_canonical_name is required. "
+                "Example: 'e5c8e66c-a480-4a5e-9c0e-e8e5e4c5c5c5' for DuckDB."
+            ),
+            default=None,
+        ),
+    ] = None,
+    destination_canonical_name: Annotated[
+        str | None,
+        Field(
+            description=(
+                "Canonical destination connector name to search for. "
+                "Exactly one of source_definition_id, source_canonical_name, "
+                "destination_definition_id, or destination_canonical_name is required. "
+                "Examples: 'destination-duckdb', 'DuckDB'."
+            ),
+            default=None,
+        ),
+    ] = None,
+    organization_id: Annotated[
+        str | OrganizationAliasEnum | None,
+        Field(
+            description=(
+                "Optional organization ID (UUID) or alias to filter results. "
+                "If provided, only connections in this organization will be returned. "
+                "Accepts '@airbyte-internal' as an alias for the Airbyte internal org."
             ),
             default=None,
         ),
@@ -378,38 +498,88 @@ def query_prod_connections_by_connector(
         Field(description="Maximum number of results (default: 1000)", default=1000),
     ] = 1000,
 ) -> list[dict[str, Any]]:
-    """Search for all connections using a specific source connector type.
+    """Search for all connections using a specific source or destination connector type.
     This tool queries the Airbyte Cloud Prod DB Replica directly for fast results.
-    It finds all connections where the source connector matches the specified type,
-    regardless of how the source is named by users.
+    It finds all connections where the source or destination connector matches the
+    specified type, regardless of how the connector is named by users.
     Optionally filter by organization_id to limit results to a specific organization.
+    Use '@airbyte-internal' as an alias for the Airbyte internal organization.
     Returns a list of connection dicts with workspace context and clickable Cloud UI URLs.
-    Each dict contains: connection_id, connection_name, connection_url, source_id,
+    For source queries, returns: connection_id, connection_name, connection_url, source_id,
     source_name, source_definition_id, workspace_id, workspace_name, organization_id,
     dataplane_group_id, dataplane_name.
+    For destination queries, returns: connection_id, connection_name, connection_url,
+    destination_id, destination_name, destination_definition_id, workspace_id,
+    workspace_name, organization_id, dataplane_group_id, dataplane_name.
     """
-    # Validate that exactly one of the two parameters is provided
-    if (source_definition_id is None) == (source_canonical_name is None):
+    # Validate that exactly one of the four connector parameters is provided
+    provided_params = [
+        source_definition_id,
+        source_canonical_name,
+        destination_definition_id,
+        destination_canonical_name,
+    ]
+    num_provided = sum(p is not None for p in provided_params)
+    if num_provided != 1:
         raise PyAirbyteInputError(
             message=(
-                "Exactly one of source_definition_id or source_canonical_name "
-                "must be provided, but not both."
+                "Exactly one of source_definition_id, source_canonical_name, "
+                "destination_definition_id, or destination_canonical_name must be provided."
             ),
         )
-    # Resolve canonical name to definition ID if needed
+    # Determine if this is a source or destination query and resolve the definition ID
+    is_source_query = (
+        source_definition_id is not None or source_canonical_name is not None
+    )
     resolved_definition_id: str
     if source_canonical_name:
         resolved_definition_id = _resolve_canonical_name_to_definition_id(
             canonical_name=source_canonical_name,
         )
+    elif source_definition_id:
+        resolved_definition_id = source_definition_id
+    elif destination_canonical_name:
+        resolved_definition_id = _resolve_canonical_name_to_definition_id(
+            canonical_name=destination_canonical_name,
+        )
     else:
-        resolved_definition_id = source_definition_id  # type: ignore[assignment]
-    # Query the database and transform rows to include connection URLs
+        resolved_definition_id = destination_definition_id  # type: ignore[assignment]
+    # Resolve organization ID alias
+    resolved_organization_id = OrganizationAliasEnum.resolve(organization_id)
+    # Query the database based on connector type
+    if is_source_query:
+        return [
+            {
+                "organization_id": str(row.get("organization_id", "")),
+                "workspace_id": str(row["workspace_id"]),
+                "workspace_name": row.get("workspace_name", ""),
+                "connection_id": str(row["connection_id"]),
+                "connection_name": row.get("connection_name", ""),
+                "connection_url": (
+                    f"{CLOUD_UI_BASE_URL}/workspaces/{row['workspace_id']}"
+                    f"/connections/{row['connection_id']}/status"
+                ),
+                "source_id": str(row["source_id"]),
+                "source_name": row.get("source_name", ""),
+                "source_definition_id": str(row["source_definition_id"]),
+                "dataplane_group_id": str(row.get("dataplane_group_id", "")),
+                "dataplane_name": row.get("dataplane_name", ""),
+            }
+            for row in query_connections_by_connector(
+                connector_definition_id=resolved_definition_id,
+                organization_id=resolved_organization_id,
+                limit=limit,
+            )
+        ]
+    # Destination query
     return [
         {
             "organization_id": str(row.get("organization_id", "")),
@@ -421,20 +591,93 @@ def query_prod_connections_by_connector(
                 f"{CLOUD_UI_BASE_URL}/workspaces/{row['workspace_id']}"
                 f"/connections/{row['connection_id']}/status"
             ),
-            "source_id": str(row["source_id"]),
-            "source_name": row.get("source_name", ""),
-            "source_definition_id": str(row["source_definition_id"]),
+            "destination_id": str(row["destination_id"]),
+            "destination_name": row.get("destination_name", ""),
+            "destination_definition_id": str(row["destination_definition_id"]),
             "dataplane_group_id": str(row.get("dataplane_group_id", "")),
             "dataplane_name": row.get("dataplane_name", ""),
         }
-        for row in query_connections_by_connector(
+        for row in query_connections_by_destination_connector(
             connector_definition_id=resolved_definition_id,
-            organization_id=organization_id,
+            organization_id=resolved_organization_id,
             limit=limit,
         )
     ]
+@mcp_tool(
+    read_only=True,
+    idempotent=True,
+)
+def query_prod_workspaces_by_email_domain(
+    email_domain: Annotated[
+        str,
+        Field(
+            description=(
+                "Email domain to search for (e.g., 'motherduck.com', 'fivetran.com'). "
+                "Do not include the '@' symbol. This will find workspaces where users "
+                "have email addresses with this domain."
+            ),
+        ),
+    ],
+    limit: Annotated[
+        int,
+        Field(
+            description="Maximum number of workspaces to return (default: 100)",
+            default=100,
+        ),
+    ] = 100,
+) -> WorkspacesByEmailDomainResult:
+    """Find workspaces by email domain.
+    This tool searches for workspaces where users have email addresses matching
+    the specified domain. This is useful for identifying workspaces belonging to
+    specific companies - for example, searching for "motherduck.com" will find
+    workspaces belonging to MotherDuck employees.
+    Use cases:
+    - Finding partner organization connections for testing connector fixes
+    - Identifying internal test accounts for specific integrations
+    - Locating workspaces belonging to technology partners
+    The returned organization IDs can be used with other tools like
+    `query_prod_connections_by_connector` to find connections within
+    those organizations for safe testing.
+    """
+    # Strip leading @ if provided
+    clean_domain = email_domain.lstrip("@")
+    # Query the database
+    rows = query_workspaces_by_email_domain(email_domain=clean_domain, limit=limit)
+    # Convert rows to Pydantic models
+    workspaces = [
+        WorkspaceInfo(
+            organization_id=str(row["organization_id"]),
+            workspace_id=str(row["workspace_id"]),
+            workspace_name=row.get("workspace_name", ""),
+            slug=row.get("slug"),
+            email=row.get("email"),
+            dataplane_group_id=str(row["dataplane_group_id"])
+            if row.get("dataplane_group_id")
+            else None,
+            dataplane_name=row.get("dataplane_name"),
+            created_at=row.get("created_at"),
+        )
+        for row in rows
+    ]
+    # Extract unique organization IDs
+    unique_org_ids = list(dict.fromkeys(w.organization_id for w in workspaces))
+    return WorkspacesByEmailDomainResult(
+        email_domain=clean_domain,
+        total_workspaces_found=len(workspaces),
+        unique_organization_ids=unique_org_ids,
+        workspaces=workspaces,
+    )
 def register_prod_db_query_tools(app: FastMCP) -> None:
     """Register prod DB query tools with the FastMCP app."""
     register_mcp_tools(app, domain=__name__)

airbyte-internal-ops 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

airbyte-internal-ops 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl