PyPI - cc-plugin-codex - Versions diffs - 0.1.4__py3-none-any.whl - Mend

cc-plugin-codex 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

cc_plugin_codex/__init__.py +5 -0
cc_plugin_codex/claude.py +284 -0
cc_plugin_codex/cli_contract.py +122 -0
cc_plugin_codex/config.py +172 -0
cc_plugin_codex/context.py +210 -0
cc_plugin_codex/jobs.py +561 -0
cc_plugin_codex/normalize.py +243 -0
cc_plugin_codex/preflight.py +94 -0
cc_plugin_codex/py.typed +0 -0
cc_plugin_codex/schemas.py +344 -0
cc_plugin_codex/server.py +1656 -0
cc_plugin_codex-0.1.4.dist-info/METADATA +223 -0
cc_plugin_codex-0.1.4.dist-info/RECORD +16 -0
cc_plugin_codex-0.1.4.dist-info/WHEEL +4 -0
cc_plugin_codex-0.1.4.dist-info/entry_points.txt +2 -0
cc_plugin_codex-0.1.4.dist-info/licenses/LICENSE +21 -0

cc_plugin_codex/normalize.py ADDED Viewed

@@ -0,0 +1,243 @@
+"""Build per-tool prompts and normalize claude's JSON envelope into the contract."""
+from __future__ import annotations
+import json
+from typing import Any, cast
+from cc_plugin_codex import cli_contract
+from cc_plugin_codex.claude import contract_changed_error
+from cc_plugin_codex.schemas import (
+    Confidence,
+    ContextSummary,
+    ErrorInfo,
+    ErrorResult,
+    Finding,
+    Meta,
+    RawResponse,
+    Severity,
+    SuccessResult,
+    Usage,
+    Verdict,
+)
+# Output-format contract appended to the end of every prompt by build_prompt. The
+# keys named here are the ones normalize_envelope reads back out of the reply.
+_SCHEMA_INSTRUCTION = (
+    "Respond with ONLY a single JSON object (no prose, no code fence) with keys: "
+    '"summary" (string), "verdict" (one of pass|concerns|fail|unknown), '
+    '"confidence" (one of low|medium|high), "findings" (array of objects with '
+    "severity[critical|high|medium|low|nit], title, file, line, line_end (optional "
+    "end line for multi-line findings), evidence, risk, recommendation), "
+    '"questions" (array of strings), "assumptions" (array of strings), '
+    '"next_steps" (array of strings: concrete actions to take next).'
+)
+# Opening instruction per tool name. build_prompt falls back to the "claude_ask"
+# lead when the tool name is not a key here.
+_LEAD = {
+    "claude_ask": "Give an independent second opinion on the following question.",
+    "claude_review_changes": "Review the following code changes for correctness, "
+    "regressions, security, and missing tests.",
+    "claude_adversarial_review": "Attack the following plan/claim. Find the strongest "
+    "counterarguments, failure modes, and risks.",
+}
+# Accepted values for the reply's verdict / confidence / finding severity. A value
+# outside the relevant set is replaced with a default by _clamp.
+_VALID_VERDICT = {"pass", "concerns", "fail", "unknown"}
+_VALID_CONFIDENCE = {"low", "medium", "high"}
+_VALID_SEVERITY = {"critical", "high", "medium", "low", "nit"}
+def _str_list(value: Any) -> list[str]:
+    return [str(x) for x in value if x] if isinstance(value, list) else []
+def build_prompt(tool: str, payload: dict[str, Any], context_text: str) -> str:
+    parts = [_LEAD.get(tool, _LEAD["claude_ask"])]
+    if tool == "claude_ask":
+        parts.append(payload["prompt"])
+        if payload.get("context"):
+            parts.append(f"\nAdditional context:\n{payload['context']}")
+    elif tool == "claude_review_changes":
+        if payload.get("focus"):
+            parts.append(f"Focus especially on: {payload['focus']}.")
+        parts.append(f"\nChanges (scope={payload.get('scope')}):\n{context_text}")
+    elif tool == "claude_adversarial_review":
+        parts.append(f"\nTarget:\n{payload['target']}")
+        if payload.get("evidence"):
+            parts.append(f"\nEvidence:\n{payload['evidence']}")
+        if context_text:
+            parts.append(f"\nRelated changes:\n{context_text}")
+    parts.append("\n" + _SCHEMA_INSTRUCTION)
+    return "\n".join(parts)
+def extract_json(text: str) -> dict | None:
+    decoder = json.JSONDecoder()
+    def scan(candidate: str) -> dict | None:
+        for idx, char in enumerate(candidate):
+            if char != "{":
+                continue
+            try:
+                parsed, _ = decoder.raw_decode(candidate[idx:])
+            except json.JSONDecodeError:
+                continue
+            if isinstance(parsed, dict):
+                return parsed
+        return None
+    fence_start = 0
+    while True:
+        start = text.find("```", fence_start)
+        if start < 0:
+            break
+        body_start = text.find("\n", start + 3)
+        if body_start < 0:
+            break
+        end = text.find("```", body_start + 1)
+        if end < 0:
+            break
+        parsed = scan(text[body_start + 1 : end])
+        if parsed is not None:
+            return parsed
+        fence_start = end + 3
+    return scan(text)
+def _clamp(value: Any, allowed: set[str], default: str) -> str:
+    return value if value in allowed else default
+def _clean_findings(raw: Any) -> list[Finding]:
+    findings: list[Finding] = []
+    if not isinstance(raw, list):
+        return findings
+    for f in raw:
+        if not isinstance(f, dict):
+            continue
+        if not all(f.get(k) for k in ("title", "evidence", "risk", "recommendation")):
+            continue  # drop incomplete findings rather than fabricate fields
+        line = f.get("line")
+        line_end = f.get("line_end")
+        findings.append(
+            Finding(
+                severity=cast("Severity", _clamp(f.get("severity"), _VALID_SEVERITY, "low")),
+                title=str(f["title"]),
+                file=str(f["file"]) if f.get("file") else None,
+                line=line if isinstance(line, int) else None,
+                line_end=line_end if isinstance(line_end, int) else None,
+                evidence=str(f["evidence"]),
+                risk=str(f["risk"]),
+                recommendation=str(f["recommendation"]),
+            )
+        )
+    return findings
+def _error(info: ErrorInfo, meta: Meta) -> dict:
+    return ErrorResult(error=info, meta=meta).model_dump(mode="json", exclude_none=True)
+def apply_cost_usage(meta: Meta, env: dict) -> None:
+    """Plumb total_cost_usd / usage from a claude JSON envelope onto meta.
+    Used on both the success path and the non-zero-exit error path, so a failed
+    paid call (e.g. budget_exceeded) still reports what it spent when available."""
+    cost = env.get("total_cost_usd")
+    if isinstance(cost, (int, float)):
+        meta.cost_usd = float(cost)
+    raw_usage = env.get("usage")
+    if isinstance(raw_usage, dict):
+        meta.usage = Usage(
+            input_tokens=raw_usage.get("input_tokens"),
+            output_tokens=raw_usage.get("output_tokens"),
+            cache_read_input_tokens=raw_usage.get("cache_read_input_tokens"),
+            cache_creation_input_tokens=raw_usage.get("cache_creation_input_tokens"),
+        )
+def normalize_envelope(
+    tool: str,
+    stdout: str,
+    meta: Meta,
+    detail: str,
+    context_summary: ContextSummary | None = None,
+) -> dict:
+    try:
+        env = json.loads(stdout)
+    except json.JSONDecodeError:
+        return _error(
+            ErrorInfo(
+                code="invalid_json",
+                message="claude did not return valid JSON.",
+                repair="Retry; if it persists, reduce context size.",
+            ),
+            meta,
+        )
+    # Plumb cost and usage onto meta regardless of success/error path.
+    apply_cost_usage(meta, env)
+    if env.get("is_error") or env.get("subtype") not in cli_contract.SUCCESS_SUBTYPES:
+        detail = (env.get("result") or "").strip() or (env.get("subtype") or "unknown error")
+        # A drift signature can arrive as a zero-exit is_error envelope (not just a
+        # nonzero exit), so classify it the same way here.
+        if cli_contract.is_contract_drift(env.get("result"), env.get("subtype")):
+            return _error(contract_changed_error(), meta)
+        return _error(
+            ErrorInfo(
+                code="nonzero_exit",
+                message=f"claude reported an error: {detail[:200]}",
+                repair="Inspect the error; retry with a smaller or corrected request.",
+            ),
+            meta,
+        )
+    text = env.get("result", "") or ""
+    raw = RawResponse(
+        text=text if detail == "full" else None,
+        session_id=env.get("session_id"),
+        model=next(iter(env.get("modelUsage") or {}), None),
+    )
+    inner = extract_json(text)
+    # If Claude was blocked by denied tools AND produced nothing usable, surface it.
+    denials = env.get("permission_denials") or []
+    if denials and (inner is None and not text.strip()):
+        return _error(
+            ErrorInfo(
+                code="claude_permission_error",
+                message=f"claude was denied required tools: {str(denials)[:160]}",
+                repair="Use access=toolless, or allow the needed read-only tools.",
+            ),
+            meta,
+        )
+    if inner is None:
+        result = SuccessResult(
+            tool=tool,
+            summary=text.strip()[:500] or "(no content)",
+            verdict="unknown",
+            confidence="low",
+            raw_response=raw,
+            context_summary=context_summary if detail == "full" else None,
+            meta=meta,
+        )
+        if denials:
+            result.meta.permission_denials = denials
+        return result.model_dump(mode="json", exclude_none=True)
+    result = SuccessResult(
+        tool=tool,
+        summary=str(inner.get("summary", "")),
+        verdict=cast("Verdict", _clamp(inner.get("verdict"), _VALID_VERDICT, "unknown")),
+        confidence=cast("Confidence", _clamp(inner.get("confidence"), _VALID_CONFIDENCE, "low")),
+        findings=_clean_findings(inner.get("findings", [])),
+        questions=_str_list(inner.get("questions")),
+        assumptions=_str_list(inner.get("assumptions")),
+        next_steps=_str_list(inner.get("next_steps")),
+        raw_response=raw,
+        context_summary=context_summary if detail == "full" else None,
+        meta=meta,
+    )
+    if denials:
+        result.meta.permission_denials = denials
+    return result.model_dump(mode="json", exclude_none=True)

cc_plugin_codex/preflight.py ADDED Viewed

@@ -0,0 +1,94 @@
+"""Feature-detect which `claude` flags exist, by parsing `claude --help` once.
+Only the HELP_GATED flags (depth/cosmetic) are gated on this probe: dropping one
+when absent keeps the server working across a minor upstream change. The
+guarantee-bearing ALWAYS_SEND flags are never gated here — their removal is caught
+loudly at run time (cli_contract_changed), not silently pre-empted, because
+`--help` parsing is fuzzy and a false negative must never drop a safety/cost flag.
+Everything degrades, nothing crashes: any probe failure yields help_parsed=False,
+which makes is_supported() return True for every flag (fail open == today's
+behavior)."""
+from __future__ import annotations
+import re
+import subprocess
+import time
+from dataclasses import dataclass
+from cc_plugin_codex import cli_contract
+# Matches a long option as it appears in help text: "--", a lowercase letter, then
+# one or more lowercase letters, digits, or hyphens (e.g. "--max-budget-usd").
+_LONG_FLAG_RE = re.compile(r"--[a-z][a-z0-9-]+")
+@dataclass(frozen=True)
+class FlagSupport:
+    """Immutable outcome of one `claude --help` probe, as built by flag_support()."""
+    # Long-flag names found in the help text; empty when the probe failed.
+    supported: frozenset[str]
+    help_parsed: bool  # False => probe failed; callers must fail open
+# Process-level cache: (monotonic_timestamp, FlagSupport). A long-lived MCP server
+# re-probes after HELP_CACHE_TTL_SECONDS so an in-place `claude` upgrade is noticed.
+# None until the first probe (and again after reset_cache()); flag_support()
+# replaces the tuple wholesale on every probe, including failed ones.
+_cache: tuple[float, FlagSupport] | None = None
+def _probe_help() -> str:
+    """Return the combined `claude --help` text, or "" on any failure. Never raises."""
+    try:
+        proc = subprocess.run(
+            [cli_contract.CLAUDE_BIN, *cli_contract.HELP_ARGS],
+            capture_output=True,
+            text=True,
+            timeout=10,
+            check=False,
+        )
+    except (OSError, subprocess.SubprocessError):
+        return ""
+    return f"{proc.stdout}\n{proc.stderr}"
+def _parse_supported(help_text: str) -> frozenset[str]:
+    """Extract long-flag names from help text. Deliberately tolerant: this only
+    governs HELP_GATED flags, where a stray/missing match drops a harmless flag."""
+    return frozenset(_LONG_FLAG_RE.findall(help_text))
+def flag_support(force: bool = False) -> FlagSupport:
+    """Cached FlagSupport for the installed `claude`. force=True bypasses the cache
+    (used by tests / diagnostics)."""
+    global _cache  # noqa: PLW0603 — intentional process-level memoization of the help probe
+    now = time.monotonic()
+    if not force and _cache is not None:
+        stamped, value = _cache
+        if now - stamped < cli_contract.HELP_CACHE_TTL_SECONDS:
+            return value
+    help_text = _probe_help()
+    if not help_text.strip():
+        value = FlagSupport(supported=frozenset(), help_parsed=False)
+    else:
+        value = FlagSupport(supported=_parse_supported(help_text), help_parsed=True)
+    _cache = (now, value)
+    return value
+def reset_cache() -> None:
+    """Drop the cached probe (used by tests).
+    The next flag_support() call will run `claude --help` again."""
+    global _cache  # noqa: PLW0603 — resets the intentional module-level cache
+    _cache = None
+def is_supported(flag: str, fs: FlagSupport) -> bool:
+    """Whether `flag` may be sent. Fails OPEN: when the probe could not run
+    (help_parsed=False) every flag is treated as supported, preserving today's
+    behavior."""
+    return (not fs.help_parsed) or (flag in fs.supported)
+def missing_expected_flags(fs: FlagSupport) -> list[str]:
+    """Guarantee-bearing ALWAYS_SEND flags that `--help` did not list. Empty when
+    the probe could not run (so we never warn on a failed probe). Diagnostic only —
+    surfaced by claude_status, it does NOT gate execution."""
+    if not fs.help_parsed:
+        return []
+    return sorted(f for f in cli_contract.ALWAYS_SEND_FLAGS if f not in fs.supported)

cc_plugin_codex/py.typed ADDED Viewed

File without changes

cc_plugin_codex/schemas.py ADDED Viewed

@@ -0,0 +1,344 @@
+"""Pydantic models for the normalized tool result contract."""
+from __future__ import annotations
+from typing import Literal
+from uuid import uuid4
+from pydantic import BaseModel, ConfigDict, Field, TypeAdapter
+# Bump this whenever the agent-visible surface changes: tool names, input or
+# output schemas, the ErrorCode set, the config_mode/access/scope/detail value
+# sets, or the capability guarantees in CAPABILITY_SUMMARY. Clients cache by it.
+FINGERPRINT = "cc-plugin-codex/0.1/schema-12"
+# Closed value sets of the contract; most are used as field types by the models below.
+Severity = Literal["critical", "high", "medium", "low", "nit"]
+Verdict = Literal["pass", "concerns", "fail", "unknown"]
+Confidence = Literal["low", "medium", "high"]
+ConfigMode = Literal["inherit", "scoped", "bare"]
+Access = Literal["toolless", "readonly"]
+Scope = Literal["working_tree", "staged", "branch"]
+Detail = Literal["summary", "full"]
+Effort = Literal["low", "medium", "high", "xhigh", "max"]
+# Lifecycle states for a background job. Terminal: done|failed|cancelled|timeout.
+# (TTL-expired records are deleted and reported as job_not_found, not a state.)
+JobState = Literal["running", "done", "failed", "cancelled", "timeout"]
+def workspace_warning_for(source: str | None, cwd: str) -> str | None:
+    """Warning when the workspace was resolved from the server's own cwd.
+    The MCP server process launches from its install directory, so a cwd-resolved
+    workspace silently reviews the wrong repo. Surfacing this (rather than failing)
+    lets agents notice and pass workspace_root without breaking existing callers.
+    Shared by the sync meta builder and the background-job meta rebuild so the two
+    paths cannot drift."""
+    if source == "cwd":
+        return (
+            f"workspace resolved from the server's own cwd ({cwd}); pass "
+            "workspace_root (or configure an MCP root) to be sure the review "
+            "targets the intended repository"
+        )
+    return None
+# Closed set of machine-readable error codes; this is the type of ErrorInfo.code.
+# Per the FINGERPRINT note, changing this set is an agent-visible surface change.
+ErrorCode = Literal[
+    "claude_not_found",
+    "claude_auth_required",
+    "api_key_missing",
+    "api_key_invalid",
+    "unsupported_config_mode",
+    "unsupported_access",
+    "invalid_scope",
+    "invalid_base",
+    "invalid_workspace_root",
+    "workspace_outside_roots",
+    "context_too_large",
+    "timeout",
+    "budget_exceeded",
+    "claude_permission_error",
+    "nonzero_exit",
+    "invalid_json",
+    "internal_error",
+    # The installed `claude` rejected a flag/value this plugin sends — its CLI
+    # contract drifted and the plugin likely needs an update.
+    "cli_contract_changed",
+    # Background-job lifecycle errors (claude_job_result for a non-done job):
+    "job_not_found",
+    "job_running",
+    "job_cancelled",
+    "job_timeout",
+    "job_failed",
+]
+class Usage(BaseModel):
+    # Token counters for one claude call. Every field is optional; unknown
+    # fields are rejected (extra="forbid").
+    model_config = ConfigDict(extra="forbid")
+    input_tokens: int | None = None
+    output_tokens: int | None = None
+    cache_read_input_tokens: int | None = None
+    cache_creation_input_tokens: int | None = None
+class Finding(BaseModel):
+    # One review finding. title, evidence, risk and recommendation are required;
+    # the location fields (file, line, line_end) are optional.
+    model_config = ConfigDict(extra="forbid")
+    severity: Severity
+    title: str
+    file: str | None = None
+    line: int | None = None
+    line_end: int | None = None  # end line when the finding spans a range (line = start)
+    evidence: str
+    risk: str
+    recommendation: str
+class RawResponse(BaseModel):
+    # Pass-through details of the underlying claude reply. All fields are optional,
+    # so an empty instance is valid (SuccessResult uses one as its default).
+    model_config = ConfigDict(extra="forbid")
+    text: str | None = None
+    session_id: str | None = None
+    model: str | None = None
+class ContextSummary(BaseModel):
+    # Size counters for the reviewed change set; all default to 0.
+    # NOTE(review): presumably derived from the git diff sent to claude — confirm
+    # against the code that builds it.
+    model_config = ConfigDict(extra="forbid")
+    files_changed: int = 0
+    lines_added: int = 0
+    lines_removed: int = 0
+class Meta(BaseModel):
+    # Per-call metadata attached to SuccessResult, ErrorResult and JobStarted.
+    # cwd, config_mode, access, timeout_seconds and elapsed_ms are required;
+    # everything else has a default.
+    model_config = ConfigDict(extra="forbid")
+    cwd: str
+    workspace_source: str | None = None  # how cwd was resolved: param|roots|cwd
+    workspace_warning: str | None = None  # set when cwd was resolved from server cwd
+    config_mode: ConfigMode
+    access: Access
+    scope: str | None = None
+    base: str | None = None
+    timeout_seconds: int
+    elapsed_ms: int
+    # The effective (env-defaulted + clamped) value passed to claude as
+    # --max-budget-usd. It is a best-effort stop threshold, not a hard cap; compare
+    # against cost_usd to see how close actual spend came.
+    requested_max_budget_usd: float | None = None
+    truncated: bool = False
+    truncation_hint: str | None = None
+    command_exit_code: int | None = None
+    permission_denials: list | None = None
+    # Optional `claude` flags this server dropped because the installed CLI did not
+    # advertise them in --help (e.g. ["--effort"]). Empty in the common case;
+    # informational — guarantee-bearing flags are never dropped, only depth/cosmetic ones.
+    compat_warnings: list[str] = Field(default_factory=list)
+    redacted_paths: list[str] = Field(default_factory=list)
+    cost_usd: float | None = None
+    usage: Usage | None = None
+    job_id: str | None = None  # set on background-job results; None for sync calls
+    # Defaults to a fresh random UUID4 hex string for each Meta instance.
+    request_id: str = Field(default_factory=lambda: uuid4().hex)
+    fingerprint: str = FINGERPRINT
+class SuccessResult(BaseModel):
+    # Success branch of the result union; ``ok`` is fixed to True and acts as the
+    # discriminator against ErrorResult.
+    model_config = ConfigDict(extra="forbid")
+    ok: Literal[True] = True
+    tool: str
+    summary: str
+    verdict: Verdict
+    confidence: Confidence
+    # The list fields default to empty lists.
+    findings: list[Finding] = Field(default_factory=list)
+    questions: list[str] = Field(default_factory=list)
+    assumptions: list[str] = Field(default_factory=list)
+    next_steps: list[str] = Field(default_factory=list)
+    raw_response: RawResponse = Field(default_factory=RawResponse)
+    context_summary: ContextSummary | None = None
+    meta: Meta
+class ErrorInfo(BaseModel):
+    # Description of one failure: a code from ErrorCode, a human-readable message,
+    # and a repair hint telling the caller what to try next.
+    model_config = ConfigDict(extra="forbid")
+    code: ErrorCode
+    message: str
+    repair: str
+    # NOTE(review): presumably the name of the input parameter that caused the
+    # error, when one can be singled out — confirm against callers.
+    offending_param: str | None = None
+    retryable: bool = False
+class ErrorResult(BaseModel):
+    # Error branch of the result union; ``ok`` is fixed to False.
+    model_config = ConfigDict(extra="forbid")
+    ok: Literal[False] = False
+    error: ErrorInfo
+    meta: Meta
+class ResolvedDefaults(BaseModel):
+    # Default settings bundle reported as StatusResult.resolved_defaults.
+    model_config = ConfigDict(extra="forbid")
+    config_mode: ConfigMode
+    access: Access
+    model: str | None = None
+    effort: Effort
+    max_budget_usd: float
+    timeout_seconds: int
+    budget_bounds: list[float]  # [min, max] clamp range for max_budget_usd
+    timeout_bounds: list[int]  # [min, max] clamp range for timeout_seconds
+    practical_min_budget_hint: str
+class StatusResult(BaseModel):
+    # Readiness/status report; STATUS_SCHEMA is generated from this model.
+    model_config = ConfigDict(extra="forbid")
+    ok: Literal[True] = True
+    claude_found: bool
+    claude_version: str | None = None
+    # Readiness probes (all free — no paid Claude call):
+    claude_authenticated: bool | None = None  # None = could not determine
+    auth_detail: str | None = None
+    version_supported: bool | None = None  # major is in supported_majors()
+    # Set when version_supported is False: a major outside the tested range is
+    # advisory, not fatal — tools may still work, so we warn instead of blocking.
+    version_warning: str | None = None
+    # Set when `claude --help` did not list a guarantee-bearing flag this plugin
+    # sends — an early, free signal that the CLI contract drifted.
+    flags_warning: str | None = None
+    ready: bool = False  # found AND authenticated (version is advisory, not gating)
+    config_modes_available: dict
+    resolved_defaults: ResolvedDefaults
+    caveat: str
+    fingerprint: str = FINGERPRINT
+class ToolCapability(BaseModel):
+    # Description of a single tool; element type of CapabilitiesResult.tool_details.
+    model_config = ConfigDict(extra="forbid")
+    name: str
+    cost: Literal["free", "paid"]
+    use_when: str
+    required_params: list[str] = Field(default_factory=list)
+    key_optional_params: list[str] = Field(default_factory=list)
+    returns: str
+class CapabilitiesResult(BaseModel):
+    # Capability report for the server; CAPABILITIES_SCHEMA is generated from
+    # this model.
+    model_config = ConfigDict(extra="forbid")
+    ok: Literal[True] = True
+    name: str
+    version: str
+    fingerprint: str = FINGERPRINT
+    transport: str
+    stability: str
+    paid_tools: list[str]
+    free_tools: list[str]
+    tool_details: list[ToolCapability] = Field(default_factory=list)
+    config_modes: list[str]
+    access_modes: list[str]
+    scope: list[str]  # what this server is for
+    negative_scope: list[str]  # what it deliberately does NOT do
+    prerequisites: list[str]
+    deprecation_policy: str
+class JobStarted(BaseModel):
+    """Returned by the *_async tools: a handle to poll, not a result."""
+    model_config = ConfigDict(extra="forbid")
+    ok: Literal[True] = True
+    job_id: str
+    kind: str  # the tool the job runs, e.g. claude_review_changes
+    status: JobState = "running"
+    started_at: str  # ISO-8601 UTC
+    deadline_seconds: int  # wall-clock cap after which a poll reaps the job
+    # NOTE(review): presumably the suggested wait before the first poll — confirm.
+    poll_after_ms: int = 1000
+    # TTL-expired job records are deleted and then reported as job_not_found.
+    ttl_seconds: int
+    expires_at: str | None = None
+    meta: Meta
+    fingerprint: str = FINGERPRINT
+class JobStatus(BaseModel):
+    """Returned by claude_job_status: lifecycle state without the full result."""
+    model_config = ConfigDict(extra="forbid")
+    ok: Literal[True] = True
+    job_id: str
+    kind: str
+    status: JobState
+    started_at: str
+    elapsed_ms: int
+    deadline_seconds: int
+    # NOTE(review): presumably the suggested wait before polling again — confirm.
+    poll_after_ms: int = 1000
+    ttl_seconds: int
+    expires_at: str | None = None
+    result_available: bool = False  # true once status == done
+    cost_usd: float | None = None  # populated for terminal jobs that spent
+    detail: str | None = None  # short human hint (e.g. failure reason)
+    fingerprint: str = FINGERPRINT
+class DryRunResult(BaseModel):
+    """Free preview of what a diff review WOULD send — no Claude call, no spend."""
+    model_config = ConfigDict(extra="forbid")
+    ok: Literal[True] = True
+    # Fixed tool name; the Literal type means no other value validates.
+    tool: Literal["claude_review_dry_run"] = "claude_review_dry_run"
+    cwd: str
+    # Same names as the workspace_source / workspace_warning fields on Meta.
+    workspace_source: str | None = None
+    workspace_warning: str | None = None
+    scope: str
+    base: str | None = None
+    context_summary: ContextSummary
+    diff_bytes: int  # full UTF-8 size of the redacted diff that would be sent
+    max_diff_bytes: int  # the server's truncation threshold
+    truncated: bool = False  # true when diff_bytes > max_diff_bytes
+    truncation_hint: str | None = None
+    redacted_paths_count: int = 0
+    redacted_paths: list[str] = Field(default_factory=list)
+    fingerprint: str = FINGERPRINT
+class JobSummary(BaseModel):
+    # Compact per-job record; element type of JobListResult.jobs. Its fields are
+    # a subset of those on JobStatus.
+    model_config = ConfigDict(extra="forbid")
+    job_id: str
+    kind: str
+    status: JobState
+    started_at: str
+    elapsed_ms: int
+    result_available: bool = False
+    expires_at: str | None = None
+    cost_usd: float | None = None
+class JobListResult(BaseModel):
+    """Returned by claude_job_list: the workspace's known jobs, newest first."""
+    model_config = ConfigDict(extra="forbid")
+    ok: Literal[True] = True
+    # Defaults to an empty list when no jobs are supplied.
+    jobs: list[JobSummary] = Field(default_factory=list)
+    fingerprint: str = FINGERPRINT
+def _object_union_schema(adapter: TypeAdapter) -> dict:
+    """Wrap a model union's anyOf in a top-level object schema.
+    MCP/FastMCP require an output schema whose top level is ``type: object``;
+    a bare ``anyOf`` is rejected. We keep the discriminating ``ok`` key visible
+    at the top and carry the full branch schemas (and their $defs) underneath.
+    """
+    union = adapter.json_schema()
+    return {
+        "type": "object",
+        "properties": {
+            "ok": {"type": "boolean", "description": "true = success result, false = error result"},
+        },
+        "required": ["ok"],
+        "anyOf": union["anyOf"],
+        "$defs": union.get("$defs", {}),
+    }
+# Advertised output schemas (convention: a discriminated ok:true|false union).
+# Unions go through _object_union_schema so the top level is ``type: object``.
+RESULT_SCHEMA = _object_union_schema(TypeAdapter(SuccessResult | ErrorResult))
+# Single-model schemas: generated directly, no union wrapper applied.
+STATUS_SCHEMA = StatusResult.model_json_schema()
+CAPABILITIES_SCHEMA = CapabilitiesResult.model_json_schema()
+# A failed *_async launch returns the error envelope; an empty diff returns a
+# SuccessResult without starting a job.
+JOB_STARTED_SCHEMA = _object_union_schema(TypeAdapter(JobStarted | SuccessResult | ErrorResult))
+JOB_STATUS_SCHEMA = _object_union_schema(TypeAdapter(JobStatus | ErrorResult))
+# Dry-run and job-list can fail (bad scope/base/workspace), so advertise the union.
+DRY_RUN_SCHEMA = _object_union_schema(TypeAdapter(DryRunResult | ErrorResult))
+JOB_LIST_SCHEMA = _object_union_schema(TypeAdapter(JobListResult | ErrorResult))