npm - tanuki-telemetry - Versions diffs - 1.1.0 - Mend

tanuki-telemetry 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

package/Dockerfile +22 -0
package/bin/tanuki.mjs +251 -0
package/frontend/eslint.config.js +23 -0
package/frontend/index.html +13 -0
package/frontend/package.json +39 -0
package/frontend/src/App.tsx +232 -0
package/frontend/src/assets/hero.png +0 -0
package/frontend/src/assets/react.svg +1 -0
package/frontend/src/assets/vite.svg +1 -0
package/frontend/src/components/ArtifactsPanel.tsx +429 -0
package/frontend/src/components/ChildStreams.tsx +176 -0
package/frontend/src/components/CoordinatorPage.tsx +317 -0
package/frontend/src/components/Header.tsx +108 -0
package/frontend/src/components/InsightsPanel.tsx +142 -0
package/frontend/src/components/IterationsTable.tsx +98 -0
package/frontend/src/components/KnowledgePage.tsx +308 -0
package/frontend/src/components/LoginPage.tsx +55 -0
package/frontend/src/components/PlanProgress.tsx +163 -0
package/frontend/src/components/QualityReport.tsx +276 -0
package/frontend/src/components/ScreenshotUpload.tsx +117 -0
package/frontend/src/components/ScreenshotsGrid.tsx +266 -0
package/frontend/src/components/SessionDetail.tsx +265 -0
package/frontend/src/components/SessionList.tsx +234 -0
package/frontend/src/components/SettingsPage.tsx +213 -0
package/frontend/src/components/StreamComms.tsx +228 -0
package/frontend/src/components/TanukiLogo.tsx +16 -0
package/frontend/src/components/Timeline.tsx +416 -0
package/frontend/src/components/WalkthroughPage.tsx +458 -0
package/frontend/src/hooks/useApi.ts +81 -0
package/frontend/src/hooks/useAuth.ts +54 -0
package/frontend/src/hooks/useKnowledge.ts +33 -0
package/frontend/src/hooks/useWebSocket.ts +95 -0
package/frontend/src/index.css +66 -0
package/frontend/src/lib/api.ts +15 -0
package/frontend/src/lib/utils.ts +58 -0
package/frontend/src/main.tsx +10 -0
package/frontend/src/types.ts +181 -0
package/frontend/tsconfig.app.json +32 -0
package/frontend/tsconfig.json +7 -0
package/frontend/vite.config.ts +25 -0
package/install.sh +87 -0
package/package.json +63 -0
package/src/api-keys.ts +97 -0
package/src/auth.ts +165 -0
package/src/coordinator.ts +136 -0
package/src/dashboard-server.ts +5 -0
package/src/dashboard.ts +826 -0
package/src/db.ts +1009 -0
package/src/index.ts +20 -0
package/src/middleware.ts +76 -0
package/src/tools.ts +864 -0
package/src/types-shim.d.ts +18 -0
package/src/types.ts +171 -0
package/tsconfig.json +19 -0

package/src/tools.ts ADDED Viewed

@@ -0,0 +1,864 @@
+import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { z } from "zod";
+import { v4 as uuidv4 } from "uuid";
+import {
+  createSession,
+  insertEvent,
+  insertIteration,
+  insertScreenshot,
+  insertArtifact,
+  insertInsight,
+  validateInsight,
+  getInsightsForContext,
+  createPlan,
+  updatePlanStep,
+  getPlanSteps,
+  endSession,
+  getSessionSummary,
+  listSessions,
+  getComparisonResults,
+  createWalkthrough,
+  insertWalkthroughAction,
+  insertWalkthroughScreenshot,
+  endWalkthrough,
+} from "./db.js";
+import type { FinalResult, Session, SessionSummary, Insight, PlanStep } from "./types.js";
+import {
+  saveCoordinatorState,
+  getCoordinatorState,
+  getLatestCoordinatorSession,
+  listCoordinatorSessions,
+  compactCoordinatorContext,
+  getCoordinatorHistory,
+} from "./coordinator.js";
+/**
+ * Wraps a value in the MCP tool-result envelope as pretty-printed JSON text.
+ *
+ * @param data - Any value; it is serialized with `JSON.stringify` (2-space indent).
+ * @returns A result object with a single `text` content item.
+ */
+function jsonResponse(data: unknown): { content: Array<{ type: "text"; text: string }> } {
+  // JSON.stringify yields `undefined` (not a string) for `undefined`, functions
+  // and symbols; fall back to the JSON literal "null" so `text` is always a string.
+  const text = JSON.stringify(data, null, 2) ?? "null";
+  return {
+    content: [{ type: "text" as const, text }],
+  };
+}
+/** Outcome of parsing a JSON string that was read back from a stored row. */
+type StoredJson = { ok: true; value: unknown } | { ok: false };
+
+/** Parses a stored JSON string without throwing on malformed content. */
+function parseStoredJson(raw: string): StoredJson {
+  try {
+    return { ok: true, value: JSON.parse(raw) };
+  } catch {
+    return { ok: false };
+  }
+}
+
+/**
+ * Renders a JSON-encoded array as a comma-separated list.
+ * Falls back to the raw stored text when it is not a valid JSON array.
+ */
+function renderStoredList(raw: string): string {
+  const parsed = parseStoredJson(raw);
+  return parsed.ok && Array.isArray(parsed.value) ? parsed.value.join(", ") : raw;
+}
+
+/**
+ * Renders a session summary as a markdown report.
+ *
+ * Sections are emitted in this order: session header, final result, events,
+ * iterations, screenshots, artifacts, insights. Each list section is skipped
+ * when its list is empty. A malformed `final_result` or `files_changed` value
+ * no longer aborts the report: it is surfaced in place instead of throwing.
+ *
+ * @param summary - Session row plus its related events, iterations,
+ *   screenshots and (optionally) artifacts and insights.
+ * @returns The markdown text.
+ */
+function formatSessionSummary(summary: SessionSummary): string {
+  const { session, events, iterations, screenshots } = summary;
+  const out: string[] = [];
+
+  out.push(`# Session Summary\n\n`);
+  out.push(`- **Session ID:** ${session.id}\n`);
+  out.push(`- **Worktree:** ${session.worktree_name}\n`);
+  if (session.ticket_id) {
+    out.push(`- **Ticket:** ${session.ticket_id} — ${session.ticket_title ?? ""}\n`);
+  }
+  if (session.branch_name) out.push(`- **Branch:** ${session.branch_name}\n`);
+  out.push(`- **Mode:** ${session.mode}\n`);
+  out.push(`- **Status:** ${session.status}\n`);
+  out.push(`- **Iterations:** ${session.total_iterations} / ${session.max_iterations}\n`);
+  out.push(`- **Tokens:** ${session.total_input_tokens} in / ${session.total_output_tokens} out\n`);
+  out.push(`- **Started:** ${session.started_at}\n`);
+  if (session.ended_at) out.push(`- **Ended:** ${session.ended_at}\n`);
+  if (session.duration_seconds != null) {
+    out.push(`- **Duration:** ${session.duration_seconds}s\n`);
+  }
+
+  if (session.final_result) {
+    const parsed = parseStoredJson(session.final_result);
+    if (!parsed.ok) {
+      // Surface the problem instead of throwing away the rest of the report.
+      out.push(`\n## Final Result\n\n`);
+      out.push(`- (stored final_result is not valid JSON)\n`);
+    } else if (parsed.value) {
+      const finalResult = parsed.value as Record<string, unknown>;
+      out.push(`\n## Final Result\n\n`);
+      out.push(`- Tests: ${finalResult.tests_passed ? "PASS" : "FAIL"}\n`);
+      out.push(`- Lint: ${finalResult.lint_passed ? "PASS" : "FAIL"}\n`);
+      out.push(`- Typecheck: ${finalResult.typecheck_passed ? "PASS" : "FAIL"}\n`);
+      if (finalResult.pr_url) out.push(`- PR: ${finalResult.pr_url}\n`);
+    }
+  }
+
+  if (events.length > 0) {
+    out.push(`\n## Events (${events.length})\n\n`);
+    for (const e of events) {
+      out.push(`- **[${e.timestamp}] [${e.phase}] ${e.event_type}:** ${e.message}\n`);
+    }
+  }
+
+  if (iterations.length > 0) {
+    out.push(`\n## Iterations (${iterations.length})\n\n`);
+    for (const it of iterations) {
+      out.push(`### Iteration ${it.iteration_number} — ${it.result.toUpperCase()}\n`);
+      out.push(`- **Trigger:** ${it.trigger}\n`);
+      out.push(`- **Error:** ${it.error_summary}\n`);
+      if (it.fix_description) out.push(`- **Fix:** ${it.fix_description}\n`);
+      // files_changed is read back as a JSON-encoded string array.
+      if (it.files_changed) out.push(`- **Files:** ${renderStoredList(it.files_changed)}\n`);
+      if (it.duration_seconds != null) {
+        out.push(`- **Duration:** ${it.duration_seconds}s\n`);
+      }
+      out.push(`\n`);
+    }
+  }
+
+  if (screenshots.length > 0) {
+    out.push(`\n## Screenshots (${screenshots.length})\n\n`);
+    for (const s of screenshots) {
+      out.push(`- **[${s.phase}]** ${s.description} — \`${s.file_path}\`\n`);
+    }
+  }
+
+  // artifacts and insights may be absent on the summary object.
+  const artifacts = summary.artifacts ?? [];
+  if (artifacts.length > 0) {
+    out.push(`\n## Artifacts (${artifacts.length})\n\n`);
+    for (const a of artifacts) {
+      out.push(
+        `- **[${a.artifact_type}]** ${a.description} — \`${a.file_path}\` (${a.mime_type || "unknown"})\n`
+      );
+    }
+  }
+
+  const insights = summary.insights ?? [];
+  if (insights.length > 0) {
+    out.push(`\n## Insights (${insights.length})\n\n`);
+    for (const ins of insights) {
+      out.push(`### [${ins.insight_type}] ${ins.title}\n`);
+      out.push(`- **Category:** ${ins.category}\n`);
+      out.push(`- **Confidence:** ${(ins.confidence * 100).toFixed(0)}%\n`);
+      out.push(`- ${ins.description}\n`);
+      if (ins.evidence) out.push(`- **Evidence:** ${ins.evidence}\n`);
+      out.push(`\n`);
+    }
+  }
+
+  return out.join("");
+}
+/** Wraps plain text (usually markdown) in the MCP tool-result envelope. */
+function textResponse(text: string): { content: Array<{ type: "text"; text: string }> } {
+  return { content: [{ type: "text" as const, text }] };
+}
+
+/**
+ * Renders a JSON-encoded string-array column as a comma-separated list.
+ * Falls back to the raw stored text when it is not a valid JSON array, so a
+ * single malformed row cannot make a read tool throw.
+ */
+function joinJsonList(raw: string): string {
+  try {
+    const parsed: unknown = JSON.parse(raw);
+    if (Array.isArray(parsed)) return parsed.join(", ");
+  } catch {
+    // Not JSON at all — show the stored text as-is below.
+  }
+  return raw;
+}
+
+/**
+ * Shared `metadata` parameter: accepts an object or a string. A string is
+ * JSON-parsed when possible and otherwise wrapped as `{ raw: <string> }`.
+ */
+const metadataSchema = z
+  .union([z.record(z.string(), z.unknown()), z.string()])
+  .optional()
+  .transform((val) => {
+    if (typeof val !== "string") return val;
+    try {
+      return JSON.parse(val) as Record<string, unknown>;
+    } catch {
+      return { raw: val };
+    }
+  })
+  .describe("Additional context as JSON");
+
+/** Workspace status values shared by the coordinator tools. */
+const workspaceStatusSchema = z.enum(["idle", "working", "done", "failed", "paused"]);
+
+/**
+ * Registers every telemetry tool on the given MCP server: session logging,
+ * insights / knowledge queries, plans, walkthroughs, coordinator state and
+ * comparison results.
+ *
+ * @param server - MCP server the tools are attached to via `server.tool(...)`.
+ * @param userEmail - Owner recorded on sessions created by `log_session_start`;
+ *   forwarded to `createSession` unchanged and echoed in that tool's response.
+ */
+export function registerTools(server: McpServer, userEmail?: string): void {
+  // userEmail is injected from the MCP connection's API key auth.
+  // NOTE(review): the original comment says it falls back to "local@localhost"
+  // when auth is disabled; no fallback is applied in this function, so that is
+  // presumably done by the caller — confirm.
+
+  // --- Session tools ---
+  server.tool(
+    "log_session_start",
+    "Creates a new telemetry session for tracking an autonomous workflow",
+    {
+      worktree_name: z.string().describe("Name of the worktree"),
+      ticket_id: z.string().optional().describe("Linear ticket ID"),
+      ticket_title: z.string().optional().describe("Ticket title"),
+      branch_name: z.string().optional().describe("Git branch name"),
+      mode: z
+        .enum(["local", "remote"])
+        .optional()
+        .default("local")
+        .describe("Execution mode"),
+      max_iterations: z
+        .number()
+        .optional()
+        .default(10)
+        .describe("Maximum fix iterations allowed"),
+      parent_session_id: z
+        .string()
+        .optional()
+        .describe("Parent session ID — set this when spawning a child stream from a parent session"),
+    },
+    async (params) => {
+      // The session ID is generated here, not supplied by the caller.
+      const session_id = uuidv4();
+      createSession(
+        session_id,
+        params.worktree_name,
+        params.ticket_id,
+        params.ticket_title,
+        params.branch_name,
+        params.mode,
+        params.max_iterations,
+        params.parent_session_id,
+        userEmail
+      );
+      return jsonResponse({ session_id, user_email: userEmail });
+    }
+  );
+  server.tool(
+    "log_event",
+    "Logs a significant action, decision, or error during a session",
+    {
+      session_id: z.string().describe("Session ID"),
+      phase: z
+        .string()
+        .describe(
+          "Current phase: setup, scope, implementation, verification, deliverables"
+        ),
+      event_type: z
+        .enum(["decision", "action", "error", "fix", "info", "phase_change", "review_pass", "review_flag", "review_dispatch", "error_resolve", "cost_checkpoint", "pattern_detect", "knowledge_extract"])
+        .describe("Type of event"),
+      message: z.string().describe("Human-readable event description"),
+      metadata: metadataSchema,
+      tokens_used: z.coerce.number().optional().describe("Tokens consumed by this action (legacy — prefer input_tokens/output_tokens)"),
+      input_tokens: z.coerce.number().optional().describe("Input tokens consumed by this action"),
+      output_tokens: z.coerce.number().optional().describe("Output tokens consumed by this action"),
+    },
+    async (params) => {
+      const event_id = insertEvent(
+        params.session_id,
+        params.phase,
+        params.event_type,
+        params.message,
+        params.metadata,
+        params.tokens_used,
+        params.input_tokens,
+        params.output_tokens
+      );
+      return jsonResponse({ event_id });
+    }
+  );
+  server.tool(
+    "log_iteration",
+    "Logs a test-fix iteration cycle",
+    {
+      session_id: z.string().describe("Session ID"),
+      iteration_number: z.coerce.number().describe("Iteration number (1-based)"),
+      trigger: z
+        .string()
+        .describe("What failed: test, lint, typecheck"),
+      error_summary: z.string().describe("Summary of the error"),
+      fix_description: z
+        .string()
+        .optional()
+        .describe("Description of the fix applied"),
+      files_changed: z
+        .array(z.string())
+        .optional()
+        .describe("List of file paths changed"),
+      result: z
+        .enum(["pass", "fail", "partial"])
+        .describe("Outcome of this iteration"),
+      duration_seconds: z
+        .number()
+        .optional()
+        .describe("How long the iteration took"),
+    },
+    async (params) => {
+      const iteration_id = insertIteration(
+        params.session_id,
+        params.iteration_number,
+        params.trigger,
+        params.error_summary,
+        params.fix_description,
+        params.files_changed,
+        params.result,
+        params.duration_seconds
+      );
+      return jsonResponse({ iteration_id });
+    }
+  );
+  server.tool(
+    "log_screenshot",
+    "Associates a screenshot file with a session. Copies the file into telemetry storage and generates a thumbnail for the dashboard.",
+    {
+      session_id: z.string().describe("Session ID"),
+      iteration_number: z
+        .coerce.number()
+        .optional()
+        .describe("Iteration number, or null for final screenshots"),
+      phase: z.string().describe("Phase when screenshot was taken"),
+      description: z.string().describe("What the screenshot shows"),
+      file_path: z.string().describe("Path to the screenshot file on host"),
+      event_id: z.coerce.number().optional().describe("ID of the event this screenshot belongs to — links screenshot to a specific timeline event"),
+    },
+    async (params) => {
+      const screenshot_id = insertScreenshot(
+        params.session_id,
+        params.iteration_number,
+        params.phase,
+        params.description,
+        params.file_path,
+        params.event_id
+      );
+      return jsonResponse({ screenshot_id });
+    }
+  );
+  server.tool(
+    "log_artifact",
+    "Logs a file artifact (template, rubric, report, PPTX, etc.) associated with a session. Copies the file into telemetry storage for serving via the dashboard.",
+    {
+      session_id: z.string().describe("Session ID"),
+      file_path: z.string().describe("Path to the artifact file on host"),
+      artifact_type: z.string().describe("Type of artifact: template, rubric, report, pptx, summary, config, output, etc."),
+      description: z.string().describe("What the artifact is / what it contains"),
+      metadata: metadataSchema,
+      event_id: z.coerce.number().optional().describe("ID of the event this artifact belongs to"),
+    },
+    async (params) => {
+      const artifact_id = insertArtifact(
+        params.session_id,
+        params.file_path,
+        params.artifact_type,
+        params.description,
+        params.metadata,
+        params.event_id
+      );
+      return jsonResponse({ artifact_id });
+    }
+  );
+  server.tool(
+    "log_session_end",
+    "Closes a session with final status and stats",
+    {
+      session_id: z.string().describe("Session ID"),
+      status: z
+        .enum(["completed", "failed", "interrupted"])
+        .describe("Final session status"),
+      total_input_tokens: z
+        .coerce.number()
+        .optional()
+        .describe("Total input tokens used (overrides accumulated count if larger)"),
+      total_output_tokens: z
+        .coerce.number()
+        .optional()
+        .describe("Total output tokens used (overrides accumulated count if larger)"),
+      final_result: z
+        .object({
+          tests_passed: z.boolean().optional(),
+          lint_passed: z.boolean().optional(),
+          typecheck_passed: z.boolean().optional(),
+          pr_url: z.string().optional(),
+          pr_number: z.coerce.number().optional(),
+        })
+        .optional()
+        .describe("Final verification results"),
+    },
+    async (params) => {
+      const result = endSession(
+        params.session_id,
+        params.status,
+        params.total_input_tokens,
+        params.total_output_tokens,
+        params.final_result as FinalResult | undefined
+      );
+      return jsonResponse(result);
+    }
+  );
+  server.tool(
+    "get_session_summary",
+    "Retrieves a full session summary with all events, iterations, and screenshots",
+    {
+      session_id: z
+        .string()
+        .optional()
+        .describe("Session ID — omit to get the most recent session"),
+    },
+    async (params) => {
+      const summary = getSessionSummary(params.session_id);
+      if (!summary) {
+        return jsonResponse({ error: "No session found" });
+      }
+      return textResponse(formatSessionSummary(summary));
+    }
+  );
+
+  // --- Insight / knowledge tools ---
+  server.tool(
+    "log_insight",
+    "Records a learning/insight from session reflection — mistakes, patterns, gotchas, or rules discovered during autonomous work",
+    {
+      session_id: z.string().describe("Session ID this insight came from"),
+      insight_type: z
+        .enum(["mistake", "success_pattern", "codebase_gotcha", "optimization", "rule_learned"])
+        .describe("Type of insight"),
+      category: z
+        .string()
+        .describe("Domain category: e.g., 'auth', 'testing', 'react-patterns', 'api-design', 'typescript', 'lint', 'state-management'"),
+      title: z.string().describe("Short title for this insight"),
+      description: z
+        .string()
+        .describe("Full description — what happened, why it matters, what to do differently"),
+      evidence: z
+        .string()
+        .optional()
+        .describe("The specific event/error/code that led to this insight"),
+      confidence: z
+        .number()
+        .min(0)
+        .max(1)
+        .optional()
+        .default(0.5)
+        .describe("How confident this insight is correct (0-1). Increases as it gets validated across sessions"),
+      file_patterns: z
+        .array(z.string())
+        .optional()
+        .describe("File glob patterns this insight applies to, e.g. ['src/components/**', '*.test.ts']"),
+      error_patterns: z
+        .array(z.string())
+        .optional()
+        .describe("Error message patterns that trigger this insight, e.g. ['Cannot read property', 'Type.*not assignable']"),
+    },
+    async (params) => {
+      const insight_id = insertInsight(
+        params.session_id,
+        params.insight_type,
+        params.category,
+        params.title,
+        params.description,
+        params.evidence,
+        params.confidence,
+        params.file_patterns,
+        params.error_patterns
+      );
+      return jsonResponse({ insight_id });
+    }
+  );
+  server.tool(
+    "validate_insight",
+    "Confirms an existing insight was relevant/correct in a new session — increases its confidence score",
+    {
+      insight_id: z.coerce.number().describe("ID of the insight to validate"),
+    },
+    async (params) => {
+      validateInsight(params.insight_id);
+      return jsonResponse({ validated: true });
+    }
+  );
+  server.tool(
+    "query_knowledge",
+    "Retrieves accumulated insights/learnings to inform the current session — call this at the start of work to learn from past mistakes",
+    {
+      category: z
+        .string()
+        .optional()
+        .describe("Filter by category (e.g., 'auth', 'testing', 'react-patterns')"),
+      file_pattern: z
+        .string()
+        .optional()
+        .describe("Filter by file pattern — finds insights related to files matching this pattern"),
+      limit: z.coerce.number().optional().default(20).describe("Max insights to return"),
+    },
+    async (params) => {
+      const insights = getInsightsForContext(
+        params.category,
+        params.file_pattern,
+        params.limit
+      );
+      if (insights.length === 0) {
+        return textResponse("No insights found for this context. This is a fresh area — be extra careful and log what you learn.");
+      }
+      let md = `# Knowledge Base — ${insights.length} insights\n\n`;
+      md += `Sorted by confidence (validated across sessions).\n\n`;
+      for (const ins of insights) {
+        // One marker glyph per insight type; "§" covers the remaining type(s).
+        const icon =
+          ins.insight_type === "mistake" ? "⚠" :
+          ins.insight_type === "success_pattern" ? "✓" :
+          ins.insight_type === "codebase_gotcha" ? "!" :
+          ins.insight_type === "optimization" ? "→" :
+          "§";
+        md += `### ${icon} [${ins.insight_type}] ${ins.title}\n`;
+        md += `**Category:** ${ins.category} | **Confidence:** ${(ins.confidence * 100).toFixed(0)}% | **Validated:** ${ins.times_validated}x\n`;
+        md += `${ins.description}\n`;
+        if (ins.evidence) md += `**Evidence:** ${ins.evidence}\n`;
+        // Pattern columns are read back as JSON strings; a malformed value is
+        // shown raw instead of aborting the whole query.
+        if (ins.file_patterns) md += `**Applies to:** ${joinJsonList(ins.file_patterns)}\n`;
+        if (ins.error_patterns) md += `**Error patterns:** ${joinJsonList(ins.error_patterns)}\n`;
+        md += `\n---\n\n`;
+      }
+      return textResponse(md);
+    }
+  );
+
+  // --- Plan tools ---
+  server.tool(
+    "create_plan",
+    "Creates an implementation plan with numbered steps — the agent updates each step as it progresses. Steps appear live on the dashboard.",
+    {
+      session_id: z.string().describe("Session ID"),
+      steps: z.array(z.object({
+        step_number: z.coerce.number().describe("Step number (1-based, used for ordering)"),
+        title: z.string().describe("Short title for this step"),
+        description: z.string().optional().describe("Detailed description of what this step involves"),
+        parent_step: z.coerce.number().optional().describe("Parent step number if this is a sub-step"),
+        file_targets: z.array(z.string()).optional().describe("Files this step will touch"),
+      })).describe("Array of plan steps"),
+    },
+    async (params) => {
+      const ids = createPlan(params.session_id, params.steps);
+      return jsonResponse({ step_ids: ids, total_steps: ids.length });
+    }
+  );
+  server.tool(
+    "update_plan_step",
+    "Updates the status of a plan step — call this as you start, complete, skip, or fail each step",
+    {
+      step_id: z.coerce.number().describe("Step ID (returned by create_plan)"),
+      status: z.enum(["pending", "in_progress", "completed", "skipped", "failed"]).describe("New status"),
+      outcome: z.string().optional().describe("What happened — result summary, error message, or reason for skip"),
+    },
+    async (params) => {
+      updatePlanStep(params.step_id, params.status, params.outcome);
+      return jsonResponse({ updated: true });
+    }
+  );
+  server.tool(
+    "get_plan",
+    "Retrieves the current plan for a session — use this to check what's next or review progress",
+    {
+      session_id: z.string().describe("Session ID"),
+    },
+    async (params) => {
+      const steps = getPlanSteps(params.session_id);
+      if (steps.length === 0) {
+        return textResponse("No plan found for this session.");
+      }
+      const countOf = (status: string): number =>
+        steps.filter((s) => s.status === status).length;
+      const completed = countOf("completed");
+      const failed = countOf("failed");
+      const inProgress = countOf("in_progress");
+      const skipped = countOf("skipped");
+      const pending = countOf("pending");
+      let md = `# Plan Progress: ${completed}/${steps.length} completed\n\n`;
+      if (failed > 0) md += `**${failed} failed** | `;
+      if (inProgress > 0) md += `**${inProgress} in progress** | `;
+      // Skipped steps are reported too, so the counts add up to the total.
+      if (skipped > 0) md += `**${skipped} skipped** | `;
+      md += `**${pending} pending**\n\n`;
+      for (const step of steps) {
+        const icon =
+          step.status === "completed" ? "[x]" :
+          step.status === "in_progress" ? "[>]" :
+          step.status === "failed" ? "[!]" :
+          step.status === "skipped" ? "[-]" :
+          "[ ]";
+        // Sub-steps (those with a parent) are indented one level.
+        const indent = step.parent_step ? "  " : "";
+        md += `${indent}${icon} **Step ${step.step_number}:** ${step.title}\n`;
+        if (step.description) md += `${indent}    ${step.description}\n`;
+        if (step.outcome) md += `${indent}    → ${step.outcome}\n`;
+        // file_targets is read back as a JSON string; shown raw if malformed.
+        if (step.file_targets) md += `${indent}    files: ${joinJsonList(step.file_targets)}\n`;
+        if (step.duration_seconds != null) md += `${indent}    (${step.duration_seconds}s)\n`;
+        md += `\n`;
+      }
+      return textResponse(md);
+    }
+  );
+  server.tool(
+    "list_sessions",
+    "Browse past telemetry sessions. By default only shows top-level sessions (not children). Pass parent_session_id to list child streams. Pass user_email to filter by user.",
+    {
+      limit: z.coerce.number().optional().default(10).describe("Max sessions to return"),
+      status: z
+        .string()
+        .optional()
+        .describe("Filter by status: in_progress, completed, failed, interrupted"),
+      parent_session_id: z
+        .string()
+        .optional()
+        .describe("Filter to children of this parent session ID"),
+      user_email: z
+        .string()
+        .optional()
+        .describe("Filter to sessions owned by this user email"),
+    },
+    async (params) => {
+      const sessions = listSessions(params.limit, params.status, params.parent_session_id, params.user_email);
+      const rows = sessions.map((s) => ({
+        id: s.id,
+        worktree: s.worktree_name,
+        ticket: s.ticket_id ?? "-",
+        status: s.status,
+        iterations: `${s.total_iterations}/${s.max_iterations}`,
+        tokens: `${s.total_input_tokens} in / ${s.total_output_tokens} out`,
+        duration: s.duration_seconds != null ? `${s.duration_seconds}s` : "ongoing",
+        started: s.started_at,
+        // user_email is read through a widened type because the cast below
+        // implies Session does not declare it.
+        user: (s as Session & { user_email?: string }).user_email ?? "-",
+      }));
+      return jsonResponse(rows);
+    }
+  );
+
+  // --- Walkthrough tools ---
+  server.tool(
+    "log_walkthrough_start",
+    "Creates a new walkthrough session for tracking a UI walkthrough/scenario run",
+    {
+      url: z.string().describe("Starting URL of the walkthrough"),
+      app_name: z.string().optional().describe("Application name (e.g., 'my-app')"),
+      scenario: z.string().optional().describe("Scenario name (e.g., 'login-happy')"),
+    },
+    async (params) => {
+      const walkthrough_id = createWalkthrough(params.url, params.app_name, params.scenario);
+      return jsonResponse({ walkthrough_id });
+    }
+  );
+  server.tool(
+    "log_walkthrough_action",
+    "Logs an action taken during a walkthrough — navigate, click, type, assert, wait, or screenshot",
+    {
+      walkthrough_id: z.coerce.number().describe("Walkthrough ID"),
+      action_type: z.enum(["navigate", "click", "type", "assert", "wait", "screenshot"]).describe("Type of action"),
+      target: z.string().describe("Target selector or URL"),
+      value: z.string().optional().describe("Value typed or assertion expected"),
+      status: z.enum(["pass", "fail"]).describe("Whether the action succeeded"),
+      message: z.string().optional().describe("Additional context or error message"),
+    },
+    async (params) => {
+      const action_id = insertWalkthroughAction(
+        params.walkthrough_id,
+        params.action_type,
+        params.target,
+        params.value,
+        params.status,
+        params.message
+      );
+      return jsonResponse({ action_id });
+    }
+  );
+  server.tool(
+    "log_walkthrough_screenshot",
+    "Uploads a screenshot taken during a walkthrough. Provide either file_path (preferred — path to screenshot on disk) or image_data (base64-encoded).",
+    {
+      walkthrough_id: z.coerce.number().describe("Walkthrough ID"),
+      name: z.string().describe("Screenshot name (e.g., 'login-page')"),
+      file_path: z.string().optional().describe("Path to screenshot file on disk (preferred over base64)"),
+      image_data: z.string().optional().describe("Base64-encoded image data (fallback if file_path not available)"),
+      annotation: z.string().optional().describe("Description of what the screenshot shows"),
+      action_id: z.coerce.number().optional().describe("ID of the action this screenshot belongs to"),
+    },
+    async (params) => {
+      // Argument order differs from the schema order: image_data comes third
+      // and file_path last.
+      // NOTE(review): nothing here enforces that at least one of file_path /
+      // image_data is present — presumably insertWalkthroughScreenshot handles it; confirm.
+      const screenshot_id = insertWalkthroughScreenshot(
+        params.walkthrough_id,
+        params.name,
+        params.image_data,
+        params.annotation,
+        params.action_id,
+        params.file_path
+      );
+      return jsonResponse({ screenshot_id });
+    }
+  );
+  server.tool(
+    "log_walkthrough_end",
+    "Finalizes a walkthrough with pass/fail status and summary stats",
+    {
+      walkthrough_id: z.coerce.number().describe("Walkthrough ID"),
+      status: z.enum(["pass", "fail", "partial"]).describe("Overall walkthrough result"),
+      summary: z.string().describe("Summary of walkthrough results"),
+      total_actions: z.coerce.number().optional().describe("Total number of actions taken"),
+      passed: z.coerce.number().optional().describe("Number of actions that passed"),
+      failed: z.coerce.number().optional().describe("Number of actions that failed"),
+    },
+    async (params) => {
+      endWalkthrough(
+        params.walkthrough_id,
+        params.status,
+        params.summary,
+        params.total_actions,
+        params.passed,
+        params.failed
+      );
+      return jsonResponse({ ended: true });
+    }
+  );
+
+  // --- Coordinator tools ---
+  server.tool(
+    "save_coordinator_state",
+    "Save or update the coordinator session state. Merges with existing state — partial updates are fine. Use this to track workspace statuses, pending tasks, and decisions.",
+    {
+      session_id: z.string().describe("Coordinator session ID"),
+      workspaces: z
+        .record(z.string(), z.object({
+          name: z.string(),
+          status: workspaceStatusSchema,
+          last_task: z.string().optional(),
+          pending: z.array(z.string()).optional(),
+          session_id: z.string().optional(),
+          last_updated: z.string().optional(),
+        }))
+        .optional()
+        .describe("Workspace states keyed by workspace ID"),
+      pending_tasks: z.array(z.string()).optional().describe("Tasks queued for dispatch"),
+      decisions: z.array(z.string()).optional().describe("Key decisions made this session"),
+      notes: z.string().optional().describe("Free-form notes"),
+    },
+    async (params) => {
+      const state = saveCoordinatorState(params.session_id, {
+        workspaces: params.workspaces,
+        pending_tasks: params.pending_tasks,
+        decisions: params.decisions,
+        notes: params.notes,
+      });
+      return jsonResponse({ saved: true, workspace_count: Object.keys(state.workspaces).length });
+    }
+  );
+  server.tool(
+    "get_coordinator_state",
+    "Load coordinator session state. Omit session_id to get the most recent coordinator session. Returns full state with workspaces, pending tasks, decisions, notes, and context history.",
+    {
+      session_id: z.string().optional().describe("Coordinator session ID — omit for most recent"),
+    },
+    async (params) => {
+      const state = params.session_id
+        ? getCoordinatorState(params.session_id)
+        : getLatestCoordinatorSession();
+      if (!state) {
+        return textResponse("No coordinator session found. Create one with save_coordinator_state.");
+      }
+      const history = getCoordinatorHistory(state.session_id);
+      let md = `# Coordinator Session: ${state.session_id}\n\n`;
+      md += `**Started:** ${state.started_at}\n`;
+      md += `**Last updated:** ${state.last_updated}\n\n`;
+      // Workspaces
+      md += `## Workspaces (${Object.keys(state.workspaces).length})\n\n`;
+      for (const [id, ws] of Object.entries(state.workspaces)) {
+        const icon =
+          ws.status === "done" ? "[x]" :
+          ws.status === "working" ? "[>]" :
+          ws.status === "failed" ? "[!]" :
+          ws.status === "paused" ? "[-]" :
+          "[ ]";
+        md += `${icon} **${id}** — ${ws.name} (${ws.status})`;
+        if (ws.last_task) md += ` — last: ${ws.last_task}`;
+        md += `\n`;
+        if (ws.pending && ws.pending.length > 0) {
+          for (const p of ws.pending) md += `    → ${p}\n`;
+        }
+      }
+      // Pending tasks
+      if (state.pending_tasks.length > 0) {
+        md += `\n## Pending Tasks\n\n`;
+        for (const t of state.pending_tasks) md += `- ${t}\n`;
+      }
+      // Decisions
+      if (state.decisions.length > 0) {
+        md += `\n## Decisions\n\n`;
+        for (const d of state.decisions) md += `- ${d}\n`;
+      }
+      // Notes
+      if (state.notes) {
+        md += `\n## Notes\n\n${state.notes}\n`;
+      }
+      // Context history
+      if (history.length > 0) {
+        md += `\n## Context History (${history.length} snapshots)\n\n`;
+        for (const h of history) {
+          md += `### ${h.timestamp}\n`;
+          md += `${h.summary}\n`;
+          if (h.key_decisions.length > 0) {
+            md += `**Decisions:** ${h.key_decisions.join("; ")}\n`;
+          }
+          if (h.pending_work.length > 0) {
+            md += `**Pending:** ${h.pending_work.join("; ")}\n`;
+          }
+          md += `\n`;
+        }
+      }
+      return textResponse(md);
+    }
+  );
+  server.tool(
+    "list_coordinator_sessions",
+    "List past coordinator sessions with summaries",
+    {
+      limit: z.coerce.number().optional().default(5).describe("Max sessions to return"),
+    },
+    async (params) => {
+      const sessions = listCoordinatorSessions(params.limit);
+      if (sessions.length === 0) {
+        return textResponse("No coordinator sessions found.");
+      }
+      const rows = sessions.map((s) => ({
+        session_id: s.session_id,
+        started: s.started_at,
+        last_updated: s.last_updated,
+        workspaces: Object.keys(s.workspaces).length,
+        pending_tasks: s.pending_tasks.length,
+        decisions: s.decisions.length,
+      }));
+      return jsonResponse(rows);
+    }
+  );
+  server.tool(
+    "compact_coordinator_context",
+    "Save a context snapshot before conversation compaction. Appends to the session's history.jsonl. Load it on resume to recover the full picture.",
+    {
+      session_id: z.string().describe("Coordinator session ID"),
+      summary: z.string().describe("Summary of what happened in this conversation segment"),
+      key_decisions: z.array(z.string()).optional().describe("Important decisions made"),
+      workspace_states: z
+        .record(z.string(), z.object({
+          name: z.string(),
+          status: workspaceStatusSchema,
+          last_task: z.string().optional(),
+          pending: z.array(z.string()).optional(),
+        }))
+        .optional()
+        .describe("Current workspace states"),
+      pending_work: z.array(z.string()).optional().describe("Work items still pending"),
+    },
+    async (params) => {
+      // The snapshot timestamp is taken here, at call time; omitted optional
+      // collections are stored as empty ones.
+      compactCoordinatorContext(params.session_id, {
+        timestamp: new Date().toISOString(),
+        summary: params.summary,
+        key_decisions: params.key_decisions ?? [],
+        workspace_states: params.workspace_states ?? {},
+        pending_work: params.pending_work ?? [],
+      });
+      return jsonResponse({ compacted: true });
+    }
+  );
+
+  // --- Visual comparison tools ---
+  server.tool(
+    "get_comparison_results",
+    "Retrieves visual comparison findings from a /compare-image session. Returns per-slide severity, diff percentage, findings, and screenshot URLs. Use this to decide what needs iteration after a visual QA run.",
+    {
+      session_id: z.string().describe("Session ID of the compare-image run"),
+      min_severity: z
+        .enum(["CRITICAL", "NOTABLE", "MINOR", "GOOD"])
+        .optional()
+        .describe("Minimum severity to include — e.g. 'NOTABLE' returns NOTABLE + CRITICAL only"),
+    },
+    async (params) => {
+      const { results, summary } = getComparisonResults(
+        params.session_id,
+        params.min_severity
+      );
+      if (results.length === 0) {
+        return jsonResponse({
+          error: "No comparison results found for this session. Ensure the session used /compare-image.",
+        });
+      }
+      return jsonResponse({ results, summary, total: results.length });
+    }
+  );
+}