npm - nodebench-mcp - Versions diffs - 2.25.0 → 2.27.0 - Mend

nodebench-mcp 2.25.0 → 2.27.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (86)

package/NODEBENCH_AGENTS.md +5 -4
package/README.md +145 -16
package/dist/__tests__/architectComplex.test.js +3 -5
package/dist/__tests__/architectComplex.test.js.map +1 -1
package/dist/__tests__/batchAutopilot.test.d.ts +8 -0
package/dist/__tests__/batchAutopilot.test.js +218 -0
package/dist/__tests__/batchAutopilot.test.js.map +1 -0
package/dist/__tests__/cliSubcommands.test.d.ts +1 -0
package/dist/__tests__/cliSubcommands.test.js +138 -0
package/dist/__tests__/cliSubcommands.test.js.map +1 -0
package/dist/__tests__/evalHarness.test.js +1 -1
package/dist/__tests__/forecastingDogfood.test.d.ts +9 -0
package/dist/__tests__/forecastingDogfood.test.js +284 -0
package/dist/__tests__/forecastingDogfood.test.js.map +1 -0
package/dist/__tests__/forecastingScoring.test.d.ts +9 -0
package/dist/__tests__/forecastingScoring.test.js +202 -0
package/dist/__tests__/forecastingScoring.test.js.map +1 -0
package/dist/__tests__/localDashboard.test.d.ts +1 -0
package/dist/__tests__/localDashboard.test.js +226 -0
package/dist/__tests__/localDashboard.test.js.map +1 -0
package/dist/__tests__/multiHopDogfood.test.js +11 -11
package/dist/__tests__/multiHopDogfood.test.js.map +1 -1
package/dist/__tests__/openclawDogfood.test.d.ts +23 -0
package/dist/__tests__/openclawDogfood.test.js +535 -0
package/dist/__tests__/openclawDogfood.test.js.map +1 -0
package/dist/__tests__/openclawMessaging.test.d.ts +14 -0
package/dist/__tests__/openclawMessaging.test.js +232 -0
package/dist/__tests__/openclawMessaging.test.js.map +1 -0
package/dist/__tests__/presetRealWorldBench.test.js +0 -2
package/dist/__tests__/presetRealWorldBench.test.js.map +1 -1
package/dist/__tests__/tools.test.js +9 -157
package/dist/__tests__/tools.test.js.map +1 -1
package/dist/__tests__/toolsetGatingEval.test.js +0 -2
package/dist/__tests__/toolsetGatingEval.test.js.map +1 -1
package/dist/__tests__/traceabilityDogfood.test.d.ts +12 -0
package/dist/__tests__/traceabilityDogfood.test.js +241 -0
package/dist/__tests__/traceabilityDogfood.test.js.map +1 -0
package/dist/__tests__/webmcpTools.test.d.ts +7 -0
package/dist/__tests__/webmcpTools.test.js +195 -0
package/dist/__tests__/webmcpTools.test.js.map +1 -0
package/dist/dashboard/briefHtml.d.ts +20 -0
package/dist/dashboard/briefHtml.js +1000 -0
package/dist/dashboard/briefHtml.js.map +1 -0
package/dist/dashboard/briefServer.d.ts +18 -0
package/dist/dashboard/briefServer.js +320 -0
package/dist/dashboard/briefServer.js.map +1 -0
package/dist/dashboard/html.js +1470 -1230
package/dist/dashboard/html.js.map +1 -1
package/dist/dashboard/server.js +166 -41
package/dist/dashboard/server.js.map +1 -1
package/dist/index.js +210 -14
package/dist/index.js.map +1 -1
package/dist/tools/critterTools.js +4 -0
package/dist/tools/critterTools.js.map +1 -1
package/dist/tools/forecastingTools.d.ts +11 -0
package/dist/tools/forecastingTools.js +616 -0
package/dist/tools/forecastingTools.js.map +1 -0
package/dist/tools/localDashboardTools.d.ts +8 -0
package/dist/tools/localDashboardTools.js +332 -0
package/dist/tools/localDashboardTools.js.map +1 -0
package/dist/tools/metaTools.js +170 -1
package/dist/tools/metaTools.js.map +1 -1
package/dist/tools/openclawTools.d.ts +11 -0
package/dist/tools/openclawTools.js +1017 -0
package/dist/tools/openclawTools.js.map +1 -0
package/dist/tools/overstoryTools.d.ts +14 -0
package/dist/tools/overstoryTools.js +426 -0
package/dist/tools/overstoryTools.js.map +1 -0
package/dist/tools/progressiveDiscoveryTools.js +50 -115
package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
package/dist/tools/selfEvalTools.js +8 -1
package/dist/tools/selfEvalTools.js.map +1 -1
package/dist/tools/sessionMemoryTools.js +14 -2
package/dist/tools/sessionMemoryTools.js.map +1 -1
package/dist/tools/toolRegistry.d.ts +1 -15
package/dist/tools/toolRegistry.js +243 -228
package/dist/tools/toolRegistry.js.map +1 -1
package/dist/tools/visualQaTools.d.ts +2 -0
package/dist/tools/visualQaTools.js +1088 -0
package/dist/tools/visualQaTools.js.map +1 -0
package/dist/tools/webmcpTools.d.ts +16 -0
package/dist/tools/webmcpTools.js +703 -0
package/dist/tools/webmcpTools.js.map +1 -0
package/dist/toolsetRegistry.js +6 -2
package/dist/toolsetRegistry.js.map +1 -1
package/package.json +2 -2

package/dist/tools/toolRegistry.js (changed)

@@ -2073,14 +2073,14 @@ const REGISTRY_ENTRIES = [
     {
         name: "ingest_dive_screenshots",
         category: "ui_ux_dive",
-        tags: ["ui", "screenshot", "ingest", "import", "bulk", "png", "jpg", "gallery", "dive", "disk", "file"],
+        tags: ["ui", "screenshot", "ingest", "import", "bulk", "disk", "gallery", "dive", "png", "jpg"],
         quickRef: {
-            nextAction: "Screenshots ingested into session DB. View them in the dashboard gallery or reference in dive_changelog entries.",
-            nextTools: ["dive_changelog", "get_dive_report", "open_dive_dashboard"],
+            nextAction: "Screenshots ingested into dive session. View them in the dashboard or use dive_screenshot to capture new ones.",
+            nextTools: ["dive_screenshot", "tag_ui_bug", "end_component_flow", "get_dive_tree"],
             methodology: "agentic_vision",
-            tip: "Use after external Playwright MCP captures screenshots to disk. Scans directory recursively, base64-encodes, and inserts into the session DB for dashboard display.",
+            tip: "Scans a directory for PNG/JPG files and bulk-imports them into a dive session's screenshot gallery. Use after external Playwright captures.",
         },
-        phase: "utility",
+        phase: "test",
     },
     // ═══════════════════════════════════════════
     // UI/UX DIVE V2 — Deep interaction testing,
@@ -2266,7 +2266,7 @@ const REGISTRY_ENTRIES = [
     {
         name: "register_skill",
         category: "skill_update",
-        tags: ["skill", "rule", "register", "source", "hash", "frontmatter", "provenance", "memory", "agents-md", "cursor", "windsurf", "update", "reexamine", "related_"],
+        tags: ["skill", "rule", "register", "source", "hash", "frontmatter", "provenance", "memory", "agents-md", "cursor", "windsurf", "update"],
         quickRef: {
             nextAction: "Skill registered. Use check_skill_freshness periodically to detect when source files change.",
             nextTools: ["check_skill_freshness", "list_skills"],
@@ -2312,28 +2312,6 @@ const REGISTRY_ENTRIES = [
         phase: "utility",
     },
     // ═══════════════════════════════════════════
-    // RE-EXAMINE 11/10 — Fresh-eyes quality pass
-    // Modular rules: reexamine_process → a11y,
-    // resilience, polish, keyboard, performance
-    // Cross-ref via related_ frontmatter hops
-    // ═══════════════════════════════════════════
-    // NOTE: These are not MCP tools — they are rule
-    // files in .cursor/rules/ and .windsurf/rules/.
-    // The skill_update tools above (register_skill,
-    // check_skill_freshness) track their freshness.
-    // The related_ field in each rule's frontmatter
-    // enables one-hop and two-hop cross-referencing:
-    //
-    //   reexamine_process
-    //     └─ related_: [a11y, resilience, polish, keyboard, performance]
-    //          └─ reexamine_a11y.related_: [keyboard, polish, process]
-    //          └─ reexamine_resilience.related_: [performance, process, polish]
-    //          └─ reexamine_polish.related_: [a11y, performance, process]
-    //          └─ reexamine_keyboard.related_: [a11y, process]
-    //          └─ reexamine_performance.related_: [resilience, polish, process]
-    //
-    // Two-hop example: process → a11y → keyboard (discovers keyboard via a11y)
-    // ═══════════════════════════════════════════
     // MCP BRIDGE — Connect external MCP servers
     // ═══════════════════════════════════════════
     {
@@ -2432,43 +2410,178 @@ const REGISTRY_ENTRIES = [
         phase: "implement",
     },
     // ═══════════════════════════════════════════
-    // PR REPORT — Visual PR creation from dives
+    // QA ORCHESTRATION — Overstory multi-agent QA
     // ═══════════════════════════════════════════
     {
-        name: "generate_pr_report",
-        category: "pr_report",
-        tags: ["pr", "pull-request", "report", "markdown", "visual", "screenshot", "before-after", "timeline", "changelog", "dive", "github", "review", "evidence"],
+        name: "overstory_fleet_status",
+        category: "qa_orchestration",
+        tags: ["overstory", "agent", "fleet", "status", "health", "multi-agent", "orchestration", "qa", "dogfood", "worktree"],
         quickRef: {
-            nextAction: "PR report generated. Use the markdown with `gh pr create --body-file` or call create_visual_pr for end-to-end PR creation.",
-            nextTools: ["create_visual_pr", "export_pr_screenshots", "review_pr_checklist"],
-            methodology: "agentic_vision",
-            tip: "Pass asset_dir to export screenshots as PNGs that can be committed alongside the PR.",
+            nextAction: "Review agent states. If agents are idle, run dogfood:overstory to start a QA session.",
+            nextTools: ["overstory_qa_summary", "overstory_mail_log", "run_visual_qa_suite"],
+            methodology: "ai_flywheel",
+            tip: "Reads .overstory/agent-manifest.json and overstory.db. Shows configured agents, capabilities, gate policy, and live agent health.",
         },
-        phase: "ship",
+        phase: "utility",
     },
     {
-        name: "export_pr_screenshots",
-        category: "pr_report",
-        tags: ["pr", "screenshot", "export", "png", "before-after", "visual", "evidence", "assets", "commit", "dive", "changelog", "fix"],
+        name: "overstory_qa_summary",
+        category: "qa_orchestration",
+        tags: ["overstory", "qa", "gate", "summary", "stability", "grade", "ssim", "triage", "p0", "p1", "dogfood"],
         quickRef: {
-            nextAction: "Screenshots exported. Stage and commit them, then use generate_pr_report or create_visual_pr to reference them in the PR body.",
-            nextTools: ["generate_pr_report", "create_visual_pr"],
-            methodology: "agentic_vision",
-            tip: "Naming convention: {index}-{type}-before.png / after.png. Commit these with your branch.",
+            nextAction: "If gate fails, check failing routes and fix p0/p1 issues. If gate passes, proceed to merge.",
+            nextTools: ["overstory_mail_log", "overstory_fleet_status", "run_visual_qa_suite", "burst_capture"],
+            methodology: "ai_flywheel",
+            tip: "Aggregates SSIM stability grades from visual_qa_runs and Gemini QA triage from Overstory mail. Returns gate pass/fail verdict.",
         },
-        phase: "ship",
+        phase: "verify",
     },
     {
-        name: "create_visual_pr",
-        category: "pr_report",
-        tags: ["pr", "pull-request", "create", "github", "gh", "visual", "screenshot", "end-to-end", "push", "merge", "review", "dive", "timeline", "evidence"],
+        name: "overstory_mail_log",
+        category: "qa_orchestration",
+        tags: ["overstory", "mail", "log", "message", "route", "triage", "dispatch", "agent", "coordination"],
         quickRef: {
-            nextAction: "PR created! Share the URL with reviewers. The PR body contains visual evidence and dashboard links for interactive browsing.",
-            nextTools: ["review_pr_checklist", "enforce_merge_gate"],
-            methodology: "agentic_vision",
-            tip: "Set draft:true for WIP PRs. Combines export_pr_screenshots + generate_pr_report + gh pr create in one call.",
+            nextAction: "Review messages to understand QA session state. Filter by type or agent for focused view.",
+            nextTools: ["overstory_qa_summary", "overstory_fleet_status", "overstory_merge_queue"],
+            methodology: "ai_flywheel",
+            tip: "Supports type_filter (result/dispatch/worker_done/escalation) and agent_filter. Shows structured mail payloads from the QA agent fleet.",
         },
-        phase: "ship",
+        phase: "utility",
+    },
+    {
+        name: "overstory_merge_queue",
+        category: "qa_orchestration",
+        tags: ["overstory", "merge", "queue", "branch", "conflict", "gate", "builder", "qa", "resolution"],
+        quickRef: {
+            nextAction: "If branches are blocked, check QA gate failures. If pending, trigger merge with overstory merge --all.",
+            nextTools: ["overstory_qa_summary", "overstory_mail_log", "overstory_fleet_status"],
+            methodology: "ai_flywheel",
+            tip: "Shows FIFO merge queue with conflict resolution tiers. Use include_completed:true to see merge history.",
+        },
+        phase: "utility",
+    },
+    // ═══════════════════════════════════════════
+    // VISUAL QA — Deep interaction captures & stability
+    // ═══════════════════════════════════════════
+    {
+        name: "burst_capture",
+        category: "visual_qa",
+        tags: ["burst", "capture", "screenshot", "rapid", "interaction", "deep", "animation", "transition", "hover", "click", "popup", "drawer", "modal", "streaming", "agent", "component"],
+        quickRef: {
+            nextAction: "Burst captured. Run compute_web_stability to measure SSIM across frames, or generate_grid_collage for visual comparison.",
+            nextTools: ["compute_web_stability", "generate_grid_collage", "run_visual_qa_suite"],
+            methodology: "ai_flywheel",
+            tip: "Use burst capture for deep interaction testing — popups, hover states, streaming responses, drawer opens, thread switches. Captures rapid frame sequences during UI transitions.",
+        },
+        phase: "test",
+        complexity: "medium",
+    },
+    {
+        name: "generate_grid_collage",
+        category: "visual_qa",
+        tags: ["grid", "collage", "visual", "comparison", "before-after", "screenshot", "composite", "overview", "review"],
+        quickRef: {
+            nextAction: "Collage generated. Review visually for inconsistencies. Use run_visual_qa_suite for automated scoring.",
+            nextTools: ["run_visual_qa_suite", "compute_web_stability", "analyze_screenshot"],
+            methodology: "ai_flywheel",
+            tip: "Generates a composite grid image from multiple screenshots — useful for comparing dark/light, desktop/mobile, or before/after states side-by-side.",
+        },
+        phase: "verify",
+        complexity: "low",
+    },
+    {
+        name: "compute_web_stability",
+        category: "visual_qa",
+        tags: ["stability", "ssim", "structural", "similarity", "flicker", "jank", "layout-shift", "regression", "diff", "frame", "comparison"],
+        quickRef: {
+            nextAction: "Stability computed. If SSIM < 0.95, investigate layout shifts or animation jank. Log issues with tag_ui_bug.",
+            nextTools: ["burst_capture", "tag_ui_bug", "log_gap", "run_visual_qa_suite"],
+            methodology: "ai_flywheel",
+            tip: "Computes block-based SSIM between frame pairs to detect visual instability — layout shifts, flicker, and rendering regressions.",
+        },
+        phase: "test",
+        complexity: "medium",
+    },
+    {
+        name: "run_visual_qa_suite",
+        category: "visual_qa",
+        tags: ["visual", "qa", "suite", "end-to-end", "automated", "gemini", "scoring", "jony-ive", "design", "review", "deep-interaction", "scenario", "agent", "streaming", "popup", "drawer"],
+        quickRef: {
+            nextAction: "QA suite complete. Fix P0/P1 issues first (highest score impact), then P2/P3. Re-run to verify improvements.",
+            nextTools: ["burst_capture", "log_gap", "record_learning", "save_session_note"],
+            methodology: "ai_flywheel",
+            tip: "End-to-end visual QA: captures all routes + deep interactions (agent queries, streaming, popups, drawers) → Gemini scores against Jony Ive design principles → auto-triages by P-level. Formula: 100 - P1×6 - P2×2 - P3×1.",
+        },
+        phase: "verify",
+        complexity: "high",
+    },
+    // ═══════════════════════════════════════════
+    // LOCAL DASHBOARD — Daily brief + narrative + ops
+    // ═══════════════════════════════════════════
+    {
+        name: "sync_daily_brief",
+        category: "local_dashboard",
+        tags: ["sync", "daily", "brief", "convex", "sqlite", "pull", "refresh", "narrative", "dashboard", "data"],
+        quickRef: {
+            nextAction: "Data synced. Call get_daily_brief_summary to read the brief, or open_local_dashboard for visual review.",
+            nextTools: ["get_daily_brief_summary", "get_narrative_status", "open_local_dashboard"],
+            methodology: "ai_flywheel",
+            tip: "Pulls latest dashboard snapshot + narrative threads from Convex into local SQLite. Requires CONVEX_SITE_URL and MCP_SECRET env vars.",
+        },
+        phase: "research",
+        complexity: "medium",
+    },
+    {
+        name: "get_daily_brief_summary",
+        category: "local_dashboard",
+        tags: ["daily", "brief", "summary", "metrics", "features", "sources", "dashboard", "offline", "local"],
+        quickRef: {
+            nextAction: "Review the brief. Check key signals and source quality. Use get_narrative_status for thread analysis.",
+            nextTools: ["get_narrative_status", "get_ops_dashboard", "open_local_dashboard"],
+            methodology: "ai_flywheel",
+            tip: "Reads from local SQLite — zero network needed. Returns dashboard metrics, features, and source summary from the last sync.",
+        },
+        phase: "research",
+        complexity: "low",
+    },
+    {
+        name: "get_narrative_status",
+        category: "local_dashboard",
+        tags: ["narrative", "thread", "status", "phase", "emerging", "escalating", "climax", "resolution", "dormant", "story"],
+        quickRef: {
+            nextAction: "Review thread distribution. Focus on escalating/climax threads for timely action. Use get_ops_dashboard for pipeline health.",
+            nextTools: ["get_daily_brief_summary", "get_ops_dashboard", "open_local_dashboard"],
+            methodology: "ai_flywheel",
+            tip: "Returns narrative threads grouped by phase with event counts. Filter by phase to focus on specific lifecycle stages.",
+        },
+        phase: "research",
+        complexity: "low",
+    },
+    {
+        name: "get_ops_dashboard",
+        category: "local_dashboard",
+        tags: ["ops", "operational", "dashboard", "sync", "tool-call", "frequency", "verification", "health", "monitoring"],
+        quickRef: {
+            nextAction: "Review ops health. If tool error rates are high, investigate root causes. If sync is stale, run sync_daily_brief.",
+            nextTools: ["sync_daily_brief", "get_daily_brief_summary", "open_local_dashboard"],
+            methodology: "ai_flywheel",
+            tip: "Returns last sync info, tool call frequency (24h), active verification cycles, data counts, and privacy mode status.",
+        },
+        phase: "utility",
+        complexity: "low",
+    },
+    {
+        name: "open_local_dashboard",
+        category: "local_dashboard",
+        tags: ["open", "dashboard", "browser", "server", "html", "visual", "brief", "narrative", "ops", "local", "ui"],
+        quickRef: {
+            nextAction: "Dashboard is running. Open the URL in a browser to see Brief metrics, Narrative thread lanes, and Ops status.",
+            nextTools: ["sync_daily_brief", "get_daily_brief_summary", "get_narrative_status"],
+            methodology: "ai_flywheel",
+            tip: "Starts the local dashboard server on port 6275 if not already running. Auto-refreshes every 30s from local SQLite.",
+        },
+        phase: "utility",
+        complexity: "low",
     },
 ];
 // ── Exported lookup structures ───────────────────────────────────────────
@@ -2476,110 +2589,6 @@ const REGISTRY_ENTRIES = [
 export const TOOL_REGISTRY = new Map(REGISTRY_ENTRIES.map((e) => [e.name, e]));
 /** All registry entries as array */
 export const ALL_REGISTRY_ENTRIES = REGISTRY_ENTRIES;
-// ── Auto-derive relatedTools for entries that don't have manual overrides ──
-// Uses 3 signals: same-category siblings, DOMAIN_CLUSTERS neighbors, tag overlap.
-// Must run after REGISTRY_ENTRIES is fully built. Forward-reference to DOMAIN_CLUSTERS
-// is fine because this runs at module load time (DOMAIN_CLUSTERS is defined below).
-/** Late-init: populated by _populateRelatedTools() at bottom of file */
-let _domainClusters = null;
-export function _setDomainClustersRef(clusters) {
-    _domainClusters = clusters;
-}
-function computeRelatedTools(entry) {
-    // If manually specified, use that
-    if (entry.quickRef.relatedTools && entry.quickRef.relatedTools.length > 0) {
-        return entry.quickRef.relatedTools;
-    }
-    const related = new Set();
-    const nextToolsSet = new Set(entry.quickRef.nextTools);
-    // 1. Same-category siblings (excluding self and nextTools), up to 3
-    let sibCount = 0;
-    for (const e of REGISTRY_ENTRIES) {
-        if (sibCount >= 3)
-            break;
-        if (e.category === entry.category && e.name !== entry.name && !nextToolsSet.has(e.name)) {
-            related.add(e.name);
-            sibCount++;
-        }
-    }
-    // 2. DOMAIN_CLUSTERS neighbors: tools from related categories, up to 2
-    if (_domainClusters) {
-        let clusterCount = 0;
-        for (const cluster of Object.values(_domainClusters)) {
-            if (clusterCount >= 2)
-                break;
-            if (cluster.includes(entry.category)) {
-                for (const neighborCat of cluster) {
-                    if (clusterCount >= 2)
-                        break;
-                    if (neighborCat === entry.category)
-                        continue;
-                    for (const e of REGISTRY_ENTRIES) {
-                        if (e.category === neighborCat && !nextToolsSet.has(e.name) && !related.has(e.name)) {
-                            related.add(e.name);
-                            clusterCount++;
-                            break; // one tool per neighbor category
-                        }
-                    }
-                }
-            }
-        }
-    }
-    // 3. Tag overlap: tools sharing 2+ tags (not in nextTools or already related), up to 2
-    const myTags = new Set(entry.tags);
-    let tagCount = 0;
-    for (const other of REGISTRY_ENTRIES) {
-        if (tagCount >= 2)
-            break;
-        if (other.name === entry.name || nextToolsSet.has(other.name) || related.has(other.name))
-            continue;
-        let overlap = 0;
-        for (const t of other.tags) {
-            if (myTags.has(t))
-                overlap++;
-            if (overlap >= 2)
-                break;
-        }
-        if (overlap >= 2) {
-            related.add(other.name);
-            tagCount++;
-        }
-    }
-    // 4. Fallback: if still empty (small category, all siblings in nextTools), accept 1-tag overlap
-    if (related.size === 0) {
-        for (const other of REGISTRY_ENTRIES) {
-            if (related.size >= 3)
-                break;
-            if (other.name === entry.name || nextToolsSet.has(other.name))
-                continue;
-            const hasTagOverlap = other.tags.some((t) => myTags.has(t));
-            if (hasTagOverlap) {
-                related.add(other.name);
-            }
-        }
-    }
-    // 5. Last resort: if STILL empty, pick tools from the same phase (workflow adjacency)
-    if (related.size === 0) {
-        for (const other of REGISTRY_ENTRIES) {
-            if (related.size >= 3)
-                break;
-            if (other.name === entry.name || nextToolsSet.has(other.name))
-                continue;
-            if (other.phase === entry.phase) {
-                related.add(other.name);
-            }
-        }
-    }
-    return [...related].slice(0, 7); // hard cap at 7
-}
-/** Populate relatedTools for all registry entries. Called once at module load after DOMAIN_CLUSTERS exists. */
-export function _populateRelatedTools() {
-    for (const entry of REGISTRY_ENTRIES) {
-        if (!entry.quickRef.relatedTools || entry.quickRef.relatedTools.length === 0) {
-            entry.quickRef.relatedTools = computeRelatedTools(entry);
-        }
-    }
-}
 /** Get quick ref for a tool, with fallback for unregistered tools */
 export function getQuickRef(toolName) {
     return TOOL_REGISTRY.get(toolName)?.quickRef ?? null;
@@ -2631,7 +2640,9 @@ const CATEGORY_COMPLEXITY = {
     email: "medium",
     rss: "low",
     architect: "low",
-    pr_report: "medium",
+    qa_orchestration: "low",
+    visual_qa: "medium",
+    local_dashboard: "low",
 };
 /** Per-tool complexity overrides (when category default is wrong) */
 const TOOL_COMPLEXITY_OVERRIDES = {
@@ -2910,9 +2921,6 @@ const DOMAIN_CLUSTERS = {
     writing: ["research_writing", "documentation"],
     measurement: ["eval", "benchmark", "self_eval"],
 };
-// Wire up domain clusters and auto-populate relatedTools for all registry entries
-_setDomainClustersRef(DOMAIN_CLUSTERS);
-_populateRelatedTools();
 // ── Execution trace edges — co-occurrence mining from tool_call_log ────────
 // Based on Agent-as-a-Graph (arxiv:2511.18194): execution trace edges
 // mine sequential co-occurrence patterns to discover implicit tool relationships.
@@ -2953,36 +2961,17 @@ export function _setDbAccessor(accessor) {
  *
  * Approach: for each session, pull the ordered tool sequence, then count
  * pairs within a sliding window of 5 calls. O(n) per session, no self-join.
- *
- * When transitive=true, infer A→C via A→B + B→C (two-hop co-occurrence).
- * Extended cap of 15 edges/tool (vs 10 for direct-only).
  */
-let _transitiveCooccurrenceCache = null;
-let _transitiveCooccurrenceCacheTime = 0;
-function getCooccurrenceEdges(options) {
-    const transitive = options?.transitive ?? false;
+function getCooccurrenceEdges() {
     const now = Date.now();
-    // Check appropriate cache
-    if (transitive) {
-        if (_transitiveCooccurrenceCache && now - _transitiveCooccurrenceCacheTime < COOCCURRENCE_TTL_MS) {
-            return _transitiveCooccurrenceCache;
-        }
-    }
-    else {
-        if (_cooccurrenceCache && now - _cooccurrenceCacheTime < COOCCURRENCE_TTL_MS) {
-            return _cooccurrenceCache;
-        }
+    if (_cooccurrenceCache && now - _cooccurrenceCacheTime < COOCCURRENCE_TTL_MS) {
+        return _cooccurrenceCache;
     }
-    // Build direct edges first (always needed)
-    const directEdges = new Map();
+    const edges = new Map();
     if (!_dbAccessor) {
-        _cooccurrenceCache = directEdges;
+        _cooccurrenceCache = edges;
         _cooccurrenceCacheTime = now;
-        if (transitive) {
-            _transitiveCooccurrenceCache = directEdges;
-            _transitiveCooccurrenceCacheTime = now;
-        }
-        return directEdges;
+        return edges;
     }
     try {
         const db = _dbAccessor();
@@ -3023,51 +3012,24 @@ function getCooccurrenceEdges(options) {
             .sort((a, b) => b[1] - a[1]);
         for (const [key] of sorted) {
             const [toolA, toolB] = key.split("\0");
-            const list = directEdges.get(toolA) ?? [];
+            const list = edges.get(toolA) ?? [];
             if (list.length < 10) {
                 list.push(toolB);
-                directEdges.set(toolA, list);
+                edges.set(toolA, list);
             }
         }
     }
     catch {
         // No DB or table not yet created — return empty (graceful degradation)
     }
-    // Cache direct edges
-    _cooccurrenceCache = directEdges;
+    _cooccurrenceCache = edges;
     _cooccurrenceCacheTime = now;
-    if (!transitive)
-        return directEdges;
-    // Transitive inference: A→B and B→C ⟹ A→C (two-hop)
-    const transitiveEdges = new Map([...directEdges.entries()].map(([k, v]) => [k, [...v]]));
-    for (const [toolA, directNeighbors] of directEdges) {
-        const existingSet = new Set(directNeighbors);
-        existingSet.add(toolA); // avoid self-loops
-        for (const toolB of directNeighbors) {
-            const bNeighbors = directEdges.get(toolB);
-            if (!bNeighbors)
-                continue;
-            const list = transitiveEdges.get(toolA);
-            for (const toolC of bNeighbors) {
-                if (existingSet.has(toolC))
-                    continue;
-                if (list.length >= 15)
-                    break; // extended cap for transitive
-                list.push(toolC);
-                existingSet.add(toolC);
-            }
-        }
-    }
-    _transitiveCooccurrenceCache = transitiveEdges;
-    _transitiveCooccurrenceCacheTime = now;
-    return transitiveEdges;
+    return edges;
 }
 /** Reset co-occurrence cache — for testing only. */
 export function _resetCooccurrenceCache() {
     _cooccurrenceCache = null;
     _cooccurrenceCacheTime = 0;
-    _transitiveCooccurrenceCache = null;
-    _transitiveCooccurrenceCacheTime = 0;
 }
 /** Inject co-occurrence edges directly — for testing only. */
 export function _setCooccurrenceForTesting(edges) {
@@ -3459,8 +3421,7 @@ export function hybridSearch(query, tools, options) {
         });
     }
     results.sort((a, b) => b.score - a.score);
-    const offset = options?.offset ?? 0;
-    return results.slice(offset, offset + limit);
+    return results.slice(0, limit);
 }
 /** Available search modes for discover_tools */
 export const SEARCH_MODES = ["hybrid", "fuzzy", "regex", "prefix", "semantic", "exact", "dense", "embedding"];
@@ -3482,6 +3443,7 @@ export const WORKFLOW_CHAINS = {
             { tool: "run_mandatory_flywheel", action: "6-step final verification" },
             { tool: "record_learning", action: "Capture what you learned" },
             { tool: "promote_to_eval", action: "Feed into eval batch" },
+            { tool: "save_session_note", action: "Save traceability note — cite original request, summarize what was delivered" },
         ],
     },
     fix_bug: {
@@ -3494,6 +3456,7 @@ export const WORKFLOW_CHAINS = {
             { tool: "log_test_result", action: "Record regression test" },
             { tool: "run_mandatory_flywheel", action: "6-step verification" },
             { tool: "record_learning", action: "Record the gotcha/pattern" },
+            { tool: "save_session_note", action: "Save traceability note — cite original request, record root cause and fix" },
         ],
     },
     ui_change: {
@@ -3507,6 +3470,7 @@ export const WORKFLOW_CHAINS = {
             { tool: "run_quality_gate", action: "Run ui_ux_qa gate" },
             { tool: "run_mandatory_flywheel", action: "Final verification" },
             { tool: "record_learning", action: "Record UI patterns" },
+            { tool: "save_session_note", action: "Save traceability note — cite original request, record visual evidence path" },
         ],
     },
     parallel_project: {
@@ -3824,16 +3788,67 @@ export const WORKFLOW_CHAINS = {
             { tool: "save_session_note", action: "Log sent emails so you have an audit trail that survives compaction" },
         ],
     },
-    pr_creation: {
-        name: "Visual PR Creation",
-        description: "Create a PR with visual evidence from a UI Dive session — screenshots, timeline, bug fixes, past session links",
+    webmcp_discovery: {
+        name: "WebMCP Origin Discovery",
+        description: "Connect to a WebMCP-enabled origin, discover its tools, and invoke them from the agent",
+        steps: [
+            { tool: "connect_webmcp_origin", action: "Connect to the target origin URL and establish a WebMCP session" },
+            { tool: "list_webmcp_tools", action: "List all tools exposed by the origin with schemas and annotations" },
+            { tool: "call_webmcp_tool", action: "Invoke a specific tool on the remote origin with arguments" },
+            { tool: "disconnect_webmcp_origin", action: "Clean up the WebMCP session when done" },
+        ],
+    },
+    batch_autopilot: {
+        name: "Batch Autopilot Run",
+        description: "Set up an operator profile and run a batch autopilot session for autonomous agent tasks",
+        steps: [
+            { tool: "setup_operator_profile", action: "Create or update USER.md and operator profile for autopilot context" },
+            { tool: "get_autopilot_status", action: "Check current autopilot readiness, profile completeness, and last run status" },
+            { tool: "trigger_batch_run", action: "Start a batch autopilot run using the operator profile as context" },
+            { tool: "get_batch_run_history", action: "Review history of past batch runs, outcomes, and timing" },
+            { tool: "sync_operator_profile", action: "Sync operator profile state from disk after manual edits" },
+        ],
+    },
+    daily_review: {
+        name: "Daily Brief Review",
+        description: "Pull the latest daily brief, review narrative threads, check ops dashboard, and sync to local storage",
+        steps: [
+            { tool: "sync_daily_brief", action: "Pull today's brief and narrative from Convex into local SQLite" },
+            { tool: "get_daily_brief_summary", action: "Get the full brief summary with key signals and insights" },
+            { tool: "get_narrative_status", action: "Check narrative thread status — dominant story, under-reported angle, evidence scores" },
+            { tool: "get_ops_dashboard", action: "Review pipeline health: posting status, tool usage, active workflows" },
+            { tool: "open_local_dashboard", action: "Open the local HTML dashboard in the browser for visual review" },
+        ],
+    },
+    deep_interaction: {
+        name: "Deep Interaction Discovery & Capture",
+        description: "Systematically discover, capture, and verify interactive UI behaviors — popups, drawers, streaming responses, hover states, agent conversations, thread management, keyboard shortcuts. Goes beyond static screenshot routes to test real user behavior flows.",
+        steps: [
+            { tool: "dive_auto_discover", action: "Auto-discover interactive components (buttons, drawers, modals, expandable rows) across all routes" },
+            { tool: "start_ui_dive", action: "Start a structured UI dive session to track interaction coverage" },
+            { tool: "burst_capture", action: "Rapid-fire capture during interaction transitions (open drawer, hover tooltip, type in agent panel)" },
+            { tool: "dive_interaction_test", action: "Test specific interaction patterns: click→open→verify, type→submit→stream, hover→preview→dismiss" },
+            { tool: "compute_web_stability", action: "Measure SSIM stability across interaction frames — detect layout shifts, flicker, animation jank" },
+            { tool: "dive_record_test_step", action: "Record each interaction test step with expected vs actual behavior" },
+            { tool: "run_visual_qa_suite", action: "Run full visual QA suite including deep interaction captures" },
+            { tool: "tag_ui_bug", action: "Tag issues found during interaction testing (broken hover, drawer z-index, missing focus trap)" },
+            { tool: "get_dive_report", action: "Generate interaction coverage report — which components were tested, which remain" },
+            { tool: "record_learning", action: "Record interaction patterns, common failure modes, and selector strategies" },
+        ],
+    },
+    gemini_qa: {
+        name: "Gemini Vision QA Loop",
+        description: "Automated UI/UX quality gate — capture screenshots (dark/light × desktop/mobile), send to Gemini Flash for Jony Ive product design review, fix issues, loop until 100/100",
         steps: [
-            { tool: "get_dive_report", action: "Review the dive findings and health score before creating PR" },
-            { tool: "export_pr_screenshots", action: "Export before/after screenshot pairs to a directory for committing" },
-            { tool: "generate_pr_report", action: "Generate rich markdown PR body with visual evidence, timeline, and past session links" },
-            { tool: "create_visual_pr", action: "End-to-end PR creation: exports assets, generates markdown, pushes branch, creates GitHub PR" },
-            { tool: "review_pr_checklist", action: "Validate the PR against the checklist (title, description, tests, verification)" },
-            { tool: "enforce_merge_gate", action: "Pre-merge validation — git state, quality gates, verification cycles" },
+            { tool: "check_mcp_setup", action: "Verify Gemini API key (GOOGLE_AI_KEY) and vision domain are ready" },
+            { tool: "start_verification_cycle", action: "Open a verification cycle titled 'Gemini QA Loop' to track progress" },
+            { tool: "save_session_note", action: "Shell: `npx vite build` then `npx playwright test tests/e2e/full-ui-dogfood.spec.ts --project=chromium --workers=1` — capture 4-variant screenshots" },
+            { tool: "save_session_note", action: "Shell: `npm run dogfood:publish` — copy screenshots to public/dogfood/ with variant metadata manifest" },
+            { tool: "save_session_note", action: "Shell: `npx vite build && node scripts/ui/runDogfoodGeminiQa.mjs` — rebuild, launch preview, trigger Gemini QA" },
+            { tool: "log_test_result", action: "Log QA score from public/dogfood/qa-results.json — formula: 100 - P1×6 - P2×2 - P3×1" },
+            { tool: "save_session_note", action: "Fix P1 issues (6pts each) then P2 (2pts) then P3 (1pt) — root-cause each before fixing" },
+            { tool: "get_overstory_qa_gate", action: "Check QA gate for per-route stability grades and issue counts" },
+            { tool: "record_learning", action: "Record QA trajectory and Gemini finding patterns for regression tracking" },
         ],
     },
 };