npm - @hover-dev/core - Versions diffs - 0.14.1 → 0.16.0 - Mend

@hover-dev/core 0.14.1 → 0.16.0

This diff compares the contents of two publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (99)

package/README.md +73 -1
package/dist/agents/aider.d.ts.map +1 -1
package/dist/agents/aider.js +6 -14
package/dist/agents/claude.d.ts.map +1 -1
package/dist/agents/claude.js +14 -0
package/dist/agents/codex.d.ts.map +1 -1
package/dist/agents/codex.js +10 -4
package/dist/agents/cursor.d.ts.map +1 -1
package/dist/agents/cursor.js +8 -17
package/dist/agents/gemini.d.ts.map +1 -1
package/dist/agents/gemini.js +3 -14
package/dist/agents/invoke.d.ts.map +1 -1
package/dist/agents/invoke.js +10 -1
package/dist/agents/qwen.d.ts.map +1 -1
package/dist/agents/qwen.js +3 -14
package/dist/agents/shared.d.ts +28 -0
package/dist/agents/shared.d.ts.map +1 -0
package/dist/agents/shared.js +35 -0
package/dist/agents/types.d.ts +11 -0
package/dist/agents/types.d.ts.map +1 -1
package/dist/mcp/sourceFence.d.ts +23 -0
package/dist/mcp/sourceFence.d.ts.map +1 -0
package/dist/mcp/sourceFence.js +75 -0
package/dist/mcp/sourceServer.d.ts +3 -0
package/dist/mcp/sourceServer.d.ts.map +1 -0
package/dist/mcp/sourceServer.js +116 -0
package/dist/playwright/preflight.d.ts.map +1 -1
package/dist/playwright/preflight.js +6 -1
package/dist/playwright/raiseWindow.d.ts.map +1 -1
package/dist/playwright/raiseWindow.js +22 -3
package/dist/playwright/resolveMcpConfig.d.ts +11 -0
package/dist/playwright/resolveMcpConfig.d.ts.map +1 -1
package/dist/playwright/resolveMcpConfig.js +17 -3
package/dist/plugin-api.d.ts +7 -0
package/dist/plugin-api.d.ts.map +1 -1
package/dist/runSession.d.ts +42 -0
package/dist/runSession.d.ts.map +1 -0
package/dist/runSession.js +81 -0
package/dist/service/cdpHandlers.d.ts +3 -7
package/dist/service/cdpHandlers.d.ts.map +1 -1
package/dist/service/cdpHandlers.js +4 -16
package/dist/service/cdpHint.d.ts.map +1 -1
package/dist/service/cdpHint.js +30 -14
package/dist/service/conventions.d.ts +8 -0
package/dist/service/conventions.d.ts.map +1 -0
package/dist/service/conventions.js +42 -0
package/dist/service/saveHandlers.d.ts +10 -13
package/dist/service/saveHandlers.d.ts.map +1 -1
package/dist/service/saveHandlers.js +9 -25
package/dist/service/types.d.ts +5 -0
package/dist/service/types.d.ts.map +1 -1
package/dist/service.d.ts +13 -4
package/dist/service.d.ts.map +1 -1
package/dist/service.js +264 -148
package/dist/skills/writeSkill.d.ts +12 -35
package/dist/skills/writeSkill.d.ts.map +1 -1
package/dist/skills/writeSkill.js +10 -166
package/dist/specs/detectSharedFlows.d.ts +35 -0
package/dist/specs/detectSharedFlows.d.ts.map +1 -0
package/dist/specs/detectSharedFlows.js +171 -0
package/dist/specs/extractPageObjects.d.ts +18 -0
package/dist/specs/extractPageObjects.d.ts.map +1 -0
package/dist/specs/extractPageObjects.js +98 -0
package/dist/specs/generatePageObject.d.ts +29 -0
package/dist/specs/generatePageObject.d.ts.map +1 -0
package/dist/specs/generatePageObject.js +149 -0
package/dist/specs/listSpecs.d.ts +12 -0
package/dist/specs/listSpecs.d.ts.map +1 -1
package/dist/specs/listSpecs.js +27 -2
package/dist/specs/optimizationSuggestion.d.ts +26 -0
package/dist/specs/optimizationSuggestion.d.ts.map +1 -0
package/dist/specs/optimizationSuggestion.js +28 -0
package/dist/specs/optimizeSpec.d.ts +42 -0
package/dist/specs/optimizeSpec.d.ts.map +1 -0
package/dist/specs/optimizeSpec.js +188 -0
package/dist/specs/optimizeSpecWithAgent.d.ts +11 -0
package/dist/specs/optimizeSpecWithAgent.d.ts.map +1 -0
package/dist/specs/optimizeSpecWithAgent.js +40 -0
package/dist/specs/pageObjectManifest.d.ts +20 -0
package/dist/specs/pageObjectManifest.d.ts.map +1 -0
package/dist/specs/pageObjectManifest.js +40 -0
package/dist/specs/seeds.d.ts +36 -0
package/dist/specs/seeds.d.ts.map +1 -0
package/dist/specs/seeds.js +74 -0
package/dist/specs/sidecar.d.ts +25 -0
package/dist/specs/sidecar.d.ts.map +1 -0
package/dist/specs/sidecar.js +38 -0
package/dist/specs/softBatch.d.ts +14 -0
package/dist/specs/softBatch.d.ts.map +1 -0
package/dist/specs/softBatch.js +177 -0
package/dist/specs/text.d.ts +17 -0
package/dist/specs/text.d.ts.map +1 -0
package/dist/specs/text.js +24 -0
package/dist/specs/writeCaseCsv.d.ts.map +1 -1
package/dist/specs/writeCaseCsv.js +2 -8
package/dist/specs/writeSpec.d.ts +50 -0
package/dist/specs/writeSpec.d.ts.map +1 -1
package/dist/specs/writeSpec.js +251 -84
package/package.json +5 -3

package/dist/service.js (changed)

@@ -10,10 +10,8 @@
  *     { type: 'hello',           payload: { agentId, model, version } }
  *     { type: 'event',           payload: InvokeEvent }              // see agents/types.ts
  *     { type: 'cdp-status',      payload: { state, reason?, matchingTabUrl?, browser?, launching? } }
- *     { type: 'skill-saved',     payload: { name, path } }
- *     { type: 'skill-exists',    payload: { slug, existingPath } }
- *     { type: 'skills-list',     payload: { skills: SkillSummary[] } }
  *     { type: 'specs-list',      payload: { specs: SpecSummary[] } }
+ *     { type: 'seeds-list',      payload: { seeds: { name, note, signature, code, source }[] } }
  *     { type: 'spec-saved',      payload: { name, path } }
  *     { type: 'spec-exists',     payload: { slug, existingPath } }
  *     { type: 'case-csv-saved',  payload: { name, path } }
@@ -31,11 +29,10 @@
  *     { type: 'check-cdp',     payload: { pageUrl } }                 // "is this widget in the debug Chrome?"
  *     { type: 'launch-chrome', payload: { pageUrl } }                 // start debug Chrome, navigate to pageUrl
  *     { type: 'focus-debug',   payload: { pageUrl } }                 // bringToFront the matching tab in debug Chrome
- *     { type: 'save-skill',    payload: { name, description, steps, overwrite? } }
  *     { type: 'save-spec',     payload: { name, description, steps, assertions?, overwrite? } }
  *     { type: 'save-case-csv', payload: { name, description, steps, assertions?, jiraProjectKey?, labels?, overwrite? } }
- *     { type: 'list-skills' }
  *     { type: 'list-specs' }                                            // ask for every spec under __vibe_tests__/, with parsed JSDoc headers
+ *     { type: 'list-seeds' }                                            // ask for built-in + .hover/rules/ translation seeds (read-only)
  *     { type: 'list-agents' }                                          // ask for the full agent registry + install status
  *     { type: 'switch-agent',  payload: { agentId } }                  // set the service's current agent; broadcasts to all connections
  *
@@ -49,19 +46,29 @@
  *     { type: 'list-modes' }
  */
 import { WebSocketServer, WebSocket } from 'ws';
-import { invokeAgent } from './agents/invoke.js';
+import { fileURLToPath } from 'node:url';
+import { dirname, resolve } from 'node:path';
+import { runSession } from './runSession.js';
+import { readConventions } from './service/conventions.js';
+import { optimizeSpecWithAgent } from './specs/optimizeSpecWithAgent.js';
+import { promoteOptimized, discardOptimized } from './specs/optimizeSpec.js';
 import { listAgentAvailability, pickPrimaryAgent, } from './agents/detect.js';
 import { getAgent } from './agents/registry.js';
 import { getPreflight, invalidatePreflight } from './playwright/preflightCache.js';
-import { resolveMcpConfig } from './playwright/resolveMcpConfig.js';
+import { resolveMcpConfig, mcpToolPrefix } from './playwright/resolveMcpConfig.js';
 import { launchDebugChrome } from './playwright/launchChrome.js';
-import { listSkills } from './skills/writeSkill.js';
 import { listSpecs } from './specs/listSpecs.js';
+import { readSeeds, BUILTIN_SEEDS } from './specs/seeds.js';
 import { send, sendIfOpen } from './service/types.js';
 import { buildCdpHint, buildCdpHintResume } from './service/cdpHint.js';
 import { handleCheckCdp, handleLaunchChrome, handleFocusDebug, } from './service/cdpHandlers.js';
-import { handleSaveArtifact, SKILL_CONFIG, SPEC_CONFIG, CASE_CSV_CONFIG, } from './service/saveHandlers.js';
+import { handleSaveArtifact, SPEC_CONFIG, CASE_CSV_CONFIG, } from './service/saveHandlers.js';
 import { CURRENT_API_VERSION, } from './plugin-api.js';
+/** The source-reader MCP server (codeContext). Id → the `mcp__hover_source`
+ *  tool prefix; script path resolved relative to this module so it works from
+ *  dist/. Spawned only when codeContext is enabled. */
+const SOURCE_MCP_ID = 'hover-source';
+const SOURCE_MCP_SCRIPT = resolve(dirname(fileURLToPath(import.meta.url)), 'mcp', 'sourceServer.js');
 // ClientMessage + send moved to ./service/types.ts so the cdp + save
 // handler modules can share them. See those files for the wire shape.
 const PROTOCOL_VERSION = 1;
@@ -129,6 +136,11 @@ export async function startService(opts) {
     const preferred = opts.agentId ?? process.env.HOVER_AGENT;
     const primary = await pickPrimaryAgent(preferred);
     let currentAgentId = primary?.descriptor.id ?? preferred ?? 'claude';
+    // Optional model API key the widget supplied (set-api-key). Held in memory
+    // for this service's lifetime only — never written to disk, never logged.
+    // Injected into the spawned CLI's env so a user without a logged-in
+    // subscription can drive Hover on their own key.
+    let currentApiKey = process.env.ANTHROPIC_API_KEY ?? process.env.OPENAI_API_KEY ?? undefined;
     if (!primary) {
         // Nothing installed — still bind so the widget can show a helpful
         // "install one of these" dialog. Commands will fail with
@@ -147,6 +159,7 @@ export async function startService(opts) {
     // so the user can hit Stop when they've seen enough. Pass maxBudgetUsd
     // explicitly (or via the Vite plugin option) if a hard ceiling is needed.
     const maxBudgetUsd = opts.maxBudgetUsd;
+    const optimizeMode = opts.optimizeMode ?? 'suggest';
     const cdpUrl = opts.cdpUrl ?? 'http://localhost:9222';
     const devRoot = opts.devRoot ?? process.cwd();
     const wss = await pickAndBind('127.0.0.1', requestedPort, PORT_RETRIES);
@@ -189,6 +202,15 @@ export async function startService(opts) {
                 }
             }
         }
+        // codeContext (opt-in, all modes): the fenced read-only source reader.
+        if (opts.codeContext) {
+            extra.push({
+                id: SOURCE_MCP_ID,
+                command: process.execPath,
+                args: [SOURCE_MCP_SCRIPT],
+                env: { HOVER_PROJECT_ROOT: devRoot },
+            });
+        }
         // Single-Chrome model: the Playwright MCP always points at the one debug
         // Chrome on the normal cdpUrl. (Pre-single-Chrome this branched to a
         // mode-specific port like 9333; there's no second Chrome anymore.)
@@ -233,6 +255,25 @@ export async function startService(opts) {
     }
     /** id of the currently-active mode, or null for normal (unmoded) mode. */
     let currentModeId = null;
+    /**
+     * The single in-flight agent run, held at SERVICE scope (not per-connection)
+     * so it SURVIVES the widget's WS dropping. The widget lives in the page the
+     * agent drives, so any agent navigation (a pentest payload in the URL, an
+     * HMR reload) tears the widget down and closes its socket — but the agent is
+     * still happily driving the tab over CDP and recording findings server-side.
+     * Killing it on every navigation made pentest mode (which navigates
+     * constantly) unusable. Instead: detach on close, keep streaming to whichever
+     * ws is attached, and only abort if no widget reconnects within the grace
+     * window. Single active run — Hover binds 127.0.0.1 for one local user.
+     */
+    const RECONNECT_GRACE_MS = 15_000;
+    let activeRun = null;
+    /** Send a run event to whichever ws is currently attached (survives reconnect). */
+    const emitToRun = (msg) => {
+        const c = activeRun?.client;
+        if (c && c.readyState === WebSocket.OPEN)
+            send(c, msg);
+    };
     /** Chrome-proxy settings a plugin's `hover:service:start` hook set on us
      *  (security's resident MITM). RESIDENT for the whole session — set once
      *  before Chrome launches, never cleared on mode change — so the single
@@ -267,6 +308,9 @@ export async function startService(opts) {
             id: p.mode.id,
             label: p.mode.label,
             description: p.mode.description,
+            // Widget retints to this while the mode is engaged (falls back to
+            // security orange in the widget when absent).
+            accent: p.mode.accent,
             pluginName: p.name,
         }));
         const payload = { current: currentModeId, available };
@@ -384,7 +428,7 @@ export async function startService(opts) {
     wss.on('connection', ws => {
         send(ws, {
             type: 'hello',
-            payload: { agentId: currentAgentId, model, version: PROTOCOL_VERSION },
+            payload: { agentId: currentAgentId, model, version: PROTOCOL_VERSION, optimizeMode },
         });
         // Send the agent list as a follow-up event so the widget can render the
         // dropdown immediately on connect / reconnect (e.g. after HMR). The
@@ -404,20 +448,42 @@ export async function startService(opts) {
         // Send the mode catalogue too, so the widget can render the mode
         // toggle immediately. Empty list when no plugins are loaded.
         broadcastModes(ws);
-        let busy = false;
-        let inflight = null;
-        let cancelled = false;
-        // If the page reloads (e.g. AI navigated to a same-origin URL), the WS
-        // connection drops. Abort the in-flight agent so we don't leave an
-        // orphan claude process driving the now-vanished browser tab.
+        // Re-attach to a run that's still in flight (the previous widget dropped —
+        // most commonly the agent navigated and reloaded the page the widget lives
+        // in). Cancel the pending abort, point the run's event stream at this fresh
+        // socket, and tell the widget so it can restore its "running" UI. Without
+        // this the run would be killed on every agent navigation.
+        // Only re-attach during a genuine reconnect GAP (the prior client is gone).
+        // If a live client is still attached, this is a SECOND widget (e.g. the
+        // user's regular tab alongside the debug-Chrome tab — both inject a widget
+        // on the same origin and open their own socket). Seizing the stream would
+        // silence the first widget and let the second's close abort a healthy run,
+        // so leave a second concurrent widget in idle UI rather than hijacking.
+        if (activeRun && activeRun.client === null) {
+            if (activeRun.graceTimer) {
+                clearTimeout(activeRun.graceTimer);
+                activeRun.graceTimer = null;
+            }
+            activeRun.client = ws;
+            send(ws, { type: 'run-active', payload: { prompt: activeRun.prompt } });
+        }
+        // If the widget's socket closes while a run it owns is in flight, DON'T
+        // abort — the agent is still driving the tab over CDP. Detach this ws and
+        // start a grace window; a reconnecting widget (above) cancels the abort.
+        // Only if nobody comes back do we abort, so we still never leave an orphan.
         ws.on('close', () => {
-            inflight?.abort();
+            if (activeRun && activeRun.client === ws) {
+                activeRun.client = null;
+                activeRun.graceTimer = setTimeout(() => {
+                    activeRun?.abort.abort();
+                }, RECONNECT_GRACE_MS);
+            }
         });
         const cancel = () => {
-            if (!busy)
+            if (!activeRun)
                 return;
-            cancelled = true;
-            inflight?.abort();
+            activeRun.cancelled = true;
+            activeRun.abort.abort();
             // Send a synthetic session_end so the widget resets to idle immediately.
             // The for-await loop below short-circuits on `cancelled`, so no events
             // from the dying child will arrive after this.
@@ -427,7 +493,7 @@ export async function startService(opts) {
             // stays false because the agent didn't fail: the user chose to
             // end the run. The widget renders this as a neutral "Stopped"
             // state rather than a red Failed card.
-            send(ws, {
+            emitToRun({
                 type: 'event',
                 payload: {
                     kind: 'session_end',
@@ -454,7 +520,7 @@ export async function startService(opts) {
                 return;
             }
             if (msg.type === 'set-mode') {
-                if (busy) {
+                if (activeRun) {
                     send(ws, {
                         type: 'error',
                         payload: { message: 'set-mode: a command is already running; stop it first' },
@@ -509,7 +575,7 @@ export async function startService(opts) {
                 // Refuse to switch mid-flight; the user's running command would
                 // otherwise outlive its own descriptor and the events it produces
                 // would be parsed against the wrong wire format.
-                if (busy) {
+                if (activeRun) {
                     send(ws, {
                         type: 'error',
                         payload: { message: 'switch-agent: a command is already running; stop it first' },
@@ -531,13 +597,14 @@ export async function startService(opts) {
                 await broadcastAgents();
                 return;
             }
-            if (msg.type === 'save-skill') {
-                await handleSaveArtifact(ws, msg, devRoot, SKILL_CONFIG);
-                return;
-            }
-            if (msg.type === 'list-skills') {
-                const skills = await listSkills(devRoot);
-                send(ws, { type: 'skills-list', payload: { skills } });
+            if (msg.type === 'set-api-key') {
+                // The widget supplies (or clears) a model API key. Stored in memory
+                // only and injected into the spawned CLI's env at invoke time — never
+                // persisted, never logged, never echoed back. Empty/missing clears it.
+                const key = msg.payload?.key;
+                currentApiKey = typeof key === 'string' && key.trim() ? key.trim() : undefined;
+                const envVar = getAgent(currentAgentId)?.apiKeyEnv;
+                send(ws, { type: 'api-key-status', payload: { hasKey: !!currentApiKey, envVar } });
                 return;
             }
             if (msg.type === 'list-specs') {
@@ -549,6 +616,21 @@ export async function startService(opts) {
                 send(ws, { type: 'specs-list', payload: { specs } });
                 return;
             }
+            if (msg.type === 'list-seeds') {
+                // Widget's Seeds tab: show which translation seeds Hover sees — the
+                // built-in set + whatever the user dropped in <devRoot>/.hover/rules/.
+                // Read-only; users add seeds by hand (no download path).
+                const builtinNames = new Set(BUILTIN_SEEDS.map(s => s.name));
+                const seeds = (await readSeeds(devRoot)).map(s => ({
+                    name: s.name,
+                    note: s.note ?? '',
+                    signature: s.signature,
+                    code: s.example?.code ?? '',
+                    source: builtinNames.has(s.name) ? 'builtin' : 'project',
+                }));
+                send(ws, { type: 'seeds-list', payload: { seeds } });
+                return;
+            }
             if (msg.type === 'save-spec') {
                 await handleSaveArtifact(ws, msg, devRoot, SPEC_CONFIG);
                 return;
@@ -557,6 +639,55 @@ export async function startService(opts) {
                 await handleSaveArtifact(ws, msg, devRoot, CASE_CSV_CONFIG);
                 return;
             }
+            // Stage 7 (F7) widget flow: optimize a saved spec, then promote/discard
+            // the candidate after the human reviews the diff. optimizeSpecWithAgent
+            // spawns the codegen LLM (no browser, no MCP); the original spec is never
+            // touched until an explicit promote.
+            if (msg.type === 'optimize-spec') {
+                const slug = msg.payload?.slug;
+                if (typeof slug !== 'string' || !slug) {
+                    send(ws, { type: 'error', payload: { message: 'optimize-spec: slug is required' } });
+                    return;
+                }
+                try {
+                    const res = await optimizeSpecWithAgent(devRoot, slug, {
+                        agentId: currentAgentId, model, maxBudgetUsd, apiKey: currentApiKey,
+                    });
+                    send(ws, { type: 'optimize-result', payload: { slug, original: res.original, candidate: res.code } });
+                }
+                catch (err) {
+                    const reason = err instanceof Error ? err.message : String(err);
+                    send(ws, { type: 'optimize-failed', payload: { slug, reason } });
+                }
+                return;
+            }
+            if (msg.type === 'promote-optimized') {
+                const slug = msg.payload?.slug;
+                if (typeof slug !== 'string' || !slug) {
+                    send(ws, { type: 'error', payload: { message: 'promote-optimized: slug is required' } });
+                    return;
+                }
+                try {
+                    const path = await promoteOptimized(devRoot, slug);
+                    send(ws, { type: 'optimized-promoted', payload: { slug, path } });
+                    send(ws, { type: 'specs-list', payload: { specs: await listSpecs(devRoot) } });
+                }
+                catch (err) {
+                    const m = err instanceof Error ? err.message : String(err);
+                    send(ws, { type: 'error', payload: { message: `promote-optimized: ${m}` } });
+                }
+                return;
+            }
+            if (msg.type === 'discard-optimized') {
+                const slug = msg.payload?.slug;
+                if (typeof slug !== 'string' || !slug) {
+                    send(ws, { type: 'error', payload: { message: 'discard-optimized: slug is required' } });
+                    return;
+                }
+                await discardOptimized(devRoot, slug);
+                send(ws, { type: 'optimized-discarded', payload: { slug } });
+                return;
+            }
             // v0.12 — plugin-contributed save handlers. Lookup is O(plugins),
             // which is fine because there's at most a handful of plugins ever
             // loaded. Each plugin's manifest declares `saveHandlers[].type`
@@ -609,36 +740,31 @@ export async function startService(opts) {
                 ? msg.payload.sessionId
                 : undefined;
             // Re-record mode: when the client (widget Specs tab or hover CLI)
-            // passes `reRecord: { slug }`, we collect tool_use events server-side
-            // into a SkillStep[] and, on session_end with no error, overwrite the
-            // existing __vibe_tests__/<slug>.spec.ts. This is the same flow the
-            // widget uses for "Save as Spec", but the spec already exists and is
-            // being regenerated for the current UI.
+            // passes `reRecord: { slug }`, runSession collects the tool_use events
+            // into a SpecStep[] and, on a clean finish, we overwrite the existing
+            // __vibe_tests__/<slug>.spec.ts. Same flow the widget uses for "Save as
+            // Spec", but the spec already exists and is being regenerated for the
+            // current UI.
             const reRecordSlug = msg.payload && typeof msg.payload === 'object' && 'reRecord' in msg.payload
                 ? msg.payload.reRecord?.slug
                 : undefined;
             if (typeof text !== 'string' || !text.trim())
                 return;
-            if (busy) {
+            if (activeRun) {
                 send(ws, {
                     type: 'error',
-                    payload: { message: 'A command is already running on this connection.' },
+                    payload: { message: 'A command is already running.' },
                 });
                 return;
             }
-            busy = true;
-            cancelled = false;
-            inflight = new AbortController();
-            // Re-record step collector — populated as tool_use events stream by,
-            // consumed at session_end to overwrite the original spec. Empty unless
-            // reRecordSlug is set on this command. We seed with a synthetic
-            // `user` step so writeSpec's JSDoc Original-prompt: line carries the
-            // text the agent was actually given (which is the prompt we read out
-            // of the existing spec — the same one we're regenerating).
-            const reRecordSteps = [];
-            if (reRecordSlug) {
-                reRecordSteps.push({ kind: 'user', text });
-            }
+            const run = {
+                abort: new AbortController(),
+                cancelled: false,
+                client: ws,
+                graceTimer: null,
+                prompt: text,
+            };
+            activeRun = run;
             try {
                 // Build the MCP config first — it's pure local file IO and lets
                 // us assert plugin-contributed servers landed in the config even
@@ -649,13 +775,7 @@ export async function startService(opts) {
                 // Playwright MCP server would silently launch its own Chromium —
                 // and Hover's premise is to drive the user's existing Chrome (with
                 // their dev state, cookies, devtools open), never spawn a fresh one.
-                // In an active mode, the relevant CDP endpoint may be the mode's
-                // own port (e.g. 9333 for security), not the default cdpUrl.
-                const preflightExtras = effectiveLaunchExtras();
-                const preflightCdpUrl = preflightExtras?.cdpPort
-                    ? `http://localhost:${preflightExtras.cdpPort}`
-                    : cdpUrl;
-                const cdp = await getPreflight(preflightCdpUrl);
+                const cdp = await getPreflight(cdpUrl);
                 if (!cdp.ok) {
                     send(ws, {
                         type: 'event',
@@ -682,6 +802,15 @@ export async function startService(opts) {
                 let appendSystemPrompt = resumeSessionId
                     ? buildCdpHintResume(cdp.tabs)
                     : buildCdpHint(cdp.tabs);
+                // Knowledge layer (F5): on the first turn, fold in the project's
+                // .hover/conventions.md (static, like cdpHint's rules — skipped on
+                // resume to keep the prompt cache intact). The service reads the file;
+                // the agent never gains filesystem access (D2).
+                if (!resumeSessionId) {
+                    const conventions = await readConventions(devRoot);
+                    if (conventions)
+                        appendSystemPrompt = `${appendSystemPrompt}\n\n${conventions}`;
+                }
                 // Add plugin-contributed prompt additions whose scope includes the
                 // current mode (or '*' for always-on). Walks ALL loaded plugins,
                 // not just the active-mode plugin — a plugin that contributes
@@ -701,6 +830,13 @@ export async function startService(opts) {
                         }
                     }
                 }
+                // codeContext: tell the agent the fenced source reader exists, so it
+                // proactively reads the real code (better selectors/routes when
+                // authoring; white-box confirmation when probing) instead of only
+                // guessing from the rendered DOM.
+                if (opts.codeContext) {
+                    appendSystemPrompt = `${appendSystemPrompt}\n\nYou also have read-only access to this project's source via mcp__hover_source (read_source / list_source), fenced to the repo (secrets, keys, .env, .git, node_modules and build output are refused). Use it to read the actual component / route / API code — write tests against the real selectors and, when probing for security issues, confirm a finding against the server code (the query, the authz check) rather than guessing from the page alone.`;
+                }
                 // Mirror the prompt's language in the agent's *prose* output — the
                 // verification summary (Result card), the ## Findings block, and the
                 // step narration — the same way Voice mode mirrors it in TTS. A
@@ -714,16 +850,9 @@ export async function startService(opts) {
                 }
                 // Snapshot the agent id so a switch-agent message during the run
                 // can't smear two agents across one invocation. (We also gate
-                // switch-agent on `busy`, but defense in depth.)
+                // switch-agent on an active run, but defense in depth.) runSession gates
+                // the allow/deny lists on the agent's sandboxStrength internally.
                 const invokedAgentId = currentAgentId;
-                const invokedDescriptor = getAgent(invokedAgentId);
-                // Only Claude's `--allowedTools`/`--disallowedTools` flags are
-                // honoured — passing them to a soft-sandbox agent like codex is a
-                // no-op (its buildArgs ignores them). We still gate at the service
-                // layer for clarity: a hard-sandbox agent gets the tight allowlist,
-                // a soft one gets nothing and relies on its descriptor's built-in
-                // sandbox flags + developer_instructions.
-                const isHardSandbox = invokedDescriptor?.sandboxStrength === 'hard';
                 // Active mode's plugin-contributed MCP server ids — added to the
                 // hard-sandbox allow list so Claude can actually call them. Claude
                 // sanitises non-alphanumeric chars in the id when forming tool
@@ -731,99 +860,77 @@ export async function startService(opts) {
                 // and `--allowedTools mcp__foo` matches every tool under that
                 // prefix. We pass the prefix `mcp__<sanitized>` so all of the
                 // server's tools are reachable.
-                const sanitize = (s) => s.replace(/[^a-zA-Z0-9]+/g, '_').replace(/^_+|_+$/g, '');
                 const activePluginMcpIds = [];
                 if (currentModeId) {
                     for (const p of plugins) {
                         for (const srv of p.mcpServers ?? []) {
                             const scope = srv.activeInModes ?? (p.mode ? [p.mode.id] : []);
                             if (scope.includes('*') || scope.includes(currentModeId)) {
-                                activePluginMcpIds.push(`mcp__${sanitize(srv.id)}`);
+                                activePluginMcpIds.push(mcpToolPrefix(srv.id));
                             }
                         }
                     }
                 }
-                for await (const ev of invokeAgent({
+                // codeContext: the fenced source reader is allowed in every mode.
+                if (opts.codeContext)
+                    activePluginMcpIds.push(mcpToolPrefix(SOURCE_MCP_ID));
+                const runResult = await runSession({
                     agentId: invokedAgentId,
                     prompt: text,
                     sessionId: resumeSessionId,
                     mcpConfig,
-                    // cwd = devRoot so Claude Code auto-discovers `.claude/skills/`
-                    // saved from this project (and CLAUDE.md, if any).
+                    // cwd = devRoot so the agent runs against the project (and Claude
+                    // Code reads its CLAUDE.md, if any).
                     cwd: devRoot,
                     appendSystemPrompt,
-                    // Skill stays in the allow list so saved skills under
-                    // <devRoot>/.claude/skills/ can be invoked. mcp__playwright covers
-                    // every browser tool. Plugin-contributed MCPs are appended when
-                    // the corresponding mode is active.
-                    allowedTools: isHardSandbox
-                        ? ['mcp__playwright', 'Skill', ...activePluginMcpIds]
-                        : undefined,
-                    disallowedTools: isHardSandbox
-                        ? (invokedDescriptor?.defaultDisallowedTools
-                            ? [...invokedDescriptor.defaultDisallowedTools]
-                            : undefined)
-                        : undefined,
+                    // mcp__playwright covers every browser tool; active-mode plugin MCP
+                    // servers are appended. (Save-as-Skill retired → no Skill tool.)
+                    allowedToolsExtra: activePluginMcpIds,
                     maxBudgetUsd,
                     model,
-                    signal: inflight.signal,
-                })) {
-                    if (cancelled || ws.readyState !== WebSocket.OPEN)
+                    apiKey: currentApiKey,
+                    signal: run.abort.signal,
+                }, (ev) => {
+                    // Stream to whichever ws is attached NOW — survives the widget
+                    // reconnecting mid-run (emitToRun is a no-op during a reconnect gap).
+                    if (run.cancelled)
                         return;
-                    send(ws, { type: 'event', payload: ev });
-                    // Re-record collection. Mirror what widget client.js does on the
-                    // way past tool_use events: accumulate into a SkillStep[] so we
-                    // can write a fresh spec when the session ends. We do this only
-                    // when this command was launched in re-record mode; ordinary
-                    // commands don't need server-side step retention (widget owns
-                    // that for normal saves).
-                    if (reRecordSlug && ev.kind === 'tool_use') {
-                        reRecordSteps.push({
-                            kind: 'step',
-                            tool: ev.tool,
-                            input: ev.input,
+                    emitToRun({ type: 'event', payload: ev });
+                });
+                // Re-record: write a fresh spec from the steps runSession accumulated
+                // (`user` → `step`* → `done`). Only on a clean, non-cancelled finish —
+                // a cancelled/aborted run throws out of runSession into the catch
+                // below, and an errored agent leaves the original spec untouched.
+                if (reRecordSlug && !run.cancelled) {
+                    if (runResult.isError) {
+                        emitToRun({
+                            type: 'error',
+                            payload: {
+                                message: `Re-record failed: ${runResult.summary || 'agent reported an error'}. ` +
+                                    `Original spec left unchanged.`,
+                            },
                         });
                     }
-                    if (reRecordSlug && ev.kind === 'session_end') {
-                        // Cancelled or errored runs: don't overwrite — the existing
-                        // spec is still valid. Tell the client what happened.
-                        if (ev.isError) {
-                            sendIfOpen(ws, {
-                                type: 'error',
-                                payload: {
-                                    message: `Re-record failed: ${ev.summary ?? 'agent reported an error'}. ` +
-                                        `Original spec left unchanged.`,
-                                },
+                    else {
+                        try {
+                            const { writeSpec } = await import('./specs/writeSpec.js');
+                            const written = await writeSpec({
+                                devRoot,
+                                name: reRecordSlug,
+                                steps: runResult.steps,
+                                overwrite: true,
+                            });
+                            emitToRun({
+                                type: 'spec-saved',
+                                payload: { name: reRecordSlug, path: written.path },
                             });
                         }
-                        else {
-                            // Snapshot the agent's final summary into a synthetic `done`
-                            // step so writeSpec's `Outcome:` header reflects the new run.
-                            if (ev.summary) {
-                                reRecordSteps.push({ kind: 'done', summary: ev.summary });
-                            }
-                            // Overwrite. writeSpec uses the slug to name the file; we
-                            // pass the original slug verbatim so the path is stable.
-                            try {
-                                const { writeSpec } = await import('./specs/writeSpec.js');
-                                const result = await writeSpec({
-                                    devRoot,
-                                    name: reRecordSlug,
-                                    steps: reRecordSteps,
-                                    overwrite: true,
-                                });
-                                sendIfOpen(ws, {
-                                    type: 'spec-saved',
-                                    payload: { name: reRecordSlug, path: result.path },
-                                });
-                            }
-                            catch (e) {
-                                const m = e instanceof Error ? e.message : String(e);
-                                sendIfOpen(ws, {
-                                    type: 'error',
-                                    payload: { message: `Re-record could not write spec: ${m}` },
-                                });
-                            }
+                        catch (e) {
+                            const m = e instanceof Error ? e.message : String(e);
+                            emitToRun({
+                                type: 'error',
+                                payload: { message: `Re-record could not write spec: ${m}` },
+                            });
                         }
                     }
                 }
@@ -835,30 +942,25 @@ export async function startService(opts) {
                 // widget to reconcile two terminal events for one run. CDP isn't
                 // suspect either — the user just stopped — so skip preflight
                 // invalidation too.
-                if (!cancelled) {
+                if (!run.cancelled) {
                     const message = err instanceof Error ? err.message : String(err);
                     const errorEvent = {
                         kind: 'session_end',
                         isError: true,
                         summary: message,
                     };
-                    sendIfOpen(ws, { type: 'event', payload: errorEvent });
+                    emitToRun({ type: 'event', payload: errorEvent });
                     // Force the next command to re-probe CDP. The error could be from
                     // Chrome dying, MCP spawning a stray Chromium, the user closing
                     // their debug window — anything that would make a cached "all
-                    // healthy" result lie. Invalidate the mode-effective URL (see
-                    // preflightCdpUrl above) — not the static cdpUrl — so security
-                    // mode invalidations don't no-op against the default port.
-                    const invalExtras = effectiveLaunchExtras();
-                    const invalCdpUrl = invalExtras?.cdpPort
-                        ? `http://localhost:${invalExtras.cdpPort}`
-                        : cdpUrl;
-                    invalidatePreflight(invalCdpUrl);
+                    // healthy" result lie.
+                    invalidatePreflight(cdpUrl);
                 }
             }
             finally {
-                busy = false;
-                inflight = null;
+                if (run.graceTimer)
+                    clearTimeout(run.graceTimer);
+                activeRun = null;
             }
         });
     });
@@ -917,6 +1019,20 @@ export async function startService(opts) {
     return {
         port,
         async close() {
+            // Kill any in-flight run FIRST. The run is held at service scope and is
+            // only torn down by aborting its signal (invoke.ts SIGTERMs the agent
+            // child on abort). wss.close() below stops the listener but does NOT
+            // terminate established client sockets, so no ws.on('close') fires.
+            // Without this, the agent child would keep driving the debug Chrome as
+            // an orphan after the dev server is gone, and a pending grace timer
+            // would fire abort() 15s into the void.
+            if (activeRun) {
+                if (activeRun.graceTimer)
+                    clearTimeout(activeRun.graceTimer);
+                activeRun.cancelled = true;
+                activeRun.abort.abort();
+                activeRun = null;
+            }
             // Deactivate the active mode first, then run every plugin's
             // shutdown hook (regardless of which mode is active — a plugin may
             // own background state even outside its mode). Best-effort: log