@hover-dev/core 0.14.1 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. package/README.md +73 -1
  2. package/dist/agents/aider.d.ts.map +1 -1
  3. package/dist/agents/aider.js +6 -14
  4. package/dist/agents/claude.d.ts.map +1 -1
  5. package/dist/agents/claude.js +14 -0
  6. package/dist/agents/codex.d.ts.map +1 -1
  7. package/dist/agents/codex.js +10 -4
  8. package/dist/agents/cursor.d.ts.map +1 -1
  9. package/dist/agents/cursor.js +8 -17
  10. package/dist/agents/gemini.d.ts.map +1 -1
  11. package/dist/agents/gemini.js +3 -14
  12. package/dist/agents/invoke.d.ts.map +1 -1
  13. package/dist/agents/invoke.js +10 -1
  14. package/dist/agents/qwen.d.ts.map +1 -1
  15. package/dist/agents/qwen.js +3 -14
  16. package/dist/agents/shared.d.ts +28 -0
  17. package/dist/agents/shared.d.ts.map +1 -0
  18. package/dist/agents/shared.js +35 -0
  19. package/dist/agents/types.d.ts +11 -0
  20. package/dist/agents/types.d.ts.map +1 -1
  21. package/dist/mcp/sourceFence.d.ts +23 -0
  22. package/dist/mcp/sourceFence.d.ts.map +1 -0
  23. package/dist/mcp/sourceFence.js +75 -0
  24. package/dist/mcp/sourceServer.d.ts +3 -0
  25. package/dist/mcp/sourceServer.d.ts.map +1 -0
  26. package/dist/mcp/sourceServer.js +116 -0
  27. package/dist/playwright/preflight.d.ts.map +1 -1
  28. package/dist/playwright/preflight.js +6 -1
  29. package/dist/playwright/raiseWindow.d.ts.map +1 -1
  30. package/dist/playwright/raiseWindow.js +22 -3
  31. package/dist/playwright/resolveMcpConfig.d.ts +11 -0
  32. package/dist/playwright/resolveMcpConfig.d.ts.map +1 -1
  33. package/dist/playwright/resolveMcpConfig.js +17 -3
  34. package/dist/plugin-api.d.ts +7 -0
  35. package/dist/plugin-api.d.ts.map +1 -1
  36. package/dist/runSession.d.ts +42 -0
  37. package/dist/runSession.d.ts.map +1 -0
  38. package/dist/runSession.js +81 -0
  39. package/dist/service/cdpHandlers.d.ts +3 -7
  40. package/dist/service/cdpHandlers.d.ts.map +1 -1
  41. package/dist/service/cdpHandlers.js +4 -16
  42. package/dist/service/cdpHint.d.ts.map +1 -1
  43. package/dist/service/cdpHint.js +30 -14
  44. package/dist/service/conventions.d.ts +8 -0
  45. package/dist/service/conventions.d.ts.map +1 -0
  46. package/dist/service/conventions.js +42 -0
  47. package/dist/service/saveHandlers.d.ts +10 -13
  48. package/dist/service/saveHandlers.d.ts.map +1 -1
  49. package/dist/service/saveHandlers.js +9 -25
  50. package/dist/service/types.d.ts +5 -0
  51. package/dist/service/types.d.ts.map +1 -1
  52. package/dist/service.d.ts +13 -4
  53. package/dist/service.d.ts.map +1 -1
  54. package/dist/service.js +264 -148
  55. package/dist/skills/writeSkill.d.ts +12 -35
  56. package/dist/skills/writeSkill.d.ts.map +1 -1
  57. package/dist/skills/writeSkill.js +10 -166
  58. package/dist/specs/detectSharedFlows.d.ts +35 -0
  59. package/dist/specs/detectSharedFlows.d.ts.map +1 -0
  60. package/dist/specs/detectSharedFlows.js +171 -0
  61. package/dist/specs/extractPageObjects.d.ts +18 -0
  62. package/dist/specs/extractPageObjects.d.ts.map +1 -0
  63. package/dist/specs/extractPageObjects.js +98 -0
  64. package/dist/specs/generatePageObject.d.ts +29 -0
  65. package/dist/specs/generatePageObject.d.ts.map +1 -0
  66. package/dist/specs/generatePageObject.js +149 -0
  67. package/dist/specs/listSpecs.d.ts +12 -0
  68. package/dist/specs/listSpecs.d.ts.map +1 -1
  69. package/dist/specs/listSpecs.js +27 -2
  70. package/dist/specs/optimizationSuggestion.d.ts +26 -0
  71. package/dist/specs/optimizationSuggestion.d.ts.map +1 -0
  72. package/dist/specs/optimizationSuggestion.js +28 -0
  73. package/dist/specs/optimizeSpec.d.ts +42 -0
  74. package/dist/specs/optimizeSpec.d.ts.map +1 -0
  75. package/dist/specs/optimizeSpec.js +188 -0
  76. package/dist/specs/optimizeSpecWithAgent.d.ts +11 -0
  77. package/dist/specs/optimizeSpecWithAgent.d.ts.map +1 -0
  78. package/dist/specs/optimizeSpecWithAgent.js +40 -0
  79. package/dist/specs/pageObjectManifest.d.ts +20 -0
  80. package/dist/specs/pageObjectManifest.d.ts.map +1 -0
  81. package/dist/specs/pageObjectManifest.js +40 -0
  82. package/dist/specs/seeds.d.ts +36 -0
  83. package/dist/specs/seeds.d.ts.map +1 -0
  84. package/dist/specs/seeds.js +74 -0
  85. package/dist/specs/sidecar.d.ts +25 -0
  86. package/dist/specs/sidecar.d.ts.map +1 -0
  87. package/dist/specs/sidecar.js +38 -0
  88. package/dist/specs/softBatch.d.ts +14 -0
  89. package/dist/specs/softBatch.d.ts.map +1 -0
  90. package/dist/specs/softBatch.js +177 -0
  91. package/dist/specs/text.d.ts +17 -0
  92. package/dist/specs/text.d.ts.map +1 -0
  93. package/dist/specs/text.js +24 -0
  94. package/dist/specs/writeCaseCsv.d.ts.map +1 -1
  95. package/dist/specs/writeCaseCsv.js +2 -8
  96. package/dist/specs/writeSpec.d.ts +50 -0
  97. package/dist/specs/writeSpec.d.ts.map +1 -1
  98. package/dist/specs/writeSpec.js +251 -84
  99. package/package.json +5 -3
package/dist/service.js CHANGED
@@ -10,10 +10,8 @@
10
10
  * { type: 'hello', payload: { agentId, model, version } }
11
11
  * { type: 'event', payload: InvokeEvent } // see agents/types.ts
12
12
  * { type: 'cdp-status', payload: { state, reason?, matchingTabUrl?, browser?, launching? } }
13
- * { type: 'skill-saved', payload: { name, path } }
14
- * { type: 'skill-exists', payload: { slug, existingPath } }
15
- * { type: 'skills-list', payload: { skills: SkillSummary[] } }
16
13
  * { type: 'specs-list', payload: { specs: SpecSummary[] } }
14
+ * { type: 'seeds-list', payload: { seeds: { name, note, signature, code, source }[] } }
17
15
  * { type: 'spec-saved', payload: { name, path } }
18
16
  * { type: 'spec-exists', payload: { slug, existingPath } }
19
17
  * { type: 'case-csv-saved', payload: { name, path } }
@@ -31,11 +29,10 @@
31
29
  * { type: 'check-cdp', payload: { pageUrl } } // "is this widget in the debug Chrome?"
32
30
  * { type: 'launch-chrome', payload: { pageUrl } } // start debug Chrome, navigate to pageUrl
33
31
  * { type: 'focus-debug', payload: { pageUrl } } // bringToFront the matching tab in debug Chrome
34
- * { type: 'save-skill', payload: { name, description, steps, overwrite? } }
35
32
  * { type: 'save-spec', payload: { name, description, steps, assertions?, overwrite? } }
36
33
  * { type: 'save-case-csv', payload: { name, description, steps, assertions?, jiraProjectKey?, labels?, overwrite? } }
37
- * { type: 'list-skills' }
38
34
  * { type: 'list-specs' } // ask for every spec under __vibe_tests__/, with parsed JSDoc headers
35
+ * { type: 'list-seeds' } // ask for built-in + .hover/rules/ translation seeds (read-only)
39
36
  * { type: 'list-agents' } // ask for the full agent registry + install status
40
37
  * { type: 'switch-agent', payload: { agentId } } // set the service's current agent; broadcasts to all connections
41
38
  *
@@ -49,19 +46,29 @@
49
46
  * { type: 'list-modes' }
50
47
  */
51
48
  import { WebSocketServer, WebSocket } from 'ws';
52
- import { invokeAgent } from './agents/invoke.js';
49
+ import { fileURLToPath } from 'node:url';
50
+ import { dirname, resolve } from 'node:path';
51
+ import { runSession } from './runSession.js';
52
+ import { readConventions } from './service/conventions.js';
53
+ import { optimizeSpecWithAgent } from './specs/optimizeSpecWithAgent.js';
54
+ import { promoteOptimized, discardOptimized } from './specs/optimizeSpec.js';
53
55
  import { listAgentAvailability, pickPrimaryAgent, } from './agents/detect.js';
54
56
  import { getAgent } from './agents/registry.js';
55
57
  import { getPreflight, invalidatePreflight } from './playwright/preflightCache.js';
56
- import { resolveMcpConfig } from './playwright/resolveMcpConfig.js';
58
+ import { resolveMcpConfig, mcpToolPrefix } from './playwright/resolveMcpConfig.js';
57
59
  import { launchDebugChrome } from './playwright/launchChrome.js';
58
- import { listSkills } from './skills/writeSkill.js';
59
60
  import { listSpecs } from './specs/listSpecs.js';
61
+ import { readSeeds, BUILTIN_SEEDS } from './specs/seeds.js';
60
62
  import { send, sendIfOpen } from './service/types.js';
61
63
  import { buildCdpHint, buildCdpHintResume } from './service/cdpHint.js';
62
64
  import { handleCheckCdp, handleLaunchChrome, handleFocusDebug, } from './service/cdpHandlers.js';
63
- import { handleSaveArtifact, SKILL_CONFIG, SPEC_CONFIG, CASE_CSV_CONFIG, } from './service/saveHandlers.js';
65
+ import { handleSaveArtifact, SPEC_CONFIG, CASE_CSV_CONFIG, } from './service/saveHandlers.js';
64
66
  import { CURRENT_API_VERSION, } from './plugin-api.js';
67
+ /** The source-reader MCP server (codeContext). Id → the `mcp__hover_source`
68
+ * tool prefix; script path resolved relative to this module so it works from
69
+ * dist/. Spawned only when codeContext is enabled. */
70
+ const SOURCE_MCP_ID = 'hover-source';
71
+ const SOURCE_MCP_SCRIPT = resolve(dirname(fileURLToPath(import.meta.url)), 'mcp', 'sourceServer.js');
65
72
  // ClientMessage + send moved to ./service/types.ts so the cdp + save
66
73
  // handler modules can share them. See those files for the wire shape.
67
74
  const PROTOCOL_VERSION = 1;
@@ -129,6 +136,11 @@ export async function startService(opts) {
129
136
  const preferred = opts.agentId ?? process.env.HOVER_AGENT;
130
137
  const primary = await pickPrimaryAgent(preferred);
131
138
  let currentAgentId = primary?.descriptor.id ?? preferred ?? 'claude';
139
+ // Optional model API key the widget supplied (set-api-key). Held in memory
140
+ // for this service's lifetime only — never written to disk, never logged.
141
+ // Injected into the spawned CLI's env so a user without a logged-in
142
+ // subscription can drive Hover on their own key.
143
+ let currentApiKey = process.env.ANTHROPIC_API_KEY ?? process.env.OPENAI_API_KEY ?? undefined;
132
144
  if (!primary) {
133
145
  // Nothing installed — still bind so the widget can show a helpful
134
146
  // "install one of these" dialog. Commands will fail with
@@ -147,6 +159,7 @@ export async function startService(opts) {
147
159
  // so the user can hit Stop when they've seen enough. Pass maxBudgetUsd
148
160
  // explicitly (or via the Vite plugin option) if a hard ceiling is needed.
149
161
  const maxBudgetUsd = opts.maxBudgetUsd;
162
+ const optimizeMode = opts.optimizeMode ?? 'suggest';
150
163
  const cdpUrl = opts.cdpUrl ?? 'http://localhost:9222';
151
164
  const devRoot = opts.devRoot ?? process.cwd();
152
165
  const wss = await pickAndBind('127.0.0.1', requestedPort, PORT_RETRIES);
@@ -189,6 +202,15 @@ export async function startService(opts) {
189
202
  }
190
203
  }
191
204
  }
205
+ // codeContext (opt-in, all modes): the fenced read-only source reader.
206
+ if (opts.codeContext) {
207
+ extra.push({
208
+ id: SOURCE_MCP_ID,
209
+ command: process.execPath,
210
+ args: [SOURCE_MCP_SCRIPT],
211
+ env: { HOVER_PROJECT_ROOT: devRoot },
212
+ });
213
+ }
192
214
  // Single-Chrome model: the Playwright MCP always points at the one debug
193
215
  // Chrome on the normal cdpUrl. (Pre-single-Chrome this branched to a
194
216
  // mode-specific port like 9333; there's no second Chrome anymore.)
@@ -233,6 +255,25 @@ export async function startService(opts) {
233
255
  }
234
256
  /** id of the currently-active mode, or null for normal (unmoded) mode. */
235
257
  let currentModeId = null;
258
+ /**
259
+ * The single in-flight agent run, held at SERVICE scope (not per-connection)
260
+ * so it SURVIVES the widget's WS dropping. The widget lives in the page the
261
+ * agent drives, so any agent navigation (a pentest payload in the URL, an
262
+ * HMR reload) tears the widget down and closes its socket — but the agent is
263
+ * still happily driving the tab over CDP and recording findings server-side.
264
+ * Killing it on every navigation made pentest mode (which navigates
265
+ * constantly) unusable. Instead: detach on close, keep streaming to whichever
266
+ * ws is attached, and only abort if no widget reconnects within the grace
267
+ * window. Single active run — Hover binds 127.0.0.1 for one local user.
268
+ */
269
+ const RECONNECT_GRACE_MS = 15_000;
270
+ let activeRun = null;
271
+ /** Send a run event to whichever ws is currently attached (survives reconnect). */
272
+ const emitToRun = (msg) => {
273
+ const c = activeRun?.client;
274
+ if (c && c.readyState === WebSocket.OPEN)
275
+ send(c, msg);
276
+ };
236
277
  /** Chrome-proxy settings a plugin's `hover:service:start` hook set on us
237
278
  * (security's resident MITM). RESIDENT for the whole session — set once
238
279
  * before Chrome launches, never cleared on mode change — so the single
@@ -267,6 +308,9 @@ export async function startService(opts) {
267
308
  id: p.mode.id,
268
309
  label: p.mode.label,
269
310
  description: p.mode.description,
311
+ // Widget retints to this while the mode is engaged (falls back to
312
+ // security orange in the widget when absent).
313
+ accent: p.mode.accent,
270
314
  pluginName: p.name,
271
315
  }));
272
316
  const payload = { current: currentModeId, available };
@@ -384,7 +428,7 @@ export async function startService(opts) {
384
428
  wss.on('connection', ws => {
385
429
  send(ws, {
386
430
  type: 'hello',
387
- payload: { agentId: currentAgentId, model, version: PROTOCOL_VERSION },
431
+ payload: { agentId: currentAgentId, model, version: PROTOCOL_VERSION, optimizeMode },
388
432
  });
389
433
  // Send the agent list as a follow-up event so the widget can render the
390
434
  // dropdown immediately on connect / reconnect (e.g. after HMR). The
@@ -404,20 +448,42 @@ export async function startService(opts) {
404
448
  // Send the mode catalogue too, so the widget can render the mode
405
449
  // toggle immediately. Empty list when no plugins are loaded.
406
450
  broadcastModes(ws);
407
- let busy = false;
408
- let inflight = null;
409
- let cancelled = false;
410
- // If the page reloads (e.g. AI navigated to a same-origin URL), the WS
411
- // connection drops. Abort the in-flight agent so we don't leave an
412
- // orphan claude process driving the now-vanished browser tab.
451
+ // Re-attach to a run that's still in flight (the previous widget dropped —
452
+ // most commonly the agent navigated and reloaded the page the widget lives
453
+ // in). Cancel the pending abort, point the run's event stream at this fresh
454
+ // socket, and tell the widget so it can restore its "running" UI. Without
455
+ // this the run would be killed on every agent navigation.
456
+ // Only re-attach during a genuine reconnect GAP (the prior client is gone).
457
+ // If a live client is still attached, this is a SECOND widget (e.g. the
458
+ // user's regular tab alongside the debug-Chrome tab — both inject a widget
459
+ // on the same origin and open their own socket). Seizing the stream would
460
+ // silence the first widget and let the second's close abort a healthy run,
461
+ // so leave a second concurrent widget in idle UI rather than hijacking.
462
+ if (activeRun && activeRun.client === null) {
463
+ if (activeRun.graceTimer) {
464
+ clearTimeout(activeRun.graceTimer);
465
+ activeRun.graceTimer = null;
466
+ }
467
+ activeRun.client = ws;
468
+ send(ws, { type: 'run-active', payload: { prompt: activeRun.prompt } });
469
+ }
470
+ // If the widget's socket closes while a run it owns is in flight, DON'T
471
+ // abort — the agent is still driving the tab over CDP. Detach this ws and
472
+ // start a grace window; a reconnecting widget (above) cancels the abort.
473
+ // Only if nobody comes back do we abort, so we still never leave an orphan.
413
474
  ws.on('close', () => {
414
- inflight?.abort();
475
+ if (activeRun && activeRun.client === ws) {
476
+ activeRun.client = null;
477
+ activeRun.graceTimer = setTimeout(() => {
478
+ activeRun?.abort.abort();
479
+ }, RECONNECT_GRACE_MS);
480
+ }
415
481
  });
416
482
  const cancel = () => {
417
- if (!busy)
483
+ if (!activeRun)
418
484
  return;
419
- cancelled = true;
420
- inflight?.abort();
485
+ activeRun.cancelled = true;
486
+ activeRun.abort.abort();
421
487
  // Send a synthetic session_end so the widget resets to idle immediately.
422
488
  // The run's event callback below short-circuits on `cancelled`, so no events
423
489
  // from the dying child will arrive after this.
@@ -427,7 +493,7 @@ export async function startService(opts) {
427
493
  // stays false because the agent didn't fail: the user chose to
428
494
  // end the run. The widget renders this as a neutral "Stopped"
429
495
  // state rather than a red Failed card.
430
- send(ws, {
496
+ emitToRun({
431
497
  type: 'event',
432
498
  payload: {
433
499
  kind: 'session_end',
@@ -454,7 +520,7 @@ export async function startService(opts) {
454
520
  return;
455
521
  }
456
522
  if (msg.type === 'set-mode') {
457
- if (busy) {
523
+ if (activeRun) {
458
524
  send(ws, {
459
525
  type: 'error',
460
526
  payload: { message: 'set-mode: a command is already running; stop it first' },
@@ -509,7 +575,7 @@ export async function startService(opts) {
509
575
  // Refuse to switch mid-flight; the user's running command would
510
576
  // otherwise outlive its own descriptor and the events it produces
511
577
  // would be parsed against the wrong wire format.
512
- if (busy) {
578
+ if (activeRun) {
513
579
  send(ws, {
514
580
  type: 'error',
515
581
  payload: { message: 'switch-agent: a command is already running; stop it first' },
@@ -531,13 +597,14 @@ export async function startService(opts) {
531
597
  await broadcastAgents();
532
598
  return;
533
599
  }
534
- if (msg.type === 'save-skill') {
535
- await handleSaveArtifact(ws, msg, devRoot, SKILL_CONFIG);
536
- return;
537
- }
538
- if (msg.type === 'list-skills') {
539
- const skills = await listSkills(devRoot);
540
- send(ws, { type: 'skills-list', payload: { skills } });
600
+ if (msg.type === 'set-api-key') {
601
+ // The widget supplies (or clears) a model API key. Stored in memory
602
+ // only and injected into the spawned CLI's env at invoke time — never
603
+ // persisted, never logged, never echoed back. Empty/missing clears it.
604
+ const key = msg.payload?.key;
605
+ currentApiKey = typeof key === 'string' && key.trim() ? key.trim() : undefined;
606
+ const envVar = getAgent(currentAgentId)?.apiKeyEnv;
607
+ send(ws, { type: 'api-key-status', payload: { hasKey: !!currentApiKey, envVar } });
541
608
  return;
542
609
  }
543
610
  if (msg.type === 'list-specs') {
@@ -549,6 +616,21 @@ export async function startService(opts) {
549
616
  send(ws, { type: 'specs-list', payload: { specs } });
550
617
  return;
551
618
  }
619
+ if (msg.type === 'list-seeds') {
620
+ // Widget's Seeds tab: show which translation seeds Hover sees — the
621
+ // built-in set + whatever the user dropped in <devRoot>/.hover/rules/.
622
+ // Read-only; users add seeds by hand (no download path).
623
+ const builtinNames = new Set(BUILTIN_SEEDS.map(s => s.name));
624
+ const seeds = (await readSeeds(devRoot)).map(s => ({
625
+ name: s.name,
626
+ note: s.note ?? '',
627
+ signature: s.signature,
628
+ code: s.example?.code ?? '',
629
+ source: builtinNames.has(s.name) ? 'builtin' : 'project',
630
+ }));
631
+ send(ws, { type: 'seeds-list', payload: { seeds } });
632
+ return;
633
+ }
552
634
  if (msg.type === 'save-spec') {
553
635
  await handleSaveArtifact(ws, msg, devRoot, SPEC_CONFIG);
554
636
  return;
@@ -557,6 +639,55 @@ export async function startService(opts) {
557
639
  await handleSaveArtifact(ws, msg, devRoot, CASE_CSV_CONFIG);
558
640
  return;
559
641
  }
642
+ // Stage 7 (F7) widget flow: optimize a saved spec, then promote/discard
643
+ // the candidate after the human reviews the diff. optimizeSpecWithAgent
644
+ // spawns the codegen LLM (no browser, no MCP); the original spec is never
645
+ // touched until an explicit promote.
646
+ if (msg.type === 'optimize-spec') {
647
+ const slug = msg.payload?.slug;
648
+ if (typeof slug !== 'string' || !slug) {
649
+ send(ws, { type: 'error', payload: { message: 'optimize-spec: slug is required' } });
650
+ return;
651
+ }
652
+ try {
653
+ const res = await optimizeSpecWithAgent(devRoot, slug, {
654
+ agentId: currentAgentId, model, maxBudgetUsd, apiKey: currentApiKey,
655
+ });
656
+ send(ws, { type: 'optimize-result', payload: { slug, original: res.original, candidate: res.code } });
657
+ }
658
+ catch (err) {
659
+ const reason = err instanceof Error ? err.message : String(err);
660
+ send(ws, { type: 'optimize-failed', payload: { slug, reason } });
661
+ }
662
+ return;
663
+ }
664
+ if (msg.type === 'promote-optimized') {
665
+ const slug = msg.payload?.slug;
666
+ if (typeof slug !== 'string' || !slug) {
667
+ send(ws, { type: 'error', payload: { message: 'promote-optimized: slug is required' } });
668
+ return;
669
+ }
670
+ try {
671
+ const path = await promoteOptimized(devRoot, slug);
672
+ send(ws, { type: 'optimized-promoted', payload: { slug, path } });
673
+ send(ws, { type: 'specs-list', payload: { specs: await listSpecs(devRoot) } });
674
+ }
675
+ catch (err) {
676
+ const m = err instanceof Error ? err.message : String(err);
677
+ send(ws, { type: 'error', payload: { message: `promote-optimized: ${m}` } });
678
+ }
679
+ return;
680
+ }
681
+ if (msg.type === 'discard-optimized') {
682
+ const slug = msg.payload?.slug;
683
+ if (typeof slug !== 'string' || !slug) {
684
+ send(ws, { type: 'error', payload: { message: 'discard-optimized: slug is required' } });
685
+ return;
686
+ }
687
+ await discardOptimized(devRoot, slug);
688
+ send(ws, { type: 'optimized-discarded', payload: { slug } });
689
+ return;
690
+ }
560
691
  // v0.12 — plugin-contributed save handlers. Lookup is O(plugins),
561
692
  // which is fine because there's at most a handful of plugins ever
562
693
  // loaded. Each plugin's manifest declares `saveHandlers[].type`
@@ -609,36 +740,31 @@ export async function startService(opts) {
609
740
  ? msg.payload.sessionId
610
741
  : undefined;
611
742
  // Re-record mode: when the client (widget Specs tab or hover CLI)
612
- // passes `reRecord: { slug }`, we collect tool_use events server-side
613
- // into a SkillStep[] and, on session_end with no error, overwrite the
614
- // existing __vibe_tests__/<slug>.spec.ts. This is the same flow the
615
- // widget uses for "Save as Spec", but the spec already exists and is
616
- // being regenerated for the current UI.
743
+ // passes `reRecord: { slug }`, runSession collects the tool_use events
744
+ // into a SpecStep[] and, on a clean finish, we overwrite the existing
745
+ // __vibe_tests__/<slug>.spec.ts. Same flow the widget uses for "Save as
746
+ // Spec", but the spec already exists and is being regenerated for the
747
+ // current UI.
617
748
  const reRecordSlug = msg.payload && typeof msg.payload === 'object' && 'reRecord' in msg.payload
618
749
  ? msg.payload.reRecord?.slug
619
750
  : undefined;
620
751
  if (typeof text !== 'string' || !text.trim())
621
752
  return;
622
- if (busy) {
753
+ if (activeRun) {
623
754
  send(ws, {
624
755
  type: 'error',
625
- payload: { message: 'A command is already running on this connection.' },
756
+ payload: { message: 'A command is already running.' },
626
757
  });
627
758
  return;
628
759
  }
629
- busy = true;
630
- cancelled = false;
631
- inflight = new AbortController();
632
- // Re-record step collector — populated as tool_use events stream by,
633
- // consumed at session_end to overwrite the original spec. Empty unless
634
- // reRecordSlug is set on this command. We seed with a synthetic
635
- // `user` step so writeSpec's JSDoc Original-prompt: line carries the
636
- // text the agent was actually given (which is the prompt we read out
637
- // of the existing spec — the same one we're regenerating).
638
- const reRecordSteps = [];
639
- if (reRecordSlug) {
640
- reRecordSteps.push({ kind: 'user', text });
641
- }
760
+ const run = {
761
+ abort: new AbortController(),
762
+ cancelled: false,
763
+ client: ws,
764
+ graceTimer: null,
765
+ prompt: text,
766
+ };
767
+ activeRun = run;
642
768
  try {
643
769
  // Build the MCP config first — it's pure local file IO and lets
644
770
  // us assert plugin-contributed servers landed in the config even
@@ -649,13 +775,7 @@ export async function startService(opts) {
649
775
  // Playwright MCP server would silently launch its own Chromium —
650
776
  // and Hover's premise is to drive the user's existing Chrome (with
651
777
  // their dev state, cookies, devtools open), never spawn a fresh one.
652
- // In an active mode, the relevant CDP endpoint may be the mode's
653
- // own port (e.g. 9333 for security), not the default cdpUrl.
654
- const preflightExtras = effectiveLaunchExtras();
655
- const preflightCdpUrl = preflightExtras?.cdpPort
656
- ? `http://localhost:${preflightExtras.cdpPort}`
657
- : cdpUrl;
658
- const cdp = await getPreflight(preflightCdpUrl);
778
+ const cdp = await getPreflight(cdpUrl);
659
779
  if (!cdp.ok) {
660
780
  send(ws, {
661
781
  type: 'event',
@@ -682,6 +802,15 @@ export async function startService(opts) {
682
802
  let appendSystemPrompt = resumeSessionId
683
803
  ? buildCdpHintResume(cdp.tabs)
684
804
  : buildCdpHint(cdp.tabs);
805
+ // Knowledge layer (F5): on the first turn, fold in the project's
806
+ // .hover/conventions.md (static, like cdpHint's rules — skipped on
807
+ // resume to keep the prompt cache intact). The service reads the file;
808
+ // the agent never gains filesystem access (D2).
809
+ if (!resumeSessionId) {
810
+ const conventions = await readConventions(devRoot);
811
+ if (conventions)
812
+ appendSystemPrompt = `${appendSystemPrompt}\n\n${conventions}`;
813
+ }
685
814
  // Add plugin-contributed prompt additions whose scope includes the
686
815
  // current mode (or '*' for always-on). Walks ALL loaded plugins,
687
816
  // not just the active-mode plugin — a plugin that contributes
@@ -701,6 +830,13 @@ export async function startService(opts) {
701
830
  }
702
831
  }
703
832
  }
833
+ // codeContext: tell the agent the fenced source reader exists, so it
834
+ // proactively reads the real code (better selectors/routes when
835
+ // authoring; white-box confirmation when probing) instead of only
836
+ // guessing from the rendered DOM.
837
+ if (opts.codeContext) {
838
+ appendSystemPrompt = `${appendSystemPrompt}\n\nYou also have read-only access to this project's source via mcp__hover_source (read_source / list_source), fenced to the repo (secrets, keys, .env, .git, node_modules and build output are refused). Use it to read the actual component / route / API code — write tests against the real selectors and, when probing for security issues, confirm a finding against the server code (the query, the authz check) rather than guessing from the page alone.`;
839
+ }
704
840
  // Mirror the prompt's language in the agent's *prose* output — the
705
841
  // verification summary (Result card), the ## Findings block, and the
706
842
  // step narration — the same way Voice mode mirrors it in TTS. A
@@ -714,16 +850,9 @@ export async function startService(opts) {
714
850
  }
715
851
  // Snapshot the agent id so a switch-agent message during the run
716
852
  // can't smear two agents across one invocation. (We also gate
717
- // switch-agent on `busy`, but defense in depth.)
853
+ // switch-agent on an active run, but defense in depth.) runSession gates
854
+ // the allow/deny lists on the agent's sandboxStrength internally.
718
855
  const invokedAgentId = currentAgentId;
719
- const invokedDescriptor = getAgent(invokedAgentId);
720
- // Only Claude's `--allowedTools`/`--disallowedTools` flags are
721
- // honoured — passing them to a soft-sandbox agent like codex is a
722
- // no-op (its buildArgs ignores them). We still gate at the service
723
- // layer for clarity: a hard-sandbox agent gets the tight allowlist,
724
- // a soft one gets nothing and relies on its descriptor's built-in
725
- // sandbox flags + developer_instructions.
726
- const isHardSandbox = invokedDescriptor?.sandboxStrength === 'hard';
727
856
  // Active mode's plugin-contributed MCP server ids — added to the
728
857
  // hard-sandbox allow list so Claude can actually call them. Claude
729
858
  // sanitises non-alphanumeric chars in the id when forming tool
@@ -731,99 +860,77 @@ export async function startService(opts) {
731
860
  // and `--allowedTools mcp__foo` matches every tool under that
732
861
  // prefix. We pass the prefix `mcp__<sanitized>` so all of the
733
862
  // server's tools are reachable.
734
- const sanitize = (s) => s.replace(/[^a-zA-Z0-9]+/g, '_').replace(/^_+|_+$/g, '');
735
863
  const activePluginMcpIds = [];
736
864
  if (currentModeId) {
737
865
  for (const p of plugins) {
738
866
  for (const srv of p.mcpServers ?? []) {
739
867
  const scope = srv.activeInModes ?? (p.mode ? [p.mode.id] : []);
740
868
  if (scope.includes('*') || scope.includes(currentModeId)) {
741
- activePluginMcpIds.push(`mcp__${sanitize(srv.id)}`);
869
+ activePluginMcpIds.push(mcpToolPrefix(srv.id));
742
870
  }
743
871
  }
744
872
  }
745
873
  }
746
- for await (const ev of invokeAgent({
874
+ // codeContext: the fenced source reader is allowed in every mode.
875
+ if (opts.codeContext)
876
+ activePluginMcpIds.push(mcpToolPrefix(SOURCE_MCP_ID));
877
+ const runResult = await runSession({
747
878
  agentId: invokedAgentId,
748
879
  prompt: text,
749
880
  sessionId: resumeSessionId,
750
881
  mcpConfig,
751
- // cwd = devRoot so Claude Code auto-discovers `.claude/skills/`
752
- // saved from this project (and CLAUDE.md, if any).
882
+ // cwd = devRoot so the agent runs against the project (and Claude
883
+ // Code reads its CLAUDE.md, if any).
753
884
  cwd: devRoot,
754
885
  appendSystemPrompt,
755
- // Skill stays in the allow list so saved skills under
756
- // <devRoot>/.claude/skills/ can be invoked. mcp__playwright covers
757
- // every browser tool. Plugin-contributed MCPs are appended when
758
- // the corresponding mode is active.
759
- allowedTools: isHardSandbox
760
- ? ['mcp__playwright', 'Skill', ...activePluginMcpIds]
761
- : undefined,
762
- disallowedTools: isHardSandbox
763
- ? (invokedDescriptor?.defaultDisallowedTools
764
- ? [...invokedDescriptor.defaultDisallowedTools]
765
- : undefined)
766
- : undefined,
886
+ // mcp__playwright covers every browser tool; active-mode plugin MCP
887
+ // servers are appended. (Save-as-Skill retired → no Skill tool.)
888
+ allowedToolsExtra: activePluginMcpIds,
767
889
  maxBudgetUsd,
768
890
  model,
769
- signal: inflight.signal,
770
- })) {
771
- if (cancelled || ws.readyState !== WebSocket.OPEN)
891
+ apiKey: currentApiKey,
892
+ signal: run.abort.signal,
893
+ }, (ev) => {
894
+ // Stream to whichever ws is attached NOW — survives the widget
895
+ // reconnecting mid-run (emitToRun is a no-op during a reconnect gap).
896
+ if (run.cancelled)
772
897
  return;
773
- send(ws, { type: 'event', payload: ev });
774
- // Re-record collection. Mirror what widget client.js does on the
775
- // way past tool_use events: accumulate into a SkillStep[] so we
776
- // can write a fresh spec when the session ends. We do this only
777
- // when this command was launched in re-record mode; ordinary
778
- // commands don't need server-side step retention (widget owns
779
- // that for normal saves).
780
- if (reRecordSlug && ev.kind === 'tool_use') {
781
- reRecordSteps.push({
782
- kind: 'step',
783
- tool: ev.tool,
784
- input: ev.input,
898
+ emitToRun({ type: 'event', payload: ev });
899
+ });
900
+ // Re-record: write a fresh spec from the steps runSession accumulated
901
+ // (`user` `step`* `done`). Only on a clean, non-cancelled finish —
902
+ // a cancelled/aborted run throws out of runSession into the catch
903
+ // below, and an errored agent leaves the original spec untouched.
904
+ if (reRecordSlug && !run.cancelled) {
905
+ if (runResult.isError) {
906
+ emitToRun({
907
+ type: 'error',
908
+ payload: {
909
+ message: `Re-record failed: ${runResult.summary || 'agent reported an error'}. ` +
910
+ `Original spec left unchanged.`,
911
+ },
785
912
  });
786
913
  }
787
- if (reRecordSlug && ev.kind === 'session_end') {
788
- // Cancelled or errored runs: don't overwrite — the existing
789
- // spec is still valid. Tell the client what happened.
790
- if (ev.isError) {
791
- sendIfOpen(ws, {
792
- type: 'error',
793
- payload: {
794
- message: `Re-record failed: ${ev.summary ?? 'agent reported an error'}. ` +
795
- `Original spec left unchanged.`,
796
- },
914
+ else {
915
+ try {
916
+ const { writeSpec } = await import('./specs/writeSpec.js');
917
+ const written = await writeSpec({
918
+ devRoot,
919
+ name: reRecordSlug,
920
+ steps: runResult.steps,
921
+ overwrite: true,
922
+ });
923
+ emitToRun({
924
+ type: 'spec-saved',
925
+ payload: { name: reRecordSlug, path: written.path },
797
926
  });
798
927
  }
799
- else {
800
- // Snapshot the agent's final summary into a synthetic `done`
801
- // step so writeSpec's `Outcome:` header reflects the new run.
802
- if (ev.summary) {
803
- reRecordSteps.push({ kind: 'done', summary: ev.summary });
804
- }
805
- // Overwrite. writeSpec uses the slug to name the file; we
806
- // pass the original slug verbatim so the path is stable.
807
- try {
808
- const { writeSpec } = await import('./specs/writeSpec.js');
809
- const result = await writeSpec({
810
- devRoot,
811
- name: reRecordSlug,
812
- steps: reRecordSteps,
813
- overwrite: true,
814
- });
815
- sendIfOpen(ws, {
816
- type: 'spec-saved',
817
- payload: { name: reRecordSlug, path: result.path },
818
- });
819
- }
820
- catch (e) {
821
- const m = e instanceof Error ? e.message : String(e);
822
- sendIfOpen(ws, {
823
- type: 'error',
824
- payload: { message: `Re-record could not write spec: ${m}` },
825
- });
826
- }
928
+ catch (e) {
929
+ const m = e instanceof Error ? e.message : String(e);
930
+ emitToRun({
931
+ type: 'error',
932
+ payload: { message: `Re-record could not write spec: ${m}` },
933
+ });
827
934
  }
828
935
  }
829
936
  }
@@ -835,30 +942,25 @@ export async function startService(opts) {
835
942
  // widget to reconcile two terminal events for one run. CDP isn't
836
943
  // suspect either — the user just stopped — so skip preflight
837
944
  // invalidation too.
838
- if (!cancelled) {
945
+ if (!run.cancelled) {
839
946
  const message = err instanceof Error ? err.message : String(err);
840
947
  const errorEvent = {
841
948
  kind: 'session_end',
842
949
  isError: true,
843
950
  summary: message,
844
951
  };
845
- sendIfOpen(ws, { type: 'event', payload: errorEvent });
952
+ emitToRun({ type: 'event', payload: errorEvent });
846
953
  // Force the next command to re-probe CDP. The error could be from
847
954
  // Chrome dying, MCP spawning a stray Chromium, the user closing
848
955
  // their debug window — anything that would make a cached "all
849
- // healthy" result lie. Invalidate the mode-effective URL (see
850
- // preflightCdpUrl above) — not the static cdpUrl — so security
851
- // mode invalidations don't no-op against the default port.
852
- const invalExtras = effectiveLaunchExtras();
853
- const invalCdpUrl = invalExtras?.cdpPort
854
- ? `http://localhost:${invalExtras.cdpPort}`
855
- : cdpUrl;
856
- invalidatePreflight(invalCdpUrl);
956
+ // healthy" result lie.
957
+ invalidatePreflight(cdpUrl);
857
958
  }
858
959
  }
859
960
  finally {
860
- busy = false;
861
- inflight = null;
961
+ if (run.graceTimer)
962
+ clearTimeout(run.graceTimer);
963
+ activeRun = null;
862
964
  }
863
965
  });
864
966
  });
@@ -917,6 +1019,20 @@ export async function startService(opts) {
917
1019
  return {
918
1020
  port,
919
1021
  async close() {
1022
+ // Kill any in-flight run FIRST. The run is held at service scope and is
1023
+ // only torn down by aborting its signal (invoke.ts SIGTERMs the agent
1024
+ // child on abort). wss.close() below stops the listener but does NOT
1025
+ // terminate established client sockets, so no ws.on('close') fires — so
1026
+ // without this the agent child would keep driving the debug Chrome as an
1027
+ // orphan after the dev server is gone, and a pending grace timer would
1028
+ // fire abort() 15s into the void.
1029
+ if (activeRun) {
1030
+ if (activeRun.graceTimer)
1031
+ clearTimeout(activeRun.graceTimer);
1032
+ activeRun.cancelled = true;
1033
+ activeRun.abort.abort();
1034
+ activeRun = null;
1035
+ }
920
1036
  // Deactivate the active mode first, then run every plugin's
921
1037
  // shutdown hook (regardless of which mode is active — a plugin may
922
1038
  // own background state even outside its mode). Best-effort: log