@hover-dev/core 0.15.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -55
- package/dist/agentDirectives.d.ts +55 -0
- package/dist/agentDirectives.d.ts.map +1 -0
- package/dist/agentDirectives.js +276 -0
- package/dist/agents/claude.d.ts.map +1 -1
- package/dist/agents/claude.js +28 -3
- package/dist/agents/codex.d.ts.map +1 -1
- package/dist/agents/codex.js +38 -18
- package/dist/agents/gemini.d.ts.map +1 -1
- package/dist/agents/gemini.js +3 -14
- package/dist/agents/invoke.d.ts.map +1 -1
- package/dist/agents/invoke.js +3 -6
- package/dist/agents/qwen.d.ts.map +1 -1
- package/dist/agents/qwen.js +3 -14
- package/dist/agents/registry.d.ts.map +1 -1
- package/dist/agents/registry.js +0 -4
- package/dist/agents/shared.d.ts +28 -0
- package/dist/agents/shared.d.ts.map +1 -0
- package/dist/agents/shared.js +35 -0
- package/dist/agents/types.d.ts +19 -11
- package/dist/agents/types.d.ts.map +1 -1
- package/dist/engine.d.ts +53 -0
- package/dist/engine.d.ts.map +1 -0
- package/dist/engine.js +78 -0
- package/dist/mcp/actuateServer.d.ts +3 -0
- package/dist/mcp/actuateServer.d.ts.map +1 -0
- package/dist/mcp/actuateServer.js +594 -0
- package/dist/mcp/sourceFence.d.ts +23 -0
- package/dist/mcp/sourceFence.d.ts.map +1 -0
- package/dist/mcp/sourceFence.js +79 -0
- package/dist/mcp/sourceServer.d.ts +3 -0
- package/dist/mcp/sourceServer.d.ts.map +1 -0
- package/dist/mcp/sourceServer.js +191 -0
- package/dist/memory/businessMemory.d.ts +29 -0
- package/dist/memory/businessMemory.d.ts.map +1 -0
- package/dist/memory/businessMemory.js +125 -0
- package/dist/modes.d.ts +39 -0
- package/dist/modes.d.ts.map +1 -0
- package/dist/modes.js +34 -0
- package/dist/playwright/cdpStatus.d.ts +0 -15
- package/dist/playwright/cdpStatus.d.ts.map +1 -1
- package/dist/playwright/cdpStatus.js +0 -67
- package/dist/playwright/launchChrome.d.ts +18 -0
- package/dist/playwright/launchChrome.d.ts.map +1 -1
- package/dist/playwright/launchChrome.js +46 -3
- package/dist/playwright/preflight.d.ts.map +1 -1
- package/dist/playwright/preflight.js +6 -1
- package/dist/playwright/resolveMcpConfig.d.ts +12 -0
- package/dist/playwright/resolveMcpConfig.d.ts.map +1 -1
- package/dist/playwright/resolveMcpConfig.js +36 -5
- package/dist/plugin-api.d.ts +35 -26
- package/dist/plugin-api.d.ts.map +1 -1
- package/dist/plugin-api.js +2 -2
- package/dist/qa/candidates.d.ts +32 -0
- package/dist/qa/candidates.d.ts.map +1 -0
- package/dist/qa/candidates.js +20 -0
- package/dist/qa/classify.d.ts +38 -0
- package/dist/qa/classify.d.ts.map +1 -0
- package/dist/qa/classify.js +138 -0
- package/dist/qa/intensity.d.ts +33 -0
- package/dist/qa/intensity.d.ts.map +1 -0
- package/dist/qa/intensity.js +25 -0
- package/dist/qa/qaReport.d.ts +19 -0
- package/dist/qa/qaReport.d.ts.map +1 -0
- package/dist/qa/qaReport.js +50 -0
- package/dist/runSession.d.ts +14 -3
- package/dist/runSession.d.ts.map +1 -1
- package/dist/runSession.js +31 -11
- package/dist/service/cdpHandlers.d.ts +3 -27
- package/dist/service/cdpHandlers.d.ts.map +1 -1
- package/dist/service/cdpHandlers.js +6 -53
- package/dist/service/cdpHint.d.ts +21 -28
- package/dist/service/cdpHint.d.ts.map +1 -1
- package/dist/service/cdpHint.js +106 -164
- package/dist/service/relayHandlers.d.ts +28 -0
- package/dist/service/relayHandlers.d.ts.map +1 -0
- package/dist/service/relayHandlers.js +105 -0
- package/dist/service/saveHandlers.d.ts +1 -3
- package/dist/service/saveHandlers.d.ts.map +1 -1
- package/dist/service/saveHandlers.js +17 -15
- package/dist/service/types.d.ts +108 -8
- package/dist/service/types.d.ts.map +1 -1
- package/dist/service.d.ts +13 -3
- package/dist/service.d.ts.map +1 -1
- package/dist/service.js +1022 -236
- package/dist/sessions/sessions.d.ts +125 -0
- package/dist/sessions/sessions.d.ts.map +1 -0
- package/dist/sessions/sessions.js +175 -0
- package/dist/specs/authFixture.d.ts +30 -0
- package/dist/specs/authFixture.d.ts.map +1 -0
- package/dist/specs/authFixture.js +145 -0
- package/dist/specs/businessMap.d.ts +29 -0
- package/dist/specs/businessMap.d.ts.map +1 -0
- package/dist/specs/businessMap.js +95 -0
- package/dist/specs/detectSharedFlows.d.ts +1 -1
- package/dist/specs/detectSharedFlows.d.ts.map +1 -1
- package/dist/specs/detectSharedFlows.js +20 -21
- package/dist/specs/generatePageObject.d.ts +1 -1
- package/dist/specs/generatePageObject.d.ts.map +1 -1
- package/dist/specs/healPrompt.d.ts +19 -0
- package/dist/specs/healPrompt.d.ts.map +1 -0
- package/dist/specs/healPrompt.js +48 -0
- package/dist/specs/humanSteps.d.ts +4 -8
- package/dist/specs/humanSteps.d.ts.map +1 -1
- package/dist/specs/humanSteps.js +6 -1
- package/dist/specs/optimizeSpec.d.ts +15 -8
- package/dist/specs/optimizeSpec.d.ts.map +1 -1
- package/dist/specs/optimizeSpec.js +98 -46
- package/dist/specs/optimizeSpecWithAgent.d.ts +0 -2
- package/dist/specs/optimizeSpecWithAgent.d.ts.map +1 -1
- package/dist/specs/optimizeSpecWithAgent.js +0 -1
- package/dist/specs/pageObjectManifest.d.ts +3 -1
- package/dist/specs/pageObjectManifest.d.ts.map +1 -1
- package/dist/specs/pageObjectManifest.js +13 -9
- package/dist/specs/replayGrounded.d.ts +45 -0
- package/dist/specs/replayGrounded.d.ts.map +1 -0
- package/dist/specs/replayGrounded.js +155 -0
- package/dist/specs/runFailures.d.ts +34 -0
- package/dist/specs/runFailures.d.ts.map +1 -0
- package/dist/specs/runFailures.js +93 -0
- package/dist/specs/seeds.d.ts +16 -15
- package/dist/specs/seeds.d.ts.map +1 -1
- package/dist/specs/seeds.js +86 -54
- package/dist/specs/sidecar.d.ts +34 -6
- package/dist/specs/sidecar.d.ts.map +1 -1
- package/dist/specs/sidecar.js +79 -9
- package/dist/specs/softBatch.d.ts +14 -0
- package/dist/specs/softBatch.d.ts.map +1 -0
- package/dist/specs/softBatch.js +177 -0
- package/dist/specs/specStep.d.ts +21 -0
- package/dist/specs/specStep.d.ts.map +1 -0
- package/dist/specs/specStep.js +1 -0
- package/dist/specs/text.d.ts +19 -0
- package/dist/specs/text.d.ts.map +1 -0
- package/dist/specs/text.js +27 -0
- package/dist/specs/writeSpec.d.ts +62 -1
- package/dist/specs/writeSpec.d.ts.map +1 -1
- package/dist/specs/writeSpec.js +598 -30
- package/package.json +10 -10
- package/dist/agents/aider.d.ts +0 -16
- package/dist/agents/aider.d.ts.map +0 -1
- package/dist/agents/aider.js +0 -169
- package/dist/agents/cursor.d.ts +0 -18
- package/dist/agents/cursor.d.ts.map +0 -1
- package/dist/agents/cursor.js +0 -229
- package/dist/playwright/raiseWindow.d.ts +0 -10
- package/dist/playwright/raiseWindow.d.ts.map +0 -1
- package/dist/playwright/raiseWindow.js +0 -139
- package/dist/scripts/bench-multi-tab.d.ts +0 -2
- package/dist/scripts/bench-multi-tab.d.ts.map +0 -1
- package/dist/scripts/bench-multi-tab.js +0 -192
- package/dist/scripts/bench-ttfb.d.ts +0 -2
- package/dist/scripts/bench-ttfb.d.ts.map +0 -1
- package/dist/scripts/bench-ttfb.js +0 -127
- package/dist/scripts/start-chrome.d.ts +0 -3
- package/dist/scripts/start-chrome.d.ts.map +0 -1
- package/dist/scripts/start-chrome.js +0 -23
- package/dist/skills/writeSkill.d.ts +0 -27
- package/dist/skills/writeSkill.d.ts.map +0 -1
- package/dist/skills/writeSkill.js +0 -13
- package/dist/specs/listSpecs.d.ts +0 -52
- package/dist/specs/listSpecs.d.ts.map +0 -1
- package/dist/specs/listSpecs.js +0 -139
- package/dist/specs/optimizationSuggestion.d.ts +0 -26
- package/dist/specs/optimizationSuggestion.d.ts.map +0 -1
- package/dist/specs/optimizationSuggestion.js +0 -28
- package/dist/specs/writeCaseCsv.d.ts +0 -28
- package/dist/specs/writeCaseCsv.d.ts.map +0 -1
- package/dist/specs/writeCaseCsv.js +0 -140
package/dist/service.js
CHANGED
|
@@ -10,75 +10,133 @@
|
|
|
10
10
|
* { type: 'hello', payload: { agentId, model, version } }
|
|
11
11
|
* { type: 'event', payload: InvokeEvent } // see agents/types.ts
|
|
12
12
|
* { type: 'cdp-status', payload: { state, reason?, matchingTabUrl?, browser?, launching? } }
|
|
13
|
-
* { type: 'specs-list', payload: { specs: SpecSummary[] } }
|
|
14
|
-
* { type: 'seeds-list', payload: { seeds: { name, note, signature, code, source }[] } }
|
|
15
13
|
* { type: 'spec-saved', payload: { name, path } }
|
|
16
14
|
* { type: 'spec-exists', payload: { slug, existingPath } }
|
|
17
|
-
* { type: 'case-csv-saved', payload: { name, path } }
|
|
18
|
-
* { type: 'case-csv-exists', payload: { slug, existingPath } }
|
|
19
15
|
* { type: 'error', payload: { message } }
|
|
20
16
|
*
|
|
21
17
|
* client → server
|
|
22
|
-
* { type: 'command', payload: { text, sessionId
|
|
23
|
-
* // when reRecord.slug is set, the
|
|
24
|
-
* // service collects tool_use events
|
|
25
|
-
* // into a step list and on a clean
|
|
26
|
-
* // session_end overwrites
|
|
27
|
-
* // __vibe_tests__/<slug>.spec.ts
|
|
18
|
+
* { type: 'command', payload: { text, sessionId? } }
|
|
28
19
|
* { type: 'cancel' }
|
|
29
|
-
* { type: 'check-cdp', payload: { pageUrl } } // "is this widget in the debug Chrome?"
|
|
30
20
|
* { type: 'launch-chrome', payload: { pageUrl } } // start debug Chrome, navigate to pageUrl
|
|
31
|
-
* { type: 'focus-debug', payload: { pageUrl } } // bringToFront the matching tab in debug Chrome
|
|
32
21
|
* { type: 'save-spec', payload: { name, description, steps, assertions?, overwrite? } }
|
|
33
|
-
* { type: 'save-case-csv', payload: { name, description, steps, assertions?, jiraProjectKey?, labels?, overwrite? } }
|
|
34
|
-
* { type: 'list-specs' } // ask for every spec under __vibe_tests__/, with parsed JSDoc headers
|
|
35
|
-
* { type: 'list-seeds' } // ask for built-in + .hover/rules/ translation seeds (read-only)
|
|
36
|
-
* { type: 'list-agents' } // ask for the full agent registry + install status
|
|
37
22
|
* { type: 'switch-agent', payload: { agentId } } // set the service's current agent; broadcasts to all connections
|
|
23
|
+
* { type: 'reveal-source', payload: { source } } // relay a data-hover-source value to other clients (F2 page→editor)
|
|
38
24
|
*
|
|
39
25
|
* server → client (in addition to those documented in the file body):
|
|
26
|
+
* { type: 'reveal-source', payload: { source } } // relayed to non-origin clients (the VSCode ext jumps the editor)
|
|
40
27
|
* { type: 'agents', payload: { current: string, available: AgentAvailability[] } }
|
|
41
28
|
* { type: 'modes', payload: { current: string|null, available: ModeEntry[] } }
|
|
42
29
|
* { type: '<plugin-namespaced>', payload: <plugin-specific> }
|
|
43
30
|
*
|
|
44
31
|
* client → server (plugin-aware additions):
|
|
45
32
|
* { type: 'set-mode', payload: { modeId: string|null } } // null = exit moded operation
|
|
46
|
-
* { type: 'list-modes' }
|
|
47
33
|
*/
|
|
48
34
|
import { WebSocketServer, WebSocket } from 'ws';
|
|
35
|
+
import { fileURLToPath } from 'node:url';
|
|
36
|
+
import { dirname, join, resolve } from 'node:path';
|
|
37
|
+
import { runDir } from './specs/sidecar.js';
|
|
38
|
+
import { readdirSync, statSync, mkdirSync, readFileSync } from 'node:fs';
|
|
39
|
+
import { tmpdir } from 'node:os';
|
|
49
40
|
import { runSession } from './runSession.js';
|
|
50
41
|
import { readConventions } from './service/conventions.js';
|
|
51
42
|
import { optimizeSpecWithAgent } from './specs/optimizeSpecWithAgent.js';
|
|
52
|
-
import {
|
|
43
|
+
import { parseRunFailures } from './specs/runFailures.js';
|
|
44
|
+
import { buildHealPrompt, healLabel } from './specs/healPrompt.js';
|
|
53
45
|
import { listAgentAvailability, pickPrimaryAgent, } from './agents/detect.js';
|
|
54
46
|
import { getAgent } from './agents/registry.js';
|
|
55
47
|
import { getPreflight, invalidatePreflight } from './playwright/preflightCache.js';
|
|
56
|
-
import { resolveMcpConfig } from './playwright/resolveMcpConfig.js';
|
|
57
|
-
import { launchDebugChrome } from './playwright/launchChrome.js';
|
|
58
|
-
import {
|
|
59
|
-
import {
|
|
48
|
+
import { resolveMcpConfig, mcpToolPrefix } from './playwright/resolveMcpConfig.js';
|
|
49
|
+
import { launchDebugChrome, closeDebugChrome } from './playwright/launchChrome.js';
|
|
50
|
+
import { writeSessionRecord, parseFindings, tallyTools } from './sessions/sessions.js';
|
|
51
|
+
import { resolveModeBehavior, isBuiltinMode, BUILTIN_MODES } from './modes.js';
|
|
52
|
+
import { CJK_RE, ZH_OUTPUT_DIRECTIVE, GROUNDED_ACTUATION_DENY, REPORTING_DIRECTIVE, NARRATION_DIRECTIVE, ASK_FORMAT_DIRECTIVE, EXPLORATION_CHECKPOINT_DIRECTIVE, GROUNDED_ACTUATION_DIRECTIVE, RECON_DIRECTIVE, QA_EXPLORATION_DIRECTIVE, QA_VERIFY_DEFER_SECURITY_DIRECTIVE, } from './agentDirectives.js';
|
|
53
|
+
import { loadMemory, formatMemoryForPrompt, writeFact } from './memory/businessMemory.js';
|
|
54
|
+
import { writeQaReport } from './qa/qaReport.js';
|
|
55
|
+
import { finalizeCandidates } from './qa/candidates.js';
|
|
56
|
+
import { QA_INTENSITY, asQaIntensity, qaBudgetDirective } from './qa/intensity.js';
|
|
57
|
+
import { classifyInstruction } from './qa/classify.js';
|
|
60
58
|
import { send, sendIfOpen } from './service/types.js';
|
|
59
|
+
import { handleRelayMessage } from './service/relayHandlers.js';
|
|
61
60
|
import { buildCdpHint, buildCdpHintResume } from './service/cdpHint.js';
|
|
62
|
-
import {
|
|
63
|
-
import { handleSaveArtifact, SPEC_CONFIG,
|
|
61
|
+
import { handleLaunchChrome, } from './service/cdpHandlers.js';
|
|
62
|
+
import { handleSaveArtifact, SPEC_CONFIG, } from './service/saveHandlers.js';
|
|
64
63
|
import { CURRENT_API_VERSION, } from './plugin-api.js';
|
|
64
|
+
/** Tools whose steps crystallize to a replayable line (grounded actuations +
|
|
65
|
+
* navigation). Used to build a FALLBACK QA candidate from a completed run when
|
|
66
|
+
* the agent never called record_candidate — so crystallization doesn't depend
|
|
67
|
+
* on the agent's compliance. Structural typing avoids a SkillStep import. */
|
|
68
|
+
const CRYSTALLIZABLE_TOOLS = new Set([
|
|
69
|
+
'click_control', 'fill_control', 'select_control', 'check_control',
|
|
70
|
+
'upload_file', 'assert_visible', 'browser_navigate',
|
|
71
|
+
]);
|
|
72
|
+
function bareToolName(tool) {
|
|
73
|
+
return tool.replace(/^mcp__[a-z0-9_-]+?__/, '');
|
|
74
|
+
}
|
|
75
|
+
function isCrystallizableStep(s) {
|
|
76
|
+
return s.kind === 'step' && !!s.tool && !s.isError && CRYSTALLIZABLE_TOOLS.has(bareToolName(s.tool));
|
|
77
|
+
}
|
|
78
|
+
/** A real interaction (not just navigation) — so a fallback candidate isn't a
|
|
79
|
+
* lone goto with nothing to replay. */
|
|
80
|
+
function isRealAction(s) {
|
|
81
|
+
return !!s.tool && bareToolName(s.tool) !== 'browser_navigate';
|
|
82
|
+
}
|
|
83
|
+
/** The source-reader MCP server (codeContext). Id → the `mcp__hoversource`
|
|
84
|
+
* tool prefix; script path resolved relative to this module so it works from
|
|
85
|
+
* dist/. Spawned only when codeContext is enabled. */
|
|
86
|
+
const SOURCE_MCP_ID = 'hoversource'; // no hyphen — see CONTROL_MCP_ID note below
|
|
87
|
+
const SOURCE_MCP_SCRIPT = resolve(dirname(fileURLToPath(import.meta.url)), 'mcp', 'sourceServer.js');
|
|
88
|
+
/** The control-actuation MCP server (always on) — force-toggles sr-only hidden
|
|
89
|
+
* radios/checkboxes the locked-down Playwright `browser_click` can't actuate. */
|
|
90
|
+
// NOTE: no hyphen. Claude forms MCP tool names as `mcp__<config-id>__<tool>`
|
|
91
|
+
// keeping the id verbatim, but our allow-list prefix sanitizes non-alphanumerics
|
|
92
|
+
// to `_` (mcpToolPrefix). A hyphenated id ('hover-control') yields allow
|
|
93
|
+
// `mcp__hover_control` which does NOT prefix-match the tool `mcp__hover-control__*`,
|
|
94
|
+
// so every actuation call gets denied by the hard sandbox. Keep it alphanumeric.
|
|
95
|
+
const CONTROL_MCP_ID = 'hovercontrol';
|
|
96
|
+
const CONTROL_MCP_SCRIPT = resolve(dirname(fileURLToPath(import.meta.url)), 'mcp', 'actuateServer.js');
|
|
65
97
|
// ClientMessage + send moved to ./service/types.ts so the cdp + save
|
|
66
98
|
// handler modules can share them. See those files for the wire shape.
|
|
67
99
|
const PROTOCOL_VERSION = 1;
|
|
68
100
|
const PORT_RETRIES = 10;
|
|
69
|
-
/**
|
|
70
|
-
*
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
*
|
|
74
|
-
*
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
101
|
+
/** An isolated, empty cwd for the agent when the user picks "Isolated" memory.
|
|
102
|
+
* `claude` keys its auto-memory by the absolute cwd path and discovers CLAUDE.md
|
|
103
|
+
* by walking up from cwd — so running in a throwaway temp dir (no .git / no
|
|
104
|
+
* ancestor CLAUDE.md) loads NONE of the user's project memory or CLAUDE.md,
|
|
105
|
+
* while their ~/.claude credentials (OAuth) stay intact. The default ("shared")
|
|
106
|
+
* keeps cwd = devRoot so the agent gets the project's context. */
|
|
107
|
+
function isolatedAgentCwd() {
|
|
108
|
+
const dir = resolve(tmpdir(), 'hover-agent-cwd');
|
|
109
|
+
try {
|
|
110
|
+
mkdirSync(dir, { recursive: true });
|
|
111
|
+
}
|
|
112
|
+
catch { /* best-effort */ }
|
|
113
|
+
return dir;
|
|
114
|
+
}
|
|
115
|
+
/** The most-recently-written `.png` in a directory (by mtime), or null. Used to
|
|
116
|
+
* resolve which screenshot a `browser_take_screenshot` just produced — the
|
|
117
|
+
* agent often lets the MCP auto-name the file, so the name isn't in the tool
|
|
118
|
+
* input; the freshest png in the run's output dir is it. Best-effort: never
|
|
119
|
+
* throws (a missing dir / race just yields null). */
|
|
120
|
+
function newestPng(dir) {
|
|
121
|
+
try {
|
|
122
|
+
let best = null;
|
|
123
|
+
let bestMtime = -1;
|
|
124
|
+
for (const f of readdirSync(dir)) {
|
|
125
|
+
if (!f.toLowerCase().endsWith('.png'))
|
|
126
|
+
continue;
|
|
127
|
+
const p = resolve(dir, f);
|
|
128
|
+
const mtime = statSync(p).mtimeMs;
|
|
129
|
+
if (mtime > bestMtime) {
|
|
130
|
+
bestMtime = mtime;
|
|
131
|
+
best = p;
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
return best;
|
|
135
|
+
}
|
|
136
|
+
catch {
|
|
137
|
+
return null;
|
|
138
|
+
}
|
|
139
|
+
}
|
|
82
140
|
/**
|
|
83
141
|
* Try to bind a WebSocketServer to <host>:<port>. Resolves with the wss on
|
|
84
142
|
* success; rejects with the bind error (typically EADDRINUSE) on failure.
|
|
@@ -129,11 +187,6 @@ export async function startService(opts) {
|
|
|
129
187
|
const preferred = opts.agentId ?? process.env.HOVER_AGENT;
|
|
130
188
|
const primary = await pickPrimaryAgent(preferred);
|
|
131
189
|
let currentAgentId = primary?.descriptor.id ?? preferred ?? 'claude';
|
|
132
|
-
// Optional model API key the widget supplied (set-api-key). Held in memory
|
|
133
|
-
// for this service's lifetime only — never written to disk, never logged.
|
|
134
|
-
// Injected into the spawned CLI's env so a user without a logged-in
|
|
135
|
-
// subscription can drive Hover on their own key.
|
|
136
|
-
let currentApiKey = process.env.ANTHROPIC_API_KEY ?? process.env.OPENAI_API_KEY ?? undefined;
|
|
137
190
|
if (!primary) {
|
|
138
191
|
// Nothing installed — still bind so the widget can show a helpful
|
|
139
192
|
// "install one of these" dialog. Commands will fail with
|
|
@@ -145,15 +198,56 @@ export async function startService(opts) {
|
|
|
145
198
|
else if (preferred && preferred !== primary.descriptor.id) {
|
|
146
199
|
process.stderr.write(`[hover] requested agent "${preferred}" is not installed; falling back to "${primary.descriptor.id}".\n`);
|
|
147
200
|
}
|
|
148
|
-
|
|
201
|
+
let model = opts.model ?? 'sonnet';
|
|
202
|
+
// Reasoning-effort level for runs (set via set-effort; undefined = agent/model
|
|
203
|
+
// default). Threaded into invokeAgent alongside model.
|
|
204
|
+
let currentEffort = opts.effort;
|
|
205
|
+
// Local LLM endpoint (set via set-local-endpoint): when the qwen agent is
|
|
206
|
+
// active, this OpenAI-compatible base URL is injected so qwen drives the
|
|
207
|
+
// user's self-hosted model instead of a hosted one.
|
|
208
|
+
let currentLocalBaseUrl;
|
|
209
|
+
// BYOK (set via set-byok): when present, runs are driven by the protocol's
|
|
210
|
+
// matching CLI with the user's key + base URL + model injected via env,
|
|
211
|
+
// instead of the local-CLI agent's own logged-in auth. null = use the CLI.
|
|
212
|
+
let currentByok = null;
|
|
213
|
+
// Protocol → CLI: Anthropic drives claude (hard sandbox), Gemini drives the
|
|
214
|
+
// gemini CLI, OpenAI / Azure / OpenAI-compatible gateways drive codex.
|
|
215
|
+
const byokAgentFor = (protocol) => protocol === 'anthropic' ? 'claude' : protocol === 'gemini' ? 'gemini' : 'codex';
|
|
216
|
+
// Protocol → auth env vars the matching CLI reads. Only set what's provided
|
|
217
|
+
// so an empty base URL leaves the CLI on its own default endpoint.
|
|
218
|
+
const byokEnvFor = (b) => {
|
|
219
|
+
const env = {};
|
|
220
|
+
if (b.protocol === 'anthropic') {
|
|
221
|
+
if (b.apiKey)
|
|
222
|
+
env.ANTHROPIC_API_KEY = b.apiKey;
|
|
223
|
+
if (b.baseUrl)
|
|
224
|
+
env.ANTHROPIC_BASE_URL = b.baseUrl;
|
|
225
|
+
}
|
|
226
|
+
else if (b.protocol === 'gemini') {
|
|
227
|
+
if (b.apiKey) {
|
|
228
|
+
env.GEMINI_API_KEY = b.apiKey;
|
|
229
|
+
env.GOOGLE_API_KEY = b.apiKey;
|
|
230
|
+
}
|
|
231
|
+
if (b.baseUrl)
|
|
232
|
+
env.GOOGLE_GEMINI_BASE_URL = b.baseUrl;
|
|
233
|
+
}
|
|
234
|
+
else {
|
|
235
|
+
// openai / azure / gateways — OpenAI-compatible, driven via codex.
|
|
236
|
+
if (b.apiKey)
|
|
237
|
+
env.OPENAI_API_KEY = b.apiKey;
|
|
238
|
+
if (b.baseUrl)
|
|
239
|
+
env.OPENAI_BASE_URL = b.baseUrl;
|
|
240
|
+
}
|
|
241
|
+
return env;
|
|
242
|
+
};
|
|
149
243
|
// No default budget cap — long real-world flows (form filling, multi-step
|
|
150
244
|
// checkouts) routinely run past the old $0.50 ceiling and got cut off
|
|
151
245
|
// mid-run. The widget shows the running $ counter in the header instead,
|
|
152
246
|
// so the user can hit Stop when they've seen enough. Pass maxBudgetUsd
|
|
153
247
|
// explicitly (or via the Vite plugin option) if a hard ceiling is needed.
|
|
154
248
|
const maxBudgetUsd = opts.maxBudgetUsd;
|
|
155
|
-
const optimizeMode = opts.optimizeMode ?? 'suggest';
|
|
156
249
|
const cdpUrl = opts.cdpUrl ?? 'http://localhost:9222';
|
|
250
|
+
const userDataDir = opts.userDataDir;
|
|
157
251
|
const devRoot = opts.devRoot ?? process.cwd();
|
|
158
252
|
const wss = await pickAndBind('127.0.0.1', requestedPort, PORT_RETRIES);
|
|
159
253
|
const port = wss.address().port;
|
|
@@ -163,7 +257,7 @@ export async function startService(opts) {
|
|
|
163
257
|
// forced an explicit one, but in that case mode-contributed servers
|
|
164
258
|
// are silently dropped — we log a warning the first time it happens.
|
|
165
259
|
let warnedExplicitMcpOverride = false;
|
|
166
|
-
const buildMcpConfig = () => {
|
|
260
|
+
const buildMcpConfig = (shotDir, sourceGate = 'ask') => {
|
|
167
261
|
if (opts.mcpConfig) {
|
|
168
262
|
const activePlugin = currentModeId ? pluginsByModeId.get(currentModeId) : null;
|
|
169
263
|
if (activePlugin?.mcpServers?.length && !warnedExplicitMcpOverride) {
|
|
@@ -180,7 +274,7 @@ export async function startService(opts) {
|
|
|
180
274
|
for (const p of plugins) {
|
|
181
275
|
for (const srv of p.mcpServers ?? []) {
|
|
182
276
|
const scope = srv.activeInModes ?? (p.mode ? [p.mode.id] : []);
|
|
183
|
-
const inMode = scope.includes('*') || scope.includes(currentModeId);
|
|
277
|
+
const inMode = scope.includes('*') || scope.includes(currentModeId) || apiScopeOk(scope) || pentestScopeOk(scope);
|
|
184
278
|
if (!inMode)
|
|
185
279
|
continue;
|
|
186
280
|
extra.push({
|
|
@@ -195,6 +289,41 @@ export async function startService(opts) {
|
|
|
195
289
|
}
|
|
196
290
|
}
|
|
197
291
|
}
|
|
292
|
+
// codeContext (opt-in, all modes): the fenced read-only source reader.
|
|
293
|
+
// 'deny' drops it entirely; 'ask' makes it gate each read through the editor
|
|
294
|
+
// (HOVER_APPROVAL_PORT); 'always' lets it read without asking.
|
|
295
|
+
if (opts.codeContext && sourceGate !== 'deny') {
|
|
296
|
+
extra.push({
|
|
297
|
+
id: SOURCE_MCP_ID,
|
|
298
|
+
command: process.execPath,
|
|
299
|
+
args: [SOURCE_MCP_SCRIPT],
|
|
300
|
+
env: {
|
|
301
|
+
HOVER_PROJECT_ROOT: devRoot,
|
|
302
|
+
HOVER_SOURCE_GATE: sourceGate === 'ask' ? 'ask' : 'allow',
|
|
303
|
+
...(sourceGate === 'ask' ? { HOVER_APPROVAL_PORT: String(port) } : {}),
|
|
304
|
+
},
|
|
305
|
+
});
|
|
306
|
+
}
|
|
307
|
+
// Control actuation (always on, all modes): force-toggles sr-only hidden
|
|
308
|
+
// radios/checkboxes the locked-down Playwright click can't actuate. Drives
|
|
309
|
+
// the same debug Chrome over CDP; crystallizes to a normal .check() step.
|
|
310
|
+
extra.push({
|
|
311
|
+
id: CONTROL_MCP_ID,
|
|
312
|
+
command: process.execPath,
|
|
313
|
+
args: [CONTROL_MCP_SCRIPT],
|
|
314
|
+
// HOVER_APPROVAL_PORT: the control MCP's ask_user tool reaches the editor
|
|
315
|
+
// over the service WS. HOVER_PROJECT_ROOT: where upload_file writes its
|
|
316
|
+
// placeholder fixture and resolves relative paths. HOVER_SHOT_DIR: where
|
|
317
|
+
// take_screenshot writes (the same per-run dir the service scans), so its
|
|
318
|
+
// viewport PNGs surface in the chat exactly like browser_take_screenshot's.
|
|
319
|
+
env: {
|
|
320
|
+
HOVER_CDP_URL: cdpUrl,
|
|
321
|
+
HOVER_DEV_URL: opts.devUrl ?? cdpUrl,
|
|
322
|
+
HOVER_APPROVAL_PORT: String(port),
|
|
323
|
+
HOVER_PROJECT_ROOT: devRoot,
|
|
324
|
+
...(shotDir ? { HOVER_SHOT_DIR: shotDir } : {}),
|
|
325
|
+
},
|
|
326
|
+
});
|
|
198
327
|
// Single-Chrome model: the Playwright MCP always points at the one debug
|
|
199
328
|
// Chrome on the normal cdpUrl. (Pre-single-Chrome this branched to a
|
|
200
329
|
// mode-specific port like 9333; there's no second Chrome anymore.)
|
|
@@ -205,6 +334,10 @@ export async function startService(opts) {
|
|
|
205
334
|
// Suffix the filename by the mode so different mode toggles within
|
|
206
335
|
// one service produce distinct config files (debugging aid).
|
|
207
336
|
suffix: currentModeId ?? undefined,
|
|
337
|
+
// Screenshots / traces land in the run's own folder
|
|
338
|
+
// (.hover/runs/<conv>/<runId>/screenshots), grouped per run, instead of
|
|
339
|
+
// the MCP server's default OS temp dir.
|
|
340
|
+
outputDir: shotDir,
|
|
208
341
|
});
|
|
209
342
|
};
|
|
210
343
|
// Surface post-listen errors instead of crashing the host process.
|
|
@@ -239,11 +372,45 @@ export async function startService(opts) {
|
|
|
239
372
|
}
|
|
240
373
|
/** id of the currently-active mode, or null for normal (unmoded) mode. */
|
|
241
374
|
let currentModeId = null;
|
|
375
|
+
/**
|
|
376
|
+
* The single in-flight agent run, held at SERVICE scope (not per-connection)
|
|
377
|
+
* so it SURVIVES the widget's WS dropping. The widget lives in the page the
|
|
378
|
+
* agent drives, so any agent navigation (a pentest payload in the URL, an
|
|
379
|
+
* HMR reload) tears the widget down and closes its socket — but the agent is
|
|
380
|
+
* still happily driving the tab over CDP and recording findings server-side.
|
|
381
|
+
* Killing it on every navigation made pentest mode (which navigates
|
|
382
|
+
* constantly) unusable. Instead: detach on close, keep streaming to whichever
|
|
383
|
+
* ws is attached, and only abort if no widget reconnects within the grace
|
|
384
|
+
* window. Single active run — Hover binds 127.0.0.1 for one local user.
|
|
385
|
+
*/
|
|
386
|
+
const RECONNECT_GRACE_MS = 15_000;
|
|
387
|
+
let activeRun = null;
|
|
388
|
+
/** QA candidate flows recorded by the agent this run (via record_candidate).
|
|
389
|
+
* Buffered here (service scope, visible to both the message handler and
|
|
390
|
+
* the run lifecycle); reset at each run start; resolved to real steps and
|
|
391
|
+
* emitted as `qa-candidates` at run end. */
|
|
392
|
+
let runCandidates = [];
|
|
393
|
+
/** Reset recipe discovered by recon this run (via record_reset_recipe). Buffered
|
|
394
|
+
* here, forwarded to the extension's env store (.hover/environments.json, which
|
|
395
|
+
* the extension owns) at run end, keyed to the run's env. */
|
|
396
|
+
let runResetRecipe = null;
|
|
397
|
+
/** In-flight source-read approval requests: correlation id → the source-MCP
|
|
398
|
+
* socket that asked, so the editor's response can be routed back to it. */
|
|
399
|
+
const pendingApprovals = new Map();
|
|
400
|
+
/** In-flight ask_user prompts: correlation id → the control-MCP socket that
|
|
401
|
+
* asked, so the editor's answer routes back to the waiting agent. */
|
|
402
|
+
const pendingAsks = new Map();
|
|
403
|
+
/** Send a run event to whichever ws is currently attached (survives reconnect). */
|
|
404
|
+
const emitToRun = (msg) => {
|
|
405
|
+
const c = activeRun?.client;
|
|
406
|
+
if (c && c.readyState === WebSocket.OPEN)
|
|
407
|
+
send(c, msg);
|
|
408
|
+
};
|
|
242
409
|
/** Chrome-proxy settings a plugin's `hover:service:start` hook set on us
|
|
243
410
|
* (security's resident MITM). RESIDENT for the whole session — set once
|
|
244
411
|
* before Chrome launches, never cleared on mode change — so the single
|
|
245
412
|
* debug Chrome is born with `--proxy-server` + the SPKI pin and entering
|
|
246
|
-
*
|
|
413
|
+
* API-testing mode is just a runtime flip of the proxy, not a Chrome relaunch.
|
|
247
414
|
* Read by `effectiveLaunchExtras()` and threaded into every cdp handler
|
|
248
415
|
* (launch-chrome) plus the initial auto-launch. */
|
|
249
416
|
let residentChromeProxy = null;
|
|
@@ -252,30 +419,67 @@ export async function startService(opts) {
|
|
|
252
419
|
* Merged with the manifest-declared env when the agent's spawn-time
|
|
253
420
|
* MCP config is built. */
|
|
254
421
|
const mcpEnvOverrides = new Map();
|
|
422
|
+
// QA "API capability": QA is a built-in mode, but when its API capability is
|
|
423
|
+
// on it COMPOSES the api-test plugin's runtime — flips the resident MITM to
|
|
424
|
+
// intercept, exposes the api-test MCP tools, and adds its prompt — so the QA
|
|
425
|
+
// agent can inspect/replay/test the app's API calls alongside the UI flows.
|
|
426
|
+
const apiTestPlugin = plugins.find((p) => p.mode?.id === 'api-test') ?? null;
|
|
427
|
+
/** Is the API capability ACTUALLY usable? The plugin must be loaded AND its
|
|
428
|
+
* resident MITM proxy must be up (set at service:start). "Available" gates the
|
|
429
|
+
* UI toggle so a user can never turn ON something that would then fail. */
|
|
430
|
+
const apiCapabilityAvailable = () => !!apiTestPlugin && residentChromeProxy !== null;
|
|
431
|
+
/** Set per QA run when the API capability is on + available — drives the MCP
|
|
432
|
+
* config, the prompt, and the activate/deactivate of the resident proxy. */
|
|
433
|
+
let apiActiveThisRun = false;
|
|
434
|
+
/** A plugin's mode-scoped contribution also applies when it's the api-test
|
|
435
|
+
* plugin being composed into the current QA run. */
|
|
436
|
+
const apiScopeOk = (scope) => apiActiveThisRun && scope.includes('api-test');
|
|
437
|
+
// QA "Pentest capability": same composition as API, but the pentest plugin —
|
|
438
|
+
// offensive (attacks the OWN dev app), origin-locked, writes a findings report.
|
|
439
|
+
// Mutually exclusive with the API capability (the plugins conflict). Default
|
|
440
|
+
// OFF; the editor confirms before enabling.
|
|
441
|
+
const pentestPlugin = plugins.find((p) => p.mode?.id === 'pentest') ?? null;
|
|
442
|
+
const pentestCapabilityAvailable = () => !!pentestPlugin && residentChromeProxy !== null;
|
|
443
|
+
let pentestActiveThisRun = false;
|
|
444
|
+
const pentestScopeOk = (scope) => pentestActiveThisRun && scope.includes('pentest');
|
|
445
|
+
// QA two-pass: when a QA run has BOTH API + Pentest on, run two sequenced
|
|
446
|
+
// phases (verify first, pentest last) so the destructive pentest can't corrupt
|
|
447
|
+
// the verification and each phase gets a fresh, budget-bounded context. The
|
|
448
|
+
// verify phase runs first; this holds the queued pentest-phase command, which
|
|
449
|
+
// the verify run's finally re-dispatches.
|
|
450
|
+
let pendingPhase2 = null;
|
|
255
451
|
/** The cdp-handler extras (proxy) threaded into launch-chrome / check-cdp /
|
|
256
452
|
* focus-debug and the initial auto-launch. In the single-Chrome model this
|
|
257
453
|
* is driven purely by the RESIDENT proxy (set in `hover:service:start`),
|
|
258
454
|
* NOT by the active mode — there is one Chrome on the normal CDP port that
|
|
259
|
-
* is always proxied; entering
|
|
455
|
+
* is always proxied; entering API-testing mode flips the proxy's behaviour,
|
|
260
456
|
* it does not relaunch Chrome on a different port. Returns undefined when
|
|
261
457
|
* no plugin set a resident proxy (the common no-security case), so plain
|
|
262
458
|
* Hover is byte-for-byte unchanged. */
|
|
263
459
|
const effectiveLaunchExtras = () => {
|
|
264
|
-
if (!residentChromeProxy)
|
|
460
|
+
if (!residentChromeProxy && !userDataDir)
|
|
265
461
|
return undefined;
|
|
266
|
-
return {
|
|
462
|
+
return {
|
|
463
|
+
...(residentChromeProxy ? { proxy: residentChromeProxy } : {}),
|
|
464
|
+
...(userDataDir ? { userDataDir } : {}),
|
|
465
|
+
};
|
|
267
466
|
};
|
|
268
467
|
/** Send the current mode catalogue to one ws (or all if undefined). */
|
|
269
468
|
const broadcastModes = (target) => {
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
469
|
+
// The picker lists ONLY the built-in modes (Flow implicit + QA). The
|
|
470
|
+
// api-test / pentest PLUGINS still load — but they're surfaced as QA
|
|
471
|
+
// capability TOGGLES (apiCapabilityAvailable / pentestCapabilityAvailable
|
|
472
|
+
// below), NOT as standalone modes. (Listing plugin-contributed modes here is
|
|
473
|
+
// the old, removed UX: the mode picker is now Flow + QA Testing only.)
|
|
474
|
+
const builtins = BUILTIN_MODES.map((m) => ({ id: m.id, label: m.label, description: m.description, accent: m.accent }));
|
|
475
|
+
const payload = {
|
|
476
|
+
current: currentModeId,
|
|
477
|
+
available: builtins,
|
|
478
|
+
// Whether QA's API / Pentest capabilities can actually run (plugin loaded +
|
|
479
|
+
// MITM up). Gates the QA toggles so "on" always works.
|
|
480
|
+
apiCapabilityAvailable: apiCapabilityAvailable(),
|
|
481
|
+
pentestCapabilityAvailable: pentestCapabilityAvailable(),
|
|
482
|
+
};
|
|
279
483
|
const targets = target ? [target] : [...wss.clients];
|
|
280
484
|
for (const client of targets) {
|
|
281
485
|
if (client.readyState === WebSocket.OPEN) {
|
|
@@ -325,6 +529,14 @@ export async function startService(opts) {
|
|
|
325
529
|
if (newModeId) {
|
|
326
530
|
const next = pluginsByModeId.get(newModeId);
|
|
327
531
|
if (!next) {
|
|
532
|
+
// A built-in non-Flow mode (QA) is core-owned, not plugin-contributed —
|
|
533
|
+
// no activate hook / sidecars to run, just record it. Anything else is a
|
|
534
|
+
// genuinely unknown mode.
|
|
535
|
+
if (isBuiltinMode(newModeId)) {
|
|
536
|
+
currentModeId = newModeId;
|
|
537
|
+
broadcastModes();
|
|
538
|
+
return;
|
|
539
|
+
}
|
|
328
540
|
throw new Error(`[hover] unknown modeId "${newModeId}"`);
|
|
329
541
|
}
|
|
330
542
|
currentModeId = newModeId;
|
|
@@ -390,7 +602,7 @@ export async function startService(opts) {
|
|
|
390
602
|
wss.on('connection', ws => {
|
|
391
603
|
send(ws, {
|
|
392
604
|
type: 'hello',
|
|
393
|
-
payload: { agentId: currentAgentId, model, version: PROTOCOL_VERSION
|
|
605
|
+
payload: { agentId: currentAgentId, model, version: PROTOCOL_VERSION },
|
|
394
606
|
});
|
|
395
607
|
// Send the agent list as a follow-up event so the widget can render the
|
|
396
608
|
// dropdown immediately on connect / reconnect (e.g. after HMR). The
|
|
@@ -410,20 +622,42 @@ export async function startService(opts) {
|
|
|
410
622
|
// Send the mode catalogue too, so the widget can render the mode
|
|
411
623
|
// toggle immediately. Empty list when no plugins are loaded.
|
|
412
624
|
broadcastModes(ws);
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
//
|
|
417
|
-
//
|
|
418
|
-
//
|
|
625
|
+
// Re-attach to a run that's still in flight (the previous widget dropped —
|
|
626
|
+
// most commonly the agent navigated and reloaded the page the widget lives
|
|
627
|
+
// in). Cancel the pending abort, point the run's event stream at this fresh
|
|
628
|
+
// socket, and tell the widget so it can restore its "running" UI. Without
|
|
629
|
+
// this the run would be killed on every agent navigation.
|
|
630
|
+
// Only re-attach during a genuine reconnect GAP (the prior client is gone).
|
|
631
|
+
// If a live client is still attached, this is a SECOND widget (e.g. the
|
|
632
|
+
// user's regular tab alongside the debug-Chrome tab — both inject a widget
|
|
633
|
+
// on the same origin and open their own socket). Seizing the stream would
|
|
634
|
+
// silence the first widget and let the second's close abort a healthy run,
|
|
635
|
+
// so leave a second concurrent widget in idle UI rather than hijacking.
|
|
636
|
+
if (activeRun && activeRun.client === null) {
|
|
637
|
+
if (activeRun.graceTimer) {
|
|
638
|
+
clearTimeout(activeRun.graceTimer);
|
|
639
|
+
activeRun.graceTimer = null;
|
|
640
|
+
}
|
|
641
|
+
activeRun.client = ws;
|
|
642
|
+
send(ws, { type: 'run-active', payload: { prompt: activeRun.prompt } });
|
|
643
|
+
}
|
|
644
|
+
// If the widget's socket closes while a run it owns is in flight, DON'T
|
|
645
|
+
// abort — the agent is still driving the tab over CDP. Detach this ws and
|
|
646
|
+
// start a grace window; a reconnecting widget (above) cancels the abort.
|
|
647
|
+
// Only if nobody comes back do we abort, so we still never leave an orphan.
|
|
419
648
|
ws.on('close', () => {
|
|
420
|
-
|
|
649
|
+
if (activeRun && activeRun.client === ws) {
|
|
650
|
+
activeRun.client = null;
|
|
651
|
+
activeRun.graceTimer = setTimeout(() => {
|
|
652
|
+
activeRun?.abort.abort();
|
|
653
|
+
}, RECONNECT_GRACE_MS);
|
|
654
|
+
}
|
|
421
655
|
});
|
|
422
656
|
const cancel = () => {
|
|
423
|
-
if (!
|
|
657
|
+
if (!activeRun)
|
|
424
658
|
return;
|
|
425
|
-
cancelled = true;
|
|
426
|
-
|
|
659
|
+
activeRun.cancelled = true;
|
|
660
|
+
activeRun.abort.abort();
|
|
427
661
|
// Send a synthetic session_end so the widget resets to idle immediately.
|
|
428
662
|
// The for-await loop below short-circuits on `cancelled`, so no events
|
|
429
663
|
// from the dying child will arrive after this.
|
|
@@ -433,7 +667,7 @@ export async function startService(opts) {
|
|
|
433
667
|
// stays false because the agent didn't fail: the user chose to
|
|
434
668
|
// end the run. The widget renders this as a neutral "Stopped"
|
|
435
669
|
// state rather than a red Failed card.
|
|
436
|
-
|
|
670
|
+
emitToRun({
|
|
437
671
|
type: 'event',
|
|
438
672
|
payload: {
|
|
439
673
|
kind: 'session_end',
|
|
@@ -443,7 +677,10 @@ export async function startService(opts) {
|
|
|
443
677
|
},
|
|
444
678
|
});
|
|
445
679
|
};
|
|
446
|
-
|
|
680
|
+
// Named (not an inline arrow) so a QA run with both API + Pentest on can
|
|
681
|
+
// re-enter it for a sequenced second phase — see the phase split + the
|
|
682
|
+
// re-dispatch in the command path's finally.
|
|
683
|
+
const onClientMessage = async (data) => {
|
|
447
684
|
let msg;
|
|
448
685
|
try {
|
|
449
686
|
msg = JSON.parse(data.toString());
|
|
@@ -455,12 +692,59 @@ export async function startService(opts) {
|
|
|
455
692
|
cancel();
|
|
456
693
|
return;
|
|
457
694
|
}
|
|
458
|
-
|
|
459
|
-
|
|
695
|
+
// Stateless relays (reveal-source / source-approval-* / ask-user-*) — see
|
|
696
|
+
// service/relayHandlers.ts. They route between sockets without touching the
|
|
697
|
+
// run's mutable state, so they live outside this closure.
|
|
698
|
+
if (handleRelayMessage(ws, msg, {
|
|
699
|
+
wss,
|
|
700
|
+
activeRunClient: () => activeRun?.client,
|
|
701
|
+
pendingApprovals,
|
|
702
|
+
pendingAsks,
|
|
703
|
+
}))
|
|
704
|
+
return;
|
|
705
|
+
// record-fact (from the control MCP's record_fact tool): persist a learned
|
|
706
|
+
// business rule into .hover/memory/. ONLY in QA/API modes — ignored
|
|
707
|
+
// elsewhere so Flow/Pentest never write business memory. Best-effort:
|
|
708
|
+
// a memory write must never break anything (it isn't even acked).
|
|
709
|
+
if (msg.type === 'record-fact') {
|
|
710
|
+
const f = msg.payload?.fact;
|
|
711
|
+
if (f && f.title && f.rule && (currentModeId === 'qa' || currentModeId === 'api-test')) {
|
|
712
|
+
const types = ['business-rule', 'expected-behavior', 'validation', 'access-policy'];
|
|
713
|
+
const type = types.includes(f.type) ? f.type : 'business-rule';
|
|
714
|
+
void writeFact(devRoot, { name: f.title, description: f.title, type, body: f.rule }).then((r) => {
|
|
715
|
+
if ('error' in r)
|
|
716
|
+
process.stderr.write(`[hover/qa] record-fact write failed: ${r.error}\n`);
|
|
717
|
+
});
|
|
718
|
+
}
|
|
719
|
+
return;
|
|
720
|
+
}
|
|
721
|
+
// record-candidate (from the control MCP's record_candidate tool): buffer a
|
|
722
|
+
// QA candidate flow. The MCP already captured the flow's real grounded
|
|
723
|
+
// steps, so we just hold them. ONLY in QA mode; emitted as `qa-candidates`
|
|
724
|
+
// at run end — never acked, never blocks a run.
|
|
725
|
+
if (msg.type === 'record-candidate') {
|
|
726
|
+
const c = msg.payload?.candidate;
|
|
727
|
+
if (c && typeof c.name === 'string' && Array.isArray(c.steps) && currentModeId === 'qa') {
|
|
728
|
+
runCandidates.push({
|
|
729
|
+
name: c.name,
|
|
730
|
+
description: typeof c.description === 'string' ? c.description : undefined,
|
|
731
|
+
steps: c.steps,
|
|
732
|
+
});
|
|
733
|
+
}
|
|
734
|
+
return;
|
|
735
|
+
}
|
|
736
|
+
// record-reset-recipe (from the control MCP's record_reset_recipe tool): the
|
|
737
|
+
// agent's state-reset classification for this app/env, discovered during
|
|
738
|
+
// recon. Buffer it; forwarded to the extension at run end (it owns
|
|
739
|
+
// .hover/environments.json), keyed to runEnv. Best-effort, never acked.
|
|
740
|
+
if (msg.type === 'record-reset-recipe') {
|
|
741
|
+
const r = msg.payload?.recipe;
|
|
742
|
+
if (r && typeof r.tier === 'number')
|
|
743
|
+
runResetRecipe = r;
|
|
460
744
|
return;
|
|
461
745
|
}
|
|
462
746
|
if (msg.type === 'set-mode') {
|
|
463
|
-
if (
|
|
747
|
+
if (activeRun) {
|
|
464
748
|
send(ws, {
|
|
465
749
|
type: 'error',
|
|
466
750
|
payload: { message: 'set-mode: a command is already running; stop it first' },
|
|
@@ -475,7 +759,7 @@ export async function startService(opts) {
|
|
|
475
759
|
});
|
|
476
760
|
return;
|
|
477
761
|
}
|
|
478
|
-
if (wanted !== null && !pluginsByModeId.has(wanted)) {
|
|
762
|
+
if (wanted !== null && !isBuiltinMode(wanted) && !pluginsByModeId.has(wanted)) {
|
|
479
763
|
send(ws, {
|
|
480
764
|
type: 'error',
|
|
481
765
|
payload: { message: `set-mode: unknown modeId "${wanted}"` },
|
|
@@ -495,13 +779,6 @@ export async function startService(opts) {
|
|
|
495
779
|
}
|
|
496
780
|
return;
|
|
497
781
|
}
|
|
498
|
-
if (msg.type === 'list-agents') {
|
|
499
|
-
// Force a refresh — the user may have just installed a new CLI
|
|
500
|
-
// and clicked the dropdown to see the change.
|
|
501
|
-
const available = await getAvailability(true);
|
|
502
|
-
send(ws, { type: 'agents', payload: { current: currentAgentId, available } });
|
|
503
|
-
return;
|
|
504
|
-
}
|
|
505
782
|
if (msg.type === 'switch-agent') {
|
|
506
783
|
const wanted = msg.payload?.agentId;
|
|
507
784
|
if (typeof wanted !== 'string' || !wanted) {
|
|
@@ -515,7 +792,7 @@ export async function startService(opts) {
|
|
|
515
792
|
// Refuse to switch mid-flight; the user's running command would
|
|
516
793
|
// otherwise outlive its own descriptor and the events it produces
|
|
517
794
|
// would be parsed against the wrong wire format.
|
|
518
|
-
if (
|
|
795
|
+
if (activeRun) {
|
|
519
796
|
send(ws, {
|
|
520
797
|
type: 'error',
|
|
521
798
|
payload: { message: 'switch-agent: a command is already running; stop it first' },
|
|
@@ -537,46 +814,68 @@ export async function startService(opts) {
|
|
|
537
814
|
await broadcastAgents();
|
|
538
815
|
return;
|
|
539
816
|
}
|
|
540
|
-
if (msg.type === 'set-
|
|
541
|
-
//
|
|
542
|
-
//
|
|
543
|
-
//
|
|
544
|
-
const
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
817
|
+
if (msg.type === 'set-model') {
|
|
818
|
+
// Persist the model for subsequent runs (sonnet / opus / haiku / …).
|
|
819
|
+
// Refuse mid-run so an in-flight invocation keeps the model it started
|
|
820
|
+
// with. Applies from the next command.
|
|
821
|
+
const wanted = msg.payload?.model;
|
|
822
|
+
if (typeof wanted !== 'string' || !wanted) {
|
|
823
|
+
send(ws, { type: 'error', payload: { message: 'set-model: model is required' } });
|
|
824
|
+
return;
|
|
825
|
+
}
|
|
826
|
+
if (activeRun) {
|
|
827
|
+
send(ws, { type: 'error', payload: { message: 'set-model: a command is already running; stop it first' } });
|
|
828
|
+
return;
|
|
829
|
+
}
|
|
830
|
+
model = wanted;
|
|
831
|
+
send(ws, { type: 'hello', payload: { agentId: currentAgentId, model, version: PROTOCOL_VERSION } });
|
|
548
832
|
return;
|
|
549
833
|
}
|
|
550
|
-
if (msg.type === '
|
|
551
|
-
//
|
|
552
|
-
//
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
834
|
+
if (msg.type === 'set-effort') {
|
|
835
|
+
// Reasoning-effort level for subsequent runs (empty string clears it →
|
|
836
|
+
// the agent/model default). Refused mid-run, like set-model.
|
|
837
|
+
const wanted = msg.payload?.effort;
|
|
838
|
+
if (typeof wanted !== 'string') {
|
|
839
|
+
send(ws, { type: 'error', payload: { message: 'set-effort: effort is required' } });
|
|
840
|
+
return;
|
|
841
|
+
}
|
|
842
|
+
if (activeRun) {
|
|
843
|
+
send(ws, { type: 'error', payload: { message: 'set-effort: a command is already running; stop it first' } });
|
|
844
|
+
return;
|
|
845
|
+
}
|
|
846
|
+
currentEffort = wanted || undefined;
|
|
557
847
|
return;
|
|
558
848
|
}
|
|
559
|
-
if (msg.type === '
|
|
560
|
-
//
|
|
561
|
-
//
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
code: s.example?.code ?? '',
|
|
569
|
-
source: builtinNames.has(s.name) ? 'builtin' : 'project',
|
|
570
|
-
}));
|
|
571
|
-
send(ws, { type: 'seeds-list', payload: { seeds } });
|
|
849
|
+
if (msg.type === 'set-local-endpoint') {
|
|
850
|
+
// Base URL of the user's self-hosted OpenAI-compatible endpoint for the
|
|
851
|
+
// Local LLM agent (qwen-code as host). Empty string clears it.
|
|
852
|
+
const url = msg.payload?.baseUrl;
|
|
853
|
+
if (typeof url !== 'string') {
|
|
854
|
+
send(ws, { type: 'error', payload: { message: 'set-local-endpoint: baseUrl is required' } });
|
|
855
|
+
return;
|
|
856
|
+
}
|
|
857
|
+
currentLocalBaseUrl = url || undefined;
|
|
572
858
|
return;
|
|
573
859
|
}
|
|
574
|
-
if (msg.type === '
|
|
575
|
-
|
|
860
|
+
if (msg.type === 'set-byok') {
|
|
861
|
+
// BYOK config for subsequent runs, or null to fall back to the
|
|
862
|
+
// local-CLI agent's own auth. Refused mid-run, like set-model.
|
|
863
|
+
if (activeRun) {
|
|
864
|
+
send(ws, { type: 'error', payload: { message: 'set-byok: a command is already running; stop it first' } });
|
|
865
|
+
return;
|
|
866
|
+
}
|
|
867
|
+
const c = msg.payload?.config;
|
|
868
|
+
currentByok = c && typeof c.protocol === 'string' ? c : null;
|
|
576
869
|
return;
|
|
577
870
|
}
|
|
578
|
-
if (msg.type === '
|
|
579
|
-
|
|
871
|
+
if (msg.type === 'refresh-agents') {
|
|
872
|
+
// Re-scan PATH (the user just installed a CLI) and re-broadcast.
|
|
873
|
+
await getAvailability(true);
|
|
874
|
+
await broadcastAgents();
|
|
875
|
+
return;
|
|
876
|
+
}
|
|
877
|
+
if (msg.type === 'save-spec') {
|
|
878
|
+
await handleSaveArtifact(ws, msg, devRoot, SPEC_CONFIG);
|
|
580
879
|
return;
|
|
581
880
|
}
|
|
582
881
|
// Stage 7 (F7) widget flow: optimize a saved spec, then promote/discard
|
|
@@ -590,8 +889,15 @@ export async function startService(opts) {
|
|
|
590
889
|
return;
|
|
591
890
|
}
|
|
592
891
|
try {
|
|
892
|
+
// Optimize is text-only refinement — run it on a CHEAP model: the
|
|
893
|
+
// user's `hover.optimizeModel` setting if set, else the agent's
|
|
894
|
+
// cheapModel (e.g. claude → haiku), else the session model. Keeps the
|
|
895
|
+
// refinement affordable (and viable to run often) without a big model.
|
|
896
|
+
const optimizeModel = (typeof msg.payload?.optimizeModel === 'string' && msg.payload.optimizeModel)
|
|
897
|
+
|| getAgent(currentAgentId)?.cheapModel
|
|
898
|
+
|| model;
|
|
593
899
|
const res = await optimizeSpecWithAgent(devRoot, slug, {
|
|
594
|
-
agentId: currentAgentId, model, maxBudgetUsd,
|
|
900
|
+
agentId: currentAgentId, model: optimizeModel, maxBudgetUsd,
|
|
595
901
|
});
|
|
596
902
|
send(ws, { type: 'optimize-result', payload: { slug, original: res.original, candidate: res.code } });
|
|
597
903
|
}
|
|
@@ -601,31 +907,30 @@ export async function startService(opts) {
|
|
|
601
907
|
}
|
|
602
908
|
return;
|
|
603
909
|
}
|
|
604
|
-
|
|
910
|
+
// Self-heal Stage 2: build the heal prompt for a failed spec and bounce it
|
|
911
|
+
// back. The extension then runs it through the normal run path (runPrompt →
|
|
912
|
+
// command), so the repair streams into chat and crystallizes like any run —
|
|
913
|
+
// no run-path surgery. The failing locator comes from the latest Playwright
|
|
914
|
+
// run JSON (parseRunFailures); absent → buildHealPrompt degrades gracefully.
|
|
915
|
+
if (msg.type === 'heal-spec') {
|
|
605
916
|
const slug = msg.payload?.slug;
|
|
606
|
-
|
|
607
|
-
|
|
917
|
+
const specSource = typeof msg.payload?.specSource === 'string' ? msg.payload.specSource : '';
|
|
918
|
+
if (typeof slug !== 'string' || !slug || !specSource) {
|
|
919
|
+
send(ws, { type: 'error', payload: { message: 'heal-spec: slug and specSource are required' } });
|
|
608
920
|
return;
|
|
609
921
|
}
|
|
922
|
+
let failures = [];
|
|
610
923
|
try {
|
|
611
|
-
const
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
}
|
|
619
|
-
return;
|
|
620
|
-
}
|
|
621
|
-
if (msg.type === 'discard-optimized') {
|
|
622
|
-
const slug = msg.payload?.slug;
|
|
623
|
-
if (typeof slug !== 'string' || !slug) {
|
|
624
|
-
send(ws, { type: 'error', payload: { message: 'discard-optimized: slug is required' } });
|
|
625
|
-
return;
|
|
924
|
+
const runsDir = join(devRoot, '.hover', 'runs');
|
|
925
|
+
const files = readdirSync(runsDir).filter((f) => f.endsWith('.json')).sort();
|
|
926
|
+
const newest = files.at(-1);
|
|
927
|
+
if (newest) {
|
|
928
|
+
failures = parseRunFailures(readFileSync(join(runsDir, newest), 'utf-8'))
|
|
929
|
+
.filter((f) => f.specFile.includes(slug));
|
|
930
|
+
}
|
|
626
931
|
}
|
|
627
|
-
|
|
628
|
-
send(ws, { type: '
|
|
932
|
+
catch { /* no runs ledger yet — heal from the spec source alone */ }
|
|
933
|
+
send(ws, { type: 'heal-ready', payload: { slug, prompt: buildHealPrompt(slug, specSource, failures), label: healLabel(slug) } });
|
|
629
934
|
return;
|
|
630
935
|
}
|
|
631
936
|
// v0.12 — plugin-contributed save handlers. Lookup is O(plugins),
|
|
@@ -661,62 +966,212 @@ export async function startService(opts) {
|
|
|
661
966
|
});
|
|
662
967
|
return;
|
|
663
968
|
}
|
|
664
|
-
if (msg.type === 'check-cdp') {
|
|
665
|
-
await handleCheckCdp(ws, msg, cdpUrl, effectiveLaunchExtras());
|
|
666
|
-
return;
|
|
667
|
-
}
|
|
668
969
|
if (msg.type === 'launch-chrome') {
|
|
669
970
|
await handleLaunchChrome(ws, msg, cdpUrl, effectiveLaunchExtras());
|
|
670
971
|
return;
|
|
671
972
|
}
|
|
672
|
-
if (msg.type === 'focus-debug') {
|
|
673
|
-
await handleFocusDebug(ws, msg, cdpUrl, effectiveLaunchExtras());
|
|
674
|
-
return;
|
|
675
|
-
}
|
|
676
973
|
if (msg.type !== 'command')
|
|
677
974
|
return;
|
|
678
|
-
const
|
|
975
|
+
const rawText = msg.payload?.text;
|
|
679
976
|
const resumeSessionId = typeof msg.payload?.sessionId === 'string' && msg.payload.sessionId.length > 0
|
|
680
977
|
? msg.payload.sessionId
|
|
681
978
|
: undefined;
|
|
682
|
-
|
|
683
|
-
// passes `reRecord: { slug }`, runSession collects the tool_use events
|
|
684
|
-
// into a SpecStep[] and, on a clean finish, we overwrite the existing
|
|
685
|
-
// __vibe_tests__/<slug>.spec.ts. Same flow the widget uses for "Save as
|
|
686
|
-
// Spec", but the spec already exists and is being regenerated for the
|
|
687
|
-
// current UI.
|
|
688
|
-
const reRecordSlug = msg.payload && typeof msg.payload === 'object' && 'reRecord' in msg.payload
|
|
689
|
-
? msg.payload.reRecord?.slug
|
|
690
|
-
: undefined;
|
|
691
|
-
if (typeof text !== 'string' || !text.trim())
|
|
979
|
+
if (typeof rawText !== 'string' || !rawText.trim())
|
|
692
980
|
return;
|
|
693
|
-
|
|
981
|
+
// `let` (typed string): the classify gate (below) may substitute a refined
|
|
982
|
+
// instruction (e.g. "read the page" → "test this page") before the run uses it.
|
|
983
|
+
let text = rawText;
|
|
984
|
+
if (activeRun) {
|
|
694
985
|
send(ws, {
|
|
695
986
|
type: 'error',
|
|
696
|
-
payload: { message: 'A command is already running
|
|
987
|
+
payload: { message: 'A command is already running.' },
|
|
697
988
|
});
|
|
698
989
|
return;
|
|
699
990
|
}
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
991
|
+
const run = {
|
|
992
|
+
abort: new AbortController(),
|
|
993
|
+
cancelled: false,
|
|
994
|
+
client: ws,
|
|
995
|
+
graceTimer: null,
|
|
996
|
+
prompt: text,
|
|
997
|
+
};
|
|
998
|
+
activeRun = run;
|
|
999
|
+
// Session-ledger state — declared outside the try so the catch path can
|
|
1000
|
+
// still record an aborted / thrown run (the spend view wants those too).
|
|
1001
|
+
const sessionStartedAt = new Date().toISOString();
|
|
1002
|
+
// One id per run, generated NOW (run start), so the ledger record, the
|
|
1003
|
+
// screenshots, and the QA report all share one folder. Replaces the old
|
|
1004
|
+
// split between an end-based record id and a start-based screenshotTag.
|
|
1005
|
+
const runId = `${sessionStartedAt.replace(/[:.]/g, '-')}-${Math.random().toString(16).slice(2, 6)}`;
|
|
1006
|
+
// The chat conversation this run belongs to (from the editor); groups all
|
|
1007
|
+
// its runs under one folder so deleting a conversation removes them.
|
|
1008
|
+
const conversationId = typeof msg.payload?.conversationId === 'string' && msg.payload.conversationId
|
|
1009
|
+
? msg.payload.conversationId
|
|
1010
|
+
: 'default';
|
|
1011
|
+
const runDirPath = runDir(devRoot, conversationId, runId);
|
|
1012
|
+
const runShotDir = join(runDirPath, 'screenshots');
|
|
1013
|
+
let sessionEnd = {};
|
|
1014
|
+
// Findings + clean summary parsed from the ORIGINAL session_end summary,
|
|
1015
|
+
// captured before that summary is stripped of its ## Findings block for the
|
|
1016
|
+
// chat. recordSession reuses these so the ledger record + QA report keep the
|
|
1017
|
+
// findings (re-parsing the stripped summary would lose them).
|
|
1018
|
+
let runParsed = null;
|
|
1019
|
+
let sessionRecorded = false;
|
|
1020
|
+
runCandidates = []; // fresh per run — QA candidate flows accumulate below
|
|
1021
|
+
runResetRecipe = null; // fresh per run — recon may set it
|
|
1022
|
+
pendingPhase2 = null; // cleared each run; the phase split below may re-arm it
|
|
1023
|
+
// Reproducibility context captured up front (snapshot the mode now so a
|
|
1024
|
+
// mid-run switch can't smear it; the rest are filled as the run learns
|
|
1025
|
+
// them). Account labels are LABELS ONLY — never the credentials.
|
|
1026
|
+
const runMode = currentModeId;
|
|
1027
|
+
// QA intensity (per-run): Quick / Standard / Deep — bounds exploration with
|
|
1028
|
+
// a hard model-spend ceiling so "explore the whole app" can't run away.
|
|
1029
|
+
// Only meaningful in QA mode; ignored elsewhere.
|
|
1030
|
+
const runIntensity = asQaIntensity(msg.payload?.intensity);
|
|
1031
|
+
// QA API capability (per-run): when QA's API toggle is on AND the MITM is
|
|
1032
|
+
// available, compose the api-test runtime into this run. The UI only lets
|
|
1033
|
+
// the user turn it on when available, so "on" must actually work — if it's
|
|
1034
|
+
// requested but unavailable, say so loudly (don't silently degrade).
|
|
1035
|
+
const caps = msg.payload?.capabilities;
|
|
1036
|
+
const isPhase2 = msg.payload?.__phase2 === true;
|
|
1037
|
+
const pentestWanted = runMode === 'qa' && caps?.pentest === true && pentestCapabilityAvailable();
|
|
1038
|
+
const apiWanted = runMode === 'qa' && (caps?.api === true || caps?.api === undefined) && apiCapabilityAvailable();
|
|
1039
|
+
// Two-pass: pentest is destructive, so it always runs as a SECOND phase
|
|
1040
|
+
// after the verify phase (functional [+ API]). A first QA run with pentest
|
|
1041
|
+
// on runs verify now and queues a fresh-session pentest phase; the pentest
|
|
1042
|
+
// phase (isPhase2) then runs pentest alone.
|
|
1043
|
+
const splitting = !isPhase2 && pentestWanted;
|
|
1044
|
+
if (splitting) {
|
|
1045
|
+
pentestActiveThisRun = false; // phase 1 = verify (functional + API if on)
|
|
1046
|
+
apiActiveThisRun = apiWanted;
|
|
1047
|
+
pendingPhase2 = {
|
|
1048
|
+
type: 'command',
|
|
1049
|
+
payload: { ...msg.payload, capabilities: { api: false, pentest: true }, sessionId: undefined, __phase2: true },
|
|
1050
|
+
};
|
|
1051
|
+
}
|
|
1052
|
+
else {
|
|
1053
|
+
// Normal QA (no pentest), OR the queued pentest phase itself. Pentest and
|
|
1054
|
+
// API never run in the same phase.
|
|
1055
|
+
pentestActiveThisRun = pentestWanted;
|
|
1056
|
+
apiActiveThisRun = !pentestActiveThisRun && apiWanted;
|
|
1057
|
+
}
|
|
1058
|
+
// Defensive: the UI only enables these toggles when available, so an
|
|
1059
|
+
// explicit "on" should always be honoured. If it somehow isn't, log it
|
|
1060
|
+
// (the run continues as functional-only rather than failing the run).
|
|
1061
|
+
if (runMode === 'qa' && caps?.api === true && !apiActiveThisRun && !pentestActiveThisRun && !splitting && !apiCapabilityAvailable()) {
|
|
1062
|
+
process.stderr.write('[hover/qa] API capability requested but the api-test runtime is unavailable; running functional-only.\n');
|
|
1063
|
+
}
|
|
1064
|
+
if (runMode === 'qa' && caps?.pentest === true && !pentestCapabilityAvailable()) {
|
|
1065
|
+
process.stderr.write('[hover/qa] Pentest capability requested but the pentest runtime is unavailable; running functional-only.\n');
|
|
1066
|
+
}
|
|
1067
|
+
const runResumeOf = resumeSessionId;
|
|
1068
|
+
const runEnv = (() => {
|
|
1069
|
+
const e = msg.payload?.env;
|
|
1070
|
+
return e && typeof e === 'object' ? { id: e.id, name: e.name } : undefined;
|
|
1071
|
+
})();
|
|
1072
|
+
let runTargetUrl;
|
|
1073
|
+
let runAccountLabels;
|
|
1074
|
+
const recordSession = async (outcome, stepCount, detail) => {
|
|
1075
|
+
if (sessionRecorded)
|
|
1076
|
+
return;
|
|
1077
|
+
sessionRecorded = true;
|
|
1078
|
+
const endedAt = new Date().toISOString();
|
|
1079
|
+
// Prefer the findings captured at session_end (from the un-stripped
|
|
1080
|
+
// summary); fall back to parsing detail.summary (error/abort paths).
|
|
1081
|
+
const parsed = runParsed ?? (detail?.summary ? parseFindings(detail.summary) : { summary: '', findings: [] });
|
|
1082
|
+
const toolCounts = detail?.steps ? tallyTools(detail.steps) : undefined;
|
|
1083
|
+
const target = runTargetUrl || runEnv ? { url: runTargetUrl, id: runEnv?.id, name: runEnv?.name } : undefined;
|
|
1084
|
+
const rec = await writeSessionRecord(devRoot, conversationId, runId, {
|
|
1085
|
+
startedAt: sessionStartedAt,
|
|
1086
|
+
endedAt,
|
|
1087
|
+
durationMs: Date.parse(endedAt) - Date.parse(sessionStartedAt),
|
|
1088
|
+
agent: currentAgentId,
|
|
1089
|
+
model,
|
|
1090
|
+
mode: runMode,
|
|
1091
|
+
prompt: text,
|
|
1092
|
+
outcome,
|
|
1093
|
+
errorReason: detail?.errorReason,
|
|
1094
|
+
summary: parsed.summary || undefined,
|
|
1095
|
+
findings: parsed.findings.length ? parsed.findings : undefined,
|
|
1096
|
+
toolCounts: toolCounts && Object.keys(toolCounts).length ? toolCounts : undefined,
|
|
1097
|
+
target: target ? { url: target.url, envId: target.id, envName: target.name } : undefined,
|
|
1098
|
+
accountLabels: runAccountLabels,
|
|
1099
|
+
resumeOf: runResumeOf,
|
|
1100
|
+
turns: sessionEnd.turns,
|
|
1101
|
+
costUsd: sessionEnd.costUsd,
|
|
1102
|
+
tokensUsed: sessionEnd.tokens,
|
|
1103
|
+
stepCount,
|
|
1104
|
+
});
|
|
1105
|
+
// QA mode is report-first: persist a durable Markdown findings report
|
|
1106
|
+
// (mirrors pentest's report file; the chat already shows the Findings
|
|
1107
|
+
// card live). Best-effort — never breaks the run/ledger.
|
|
1108
|
+
if (runMode === 'qa') {
|
|
1109
|
+
const r = await writeQaReport(runDirPath, {
|
|
1110
|
+
prompt: text,
|
|
1111
|
+
summary: parsed.summary,
|
|
1112
|
+
findings: parsed.findings,
|
|
1113
|
+
endedAt,
|
|
1114
|
+
targetUrl: runTargetUrl,
|
|
1115
|
+
});
|
|
1116
|
+
if ('error' in r)
|
|
1117
|
+
process.stderr.write(`[hover/qa] report write failed: ${r.error}\n`);
|
|
1118
|
+
// Surface the report as a clickable artifact in the chat (mirrors the
|
|
1119
|
+
// screenshot event). The editor opens it on click.
|
|
1120
|
+
else if (!run.cancelled)
|
|
1121
|
+
emitToRun({ type: 'qa-report', payload: { path: r.path } });
|
|
1122
|
+
}
|
|
1123
|
+
// Let the active mode's plugin persist its own per-run artifacts bound to
|
|
1124
|
+
// this session id (e.g. api-test writes .hover/api/<id>.json). Best-effort.
|
|
1125
|
+
const sid = rec && 'id' in rec ? rec.id : null;
|
|
1126
|
+
const runEndPlugin = runMode ? pluginsByModeId.get(runMode) : null;
|
|
1127
|
+
if (sid && runEndPlugin?.hooks?.['hover:run:end']) {
|
|
1128
|
+
try {
|
|
1129
|
+
await runEndPlugin.hooks['hover:run:end']({ devRoot, broadcast: broadcastPluginEvent, sessionId: sid });
|
|
1130
|
+
}
|
|
1131
|
+
catch (err) {
|
|
1132
|
+
process.stderr.write(`[hover] plugin "${runEndPlugin.name}" run:end failed: ${err instanceof Error ? err.message : String(err)}\n`);
|
|
1133
|
+
}
|
|
1134
|
+
}
|
|
1135
|
+
// QA + API: persist this run's captured API traffic/checks, then flip the
|
|
1136
|
+
// resident MITM back to passthrough (stop recording). Best-effort.
|
|
1137
|
+
if (apiActiveThisRun && apiTestPlugin) {
|
|
1138
|
+
try {
|
|
1139
|
+
if (sid)
|
|
1140
|
+
await apiTestPlugin.hooks?.['hover:run:end']?.({ devRoot, broadcast: broadcastPluginEvent, sessionId: sid });
|
|
1141
|
+
await apiTestPlugin.hooks?.['hover:mode:deactivate']?.({ devRoot, broadcast: broadcastPluginEvent, modeId: 'qa' });
|
|
1142
|
+
}
|
|
1143
|
+
catch (err) {
|
|
1144
|
+
process.stderr.write(`[hover/qa] api-test compose (run:end) failed: ${err instanceof Error ? err.message : String(err)}\n`);
|
|
1145
|
+
}
|
|
1146
|
+
apiActiveThisRun = false;
|
|
1147
|
+
}
|
|
1148
|
+
// QA + Pentest: stop recording (back to passthrough). The findings are in
|
|
1149
|
+
// the agent's report; a deep PoC report is available via Save. Best-effort.
|
|
1150
|
+
if (pentestActiveThisRun && pentestPlugin) {
|
|
1151
|
+
try {
|
|
1152
|
+
await pentestPlugin.hooks?.['hover:mode:deactivate']?.({ devRoot, broadcast: broadcastPluginEvent, modeId: 'qa' });
|
|
1153
|
+
}
|
|
1154
|
+
catch (err) {
|
|
1155
|
+
process.stderr.write(`[hover/qa] pentest compose (run:end) failed: ${err instanceof Error ? err.message : String(err)}\n`);
|
|
1156
|
+
}
|
|
1157
|
+
pentestActiveThisRun = false;
|
|
1158
|
+
}
|
|
1159
|
+
};
|
|
703
1160
|
try {
|
|
704
1161
|
// Build the MCP config first — it's pure local file IO and lets
|
|
705
1162
|
// us assert plugin-contributed servers landed in the config even
|
|
706
1163
|
// when CDP preflight subsequently fails (useful for smoke tests
|
|
707
1164
|
// that don't have a real debug Chrome wired up).
|
|
708
|
-
|
|
1165
|
+
// This run's screenshots go in its own folder
|
|
1166
|
+
// (.hover/conversations/<conversationId>/<runId>/screenshots) — runShotDir,
|
|
1167
|
+
// computed at run start so the ledger record + report + shots all share it.
|
|
1168
|
+
const sourceGate = msg.payload?.sourceAccess ?? 'ask';
|
|
1169
|
+
const mcpConfig = buildMcpConfig(runShotDir, sourceGate);
|
|
709
1170
|
// Preflight: refuse to invoke if CDP isn't reachable. Otherwise the
|
|
710
1171
|
// Playwright MCP server would silently launch its own Chromium —
|
|
711
1172
|
// and Hover's premise is to drive the user's existing Chrome (with
|
|
712
1173
|
// their dev state, cookies, devtools open), never spawn a fresh one.
|
|
713
|
-
|
|
714
|
-
// own port (e.g. 9333 for security), not the default cdpUrl.
|
|
715
|
-
const preflightExtras = effectiveLaunchExtras();
|
|
716
|
-
const preflightCdpUrl = preflightExtras?.cdpPort
|
|
717
|
-
? `http://localhost:${preflightExtras.cdpPort}`
|
|
718
|
-
: cdpUrl;
|
|
719
|
-
const cdp = await getPreflight(preflightCdpUrl);
|
|
1174
|
+
const cdp = await getPreflight(cdpUrl);
|
|
720
1175
|
if (!cdp.ok) {
|
|
721
1176
|
send(ws, {
|
|
722
1177
|
type: 'event',
|
|
@@ -726,14 +1181,85 @@ export async function startService(opts) {
|
|
|
726
1181
|
summary: cdp.reason,
|
|
727
1182
|
},
|
|
728
1183
|
});
|
|
1184
|
+
// A preflight failure is the most common "why did my run die" — make
|
|
1185
|
+
// it a diagnostic ledger row rather than silently returning.
|
|
1186
|
+
await recordSession('error', 0, { errorReason: cdp.reason });
|
|
729
1187
|
return;
|
|
730
1188
|
}
|
|
1189
|
+
// Target URL for the ledger: the localhost tab (the dev server) if we
|
|
1190
|
+
// have one, else the first tab.
|
|
1191
|
+
runTargetUrl =
|
|
1192
|
+
cdp.tabs?.find((t) => /localhost|127\.0\.0\.1/.test(t.url))?.url ?? cdp.tabs?.[0]?.url;
|
|
1193
|
+
// ── Pre-flight classify gate (QA only) ──────────────────────────────
|
|
1194
|
+
// Route the instruction with a cheap one-shot call BEFORE paying for the
|
|
1195
|
+
// full exploratory run. Fresh user instructions only — skip on resume and
|
|
1196
|
+
// on the internal pentest phase-2 re-dispatch (both already vetted).
|
|
1197
|
+
// Fail-open (→ go) lives inside classifyInstruction, so a hiccup never
|
|
1198
|
+
// blocks a legitimate run. 'refuse' / 'clarify' emit a 0-step session_end
|
|
1199
|
+
// and return WITHOUT creating a run folder or ledger record (ephemeral,
|
|
1200
|
+
// like the CDP check); the extension renders a plain reply / clickable
|
|
1201
|
+
// options from the same event a 0-action run produces.
|
|
1202
|
+
if (runMode === 'qa' && !resumeSessionId && !isPhase2 && typeof text === 'string') {
|
|
1203
|
+
// Show immediate activity for the ~1s classify (flips the UI to
|
|
1204
|
+
// "Working"); the real run emits its own session_start on 'go'.
|
|
1205
|
+
emitToRun({ type: 'event', payload: { kind: 'session_start', sessionId: '' } });
|
|
1206
|
+
const classifyAgentId = currentByok ? byokAgentFor(currentByok.protocol) : currentAgentId;
|
|
1207
|
+
let classifyMemory;
|
|
1208
|
+
try {
|
|
1209
|
+
classifyMemory = formatMemoryForPrompt(await loadMemory(devRoot)) || undefined;
|
|
1210
|
+
}
|
|
1211
|
+
catch { /* best-effort */ }
|
|
1212
|
+
const verdict = await classifyInstruction({
|
|
1213
|
+
agentId: classifyAgentId,
|
|
1214
|
+
instruction: text,
|
|
1215
|
+
pageUrl: runTargetUrl,
|
|
1216
|
+
pageTitle: cdp.tabs?.find((t) => t.url === runTargetUrl)?.title,
|
|
1217
|
+
memory: classifyMemory,
|
|
1218
|
+
// Cheap + fast for claude; BYOK / other agents use their configured model.
|
|
1219
|
+
model: classifyAgentId === 'claude' && !currentByok ? 'haiku' : currentByok?.model || model,
|
|
1220
|
+
effort: currentEffort,
|
|
1221
|
+
cwd: msg.payload?.isolateContext === true ? isolatedAgentCwd() : devRoot,
|
|
1222
|
+
env: currentByok
|
|
1223
|
+
? byokEnvFor(currentByok)
|
|
1224
|
+
: classifyAgentId === 'qwen' && currentLocalBaseUrl
|
|
1225
|
+
? { OPENAI_BASE_URL: currentLocalBaseUrl, OPENAI_API_KEY: process.env.OPENAI_API_KEY || 'local' }
|
|
1226
|
+
: undefined,
|
|
1227
|
+
signal: run.abort.signal,
|
|
1228
|
+
});
|
|
1229
|
+
if (run.cancelled)
|
|
1230
|
+
return; // user hit Stop during classify
|
|
1231
|
+
if (verdict.route === 'refuse') {
|
|
1232
|
+
pendingPhase2 = null; // don't let a queued pentest phase fire on a refused instruction
|
|
1233
|
+
emitToRun({
|
|
1234
|
+
type: 'event',
|
|
1235
|
+
payload: {
|
|
1236
|
+
kind: 'session_end',
|
|
1237
|
+
isError: false,
|
|
1238
|
+
summary: verdict.reason || 'I can only help test this app — tell me a page, feature, or flow to test.',
|
|
1239
|
+
},
|
|
1240
|
+
});
|
|
1241
|
+
return;
|
|
1242
|
+
}
|
|
1243
|
+
if (verdict.route === 'clarify' && verdict.options && verdict.options.length >= 2) {
|
|
1244
|
+
pendingPhase2 = null;
|
|
1245
|
+
const question = verdict.reason || 'What would you like me to test?';
|
|
1246
|
+
const block = ['```hover-ask', ...verdict.options.map((o) => `- ${o}`), '```'].join('\n');
|
|
1247
|
+
emitToRun({
|
|
1248
|
+
type: 'event',
|
|
1249
|
+
payload: { kind: 'session_end', isError: false, summary: `${question}\n\n${block}` },
|
|
1250
|
+
});
|
|
1251
|
+
return;
|
|
1252
|
+
}
|
|
1253
|
+
// 'go' — run it, substituting the re-interpreted instruction if any.
|
|
1254
|
+
if (verdict.refinedInstruction)
|
|
1255
|
+
text = verdict.refinedInstruction;
|
|
1256
|
+
}
|
|
731
1257
|
// Build a system-prompt addendum telling the agent about the user's
|
|
732
1258
|
// current tab. The most common waste we observed: agent calls
|
|
733
1259
|
// browser_navigate to the same URL the user is already on, triggering
|
|
734
|
-
// a wasteful full-page reload that
|
|
735
|
-
//
|
|
736
|
-
//
|
|
1260
|
+
// a wasteful full-page reload that discards the app state the run had
|
|
1261
|
+
// built up (login session, form input, position in a flow) — so the
|
|
1262
|
+
// agent has to redo work and sometimes loses track of where it was.
|
|
737
1263
|
// First turn pays the full rules + narration block; follow-up
|
|
738
1264
|
// turns (`resumeSessionId` set) get only the volatile tab list.
|
|
739
1265
|
// The static rules are already in the prior turn's context, and
|
|
@@ -765,12 +1291,37 @@ export async function startService(opts) {
|
|
|
765
1291
|
// is always-on (treated as if activeInModes was '*').
|
|
766
1292
|
const scope = add.activeInModes ?? (p.mode ? [p.mode.id] : ['*']);
|
|
767
1293
|
const inScope = scope.includes('*') ||
|
|
768
|
-
(currentModeId !== null && scope.includes(currentModeId))
|
|
1294
|
+
(currentModeId !== null && scope.includes(currentModeId)) ||
|
|
1295
|
+
apiScopeOk(scope) ||
|
|
1296
|
+
pentestScopeOk(scope);
|
|
769
1297
|
if (inScope) {
|
|
770
1298
|
appendSystemPrompt = `${appendSystemPrompt}\n\n${add.text}`;
|
|
771
1299
|
}
|
|
772
1300
|
}
|
|
773
1301
|
}
|
|
1302
|
+
// codeContext: tell the agent the fenced source reader exists, so it
|
|
1303
|
+
// proactively reads the real code (better selectors/routes when
|
|
1304
|
+
// authoring; white-box confirmation when probing) instead of only
|
|
1305
|
+
// guessing from the rendered DOM.
|
|
1306
|
+
if (opts.codeContext) {
|
|
1307
|
+
appendSystemPrompt = `${appendSystemPrompt}\n\nYou also have read-only access to this project's source via mcp__hoversource (read_source / list_source), fenced to the repo (secrets, keys, .env, .git, node_modules and build output are refused). Use it to read the actual component / route / API code — write tests against the real selectors and, when probing for security issues, confirm a finding against the server code (the query, the authz check) rather than guessing from the page alone.\n\nIMPORTANT — when you get stuck or confused, READ THE CODE before concluding anything: a control you can't operate (a click that does nothing, a field that won't take input), validation that blocks you with no visible reason, a conditional section that won't appear. Use list_source / read_source to open that component's source and look at the real markup, CSS (e.g. visually-hidden / sr-only inputs), event handlers, and state wiring. Base your diagnosis and your next action on what the code actually does — never assert a framework / state / onChange bug you have not seen in the source. Reading source may require the user's one-click approval; if a read is declined or unavailable, just continue from what you can observe on the page and report honestly — do not retry the read in a loop, and do not fall back to guessing an unseen cause.`;
|
|
1308
|
+
}
|
|
1309
|
+
// Test accounts the prompt referenced via @label (resolved by the editor
|
|
1310
|
+
// from its vault). Injected here, NOT in the user-visible transcript, so
|
|
1311
|
+
// the agent can log in; the literal values it types are redacted out of
|
|
1312
|
+
// the saved spec (writeSpec redactions). Never echoed to the user.
|
|
1313
|
+
const runAccounts = Array.isArray(msg.payload?.accounts) ? msg.payload.accounts : [];
|
|
1314
|
+
if (runAccounts.length) {
|
|
1315
|
+
// Ledger keeps LABELS ONLY — never the username/password.
|
|
1316
|
+
runAccountLabels = runAccounts.map((a) => a.label);
|
|
1317
|
+
const lines = runAccounts.map(a => {
|
|
1318
|
+
const role = a.role ? ` (${a.role})` : '';
|
|
1319
|
+
const user = a.username ? `username ${JSON.stringify(a.username)}` : 'username not on file';
|
|
1320
|
+
const pass = a.password ? `, password ${JSON.stringify(a.password)}` : '';
|
|
1321
|
+
return `- @${a.label}${role}: ${user}${pass}`;
|
|
1322
|
+
}).join('\n');
|
|
1323
|
+
appendSystemPrompt = `${appendSystemPrompt}\n\nTest accounts available for this run — when the task refers to an @label, log in using that account's credentials. Use them ONLY to fill authentication fields; never print or echo them in your replies or summaries.\n${lines}`;
|
|
1324
|
+
}
|
|
774
1325
|
// Mirror the prompt's language in the agent's *prose* output — the
|
|
775
1326
|
// verification summary (Result card), the ## Findings block, and the
|
|
776
1327
|
// step narration — the same way Voice mode mirrors it in TTS. A
|
|
@@ -782,87 +1333,273 @@ export async function startService(opts) {
|
|
|
782
1333
|
if (CJK_RE.test(text)) {
|
|
783
1334
|
appendSystemPrompt = `${appendSystemPrompt}\n\n${ZH_OUTPUT_DIRECTIVE}`;
|
|
784
1335
|
}
|
|
1336
|
+
// The report is about the app, never the tooling (all modes).
|
|
1337
|
+
appendSystemPrompt = `${appendSystemPrompt}\n\n${REPORTING_DIRECTIVE}`;
|
|
1338
|
+
// Keep interim narration to one short line per intent (all modes).
|
|
1339
|
+
appendSystemPrompt = `${appendSystemPrompt}\n\n${NARRATION_DIRECTIVE}`;
|
|
1340
|
+
// ASK_FORMAT (propose choices when the request is vague) and
|
|
1341
|
+
// EXPLORATION_CHECKPOINT (ask before stopping with scope left) are for the DIRECTED
|
|
1342
|
+
// modes. QA is autonomous: a vague request means "explore the whole app",
|
|
1343
|
+
// NOT "ask what to test", and QA_EXPLORATION owns its own stop condition —
|
|
1344
|
+
// so skip both for QA (they made QA ask-at-start instead of exploring).
|
|
1345
|
+
if (currentModeId !== 'qa') {
|
|
1346
|
+
appendSystemPrompt = `${appendSystemPrompt}\n\n${ASK_FORMAT_DIRECTIVE}`;
|
|
1347
|
+
appendSystemPrompt = `${appendSystemPrompt}\n\n${EXPLORATION_CHECKPOINT_DIRECTIVE}`;
|
|
1348
|
+
}
|
|
1349
|
+
// Grounded actuation — the agent uses mcp__hover-control__* instead of
|
|
1350
|
+
// the Playwright interaction tools, so saved selectors are role+name,
|
|
1351
|
+
// never a confabulated getByText. Driven by the mode's behavior (Flow +
|
|
1352
|
+
// QA: yes; plugin modes: no), NOT by `currentModeId === null` — a future
|
|
1353
|
+
// built-in mode (QA) has a non-null id but still wants grounded steps.
|
|
1354
|
+
const groundedActuation = resolveModeBehavior(currentModeId).groundedActuation;
|
|
1355
|
+
if (groundedActuation) {
|
|
1356
|
+
appendSystemPrompt = `${appendSystemPrompt}\n\n${GROUNDED_ACTUATION_DIRECTIVE}`;
|
|
1357
|
+
}
|
|
1358
|
+
// State-reset recon (debt-2 reproducible-state-isolation): ONLY when the
|
|
1359
|
+
// extension explicitly asks (it knows whether this env already has a
|
|
1360
|
+
// recipe). Off by default — recon clears client state, which would wipe a
|
|
1361
|
+
// logged-in session, so it must never run unsolicited or on a plain Flow
|
|
1362
|
+
// recording. (Engine plumbing is live; the extension opt-in is piece C.)
|
|
1363
|
+
if (groundedActuation && msg.payload?.reconReset === true) {
|
|
1364
|
+
appendSystemPrompt = `${appendSystemPrompt}\n\n${RECON_DIRECTIVE}`;
|
|
1365
|
+
}
|
|
1366
|
+
// QA mode: autonomous exploratory testing on top of grounded actuation,
|
|
1367
|
+
// bounded by the run's intensity budget (so the agent paces itself and
|
|
1368
|
+
// always writes a report rather than running away on cost).
|
|
1369
|
+
if (currentModeId === 'qa') {
|
|
1370
|
+
appendSystemPrompt = `${appendSystemPrompt}\n\n${QA_EXPLORATION_DIRECTIVE}`;
|
|
1371
|
+
appendSystemPrompt = `${appendSystemPrompt}\n\n${qaBudgetDirective(runIntensity)}`;
|
|
1372
|
+
// Two-pass: this is the functional verify pass with a pentest pass
|
|
1373
|
+
// queued behind it — keep it functional-only so it doesn't duplicate
|
|
1374
|
+
// the pentest pass's security work (the overlap that read like a
|
|
1375
|
+
// double security run).
|
|
1376
|
+
if (splitting)
|
|
1377
|
+
appendSystemPrompt = `${appendSystemPrompt}\n\n${QA_VERIFY_DEFER_SECURITY_DIRECTIVE}`;
|
|
1378
|
+
}
|
|
1379
|
+
// Business memory (QA + API modes only): inject what earlier runs learned
|
|
1380
|
+
// about THIS app so the agent doesn't re-ask answered business questions.
|
|
1381
|
+
// Best-effort — a memory read must never block a run.
|
|
1382
|
+
if (currentModeId === 'qa' || currentModeId === 'api-test') {
|
|
1383
|
+
try {
|
|
1384
|
+
const mem = formatMemoryForPrompt(await loadMemory(devRoot));
|
|
1385
|
+
if (mem)
|
|
1386
|
+
appendSystemPrompt = `${appendSystemPrompt}\n\n${mem}`;
|
|
1387
|
+
}
|
|
1388
|
+
catch { /* memory is best-effort */ }
|
|
1389
|
+
}
|
|
785
1390
|
// Snapshot the agent id so a switch-agent message during the run
|
|
786
1391
|
// can't smear two agents across one invocation. (We also gate
|
|
787
|
-
// switch-agent on
|
|
788
|
-
// allow/deny lists on the agent's sandboxStrength internally.
|
|
789
|
-
|
|
1392
|
+
// switch-agent on an active run, but this is defense in depth.) runSession gates
|
|
1393
|
+
// the allow/deny lists on the agent's sandboxStrength internally.
|
|
1394
|
+
// BYOK overrides the active CLI: the protocol picks which CLI is
|
|
1395
|
+
// driven; key/base/model are injected via env below. Otherwise the
|
|
1396
|
+
// user's selected local-CLI agent runs with its own auth.
|
|
1397
|
+
const invokedAgentId = currentByok ? byokAgentFor(currentByok.protocol) : currentAgentId;
|
|
1398
|
+
const effectiveModel = currentByok?.model || model;
|
|
790
1399
|
// Active mode's plugin-contributed MCP server ids — added to the
|
|
791
1400
|
// hard-sandbox allow list so Claude can actually call them. Claude
|
|
792
1401
|
// sanitises non-alphanumeric chars in the id when forming tool
|
|
793
|
-
// names (e.g. "@hover-dev/
|
|
1402
|
+
// names (e.g. "@hover-dev/api-test:flows" → "mcp__hover_dev_api_test_flows"),
|
|
794
1403
|
// and `--allowedTools mcp__foo` matches every tool under that
|
|
795
1404
|
// prefix. We pass the prefix `mcp__<sanitized>` so all of the
|
|
796
1405
|
// server's tools are reachable.
|
|
797
|
-
|
|
798
|
-
const activePluginMcpIds = [];
|
|
1406
|
+
// Control actuation is always reachable (every mode).
|
|
1407
|
+
const activePluginMcpIds = [mcpToolPrefix(CONTROL_MCP_ID)];
|
|
799
1408
|
if (currentModeId) {
|
|
800
1409
|
for (const p of plugins) {
|
|
801
1410
|
for (const srv of p.mcpServers ?? []) {
|
|
802
1411
|
const scope = srv.activeInModes ?? (p.mode ? [p.mode.id] : []);
|
|
803
|
-
if (scope.includes('*') || scope.includes(currentModeId)) {
|
|
804
|
-
activePluginMcpIds.push(
|
|
1412
|
+
if (scope.includes('*') || scope.includes(currentModeId) || apiScopeOk(scope) || pentestScopeOk(scope)) {
|
|
1413
|
+
activePluginMcpIds.push(mcpToolPrefix(srv.id));
|
|
805
1414
|
}
|
|
806
1415
|
}
|
|
807
1416
|
}
|
|
808
1417
|
}
|
|
1418
|
+
// codeContext: the fenced source reader is allowed in every mode.
|
|
1419
|
+
if (opts.codeContext)
|
|
1420
|
+
activePluginMcpIds.push(mcpToolPrefix(SOURCE_MCP_ID));
|
|
1421
|
+
// Mark a per-run boundary on the active mode's plugin (api-test scopes its
|
|
1422
|
+
// recorded checks to this run, not the whole session). Best-effort.
|
|
1423
|
+
const runStartPlugin = currentModeId ? pluginsByModeId.get(currentModeId) : null;
|
|
1424
|
+
if (runStartPlugin?.hooks?.['hover:run:start']) {
|
|
1425
|
+
try {
|
|
1426
|
+
await runStartPlugin.hooks['hover:run:start']({ devRoot, broadcast: broadcastPluginEvent });
|
|
1427
|
+
}
|
|
1428
|
+
catch (err) {
|
|
1429
|
+
process.stderr.write(`[hover] plugin "${runStartPlugin.name}" run:start failed: ${err instanceof Error ? err.message : String(err)}\n`);
|
|
1430
|
+
}
|
|
1431
|
+
}
|
|
1432
|
+
// QA + API capability: compose the api-test runtime into this QA run —
|
|
1433
|
+
// flip the resident MITM to intercept (activate) + mark its run boundary,
|
|
1434
|
+
// so the QA agent's API calls are captured/replayable. Mirror-undone at
|
|
1435
|
+
// run end. Best-effort: a hook failure must not break the functional run.
|
|
1436
|
+
if (apiActiveThisRun && apiTestPlugin) {
|
|
1437
|
+
try {
|
|
1438
|
+
await apiTestPlugin.hooks?.['hover:mode:activate']?.({
|
|
1439
|
+
devRoot,
|
|
1440
|
+
broadcast: broadcastPluginEvent,
|
|
1441
|
+
modeId: 'qa',
|
|
1442
|
+
setChromeProxy(proxy) { residentChromeProxy = proxy; },
|
|
1443
|
+
setMcpServerEnv(id, env) { mcpEnvOverrides.set(id, env); },
|
|
1444
|
+
});
|
|
1445
|
+
await apiTestPlugin.hooks?.['hover:run:start']?.({ devRoot, broadcast: broadcastPluginEvent });
|
|
1446
|
+
}
|
|
1447
|
+
catch (err) {
|
|
1448
|
+
process.stderr.write(`[hover/qa] api-test compose (run:start) failed: ${err instanceof Error ? err.message : String(err)}\n`);
|
|
1449
|
+
}
|
|
1450
|
+
}
|
|
1451
|
+
// QA + Pentest capability: compose the pentest runtime — flip the resident
|
|
1452
|
+
// MITM to intercept so the agent's offensive probes are recorded. The
|
|
1453
|
+
// PENTEST_SYSTEM_PROMPT (origin-locked, own-app) is added via the scope
|
|
1454
|
+
// checks above. Mirror-undone at run end. Best-effort.
|
|
1455
|
+
if (pentestActiveThisRun && pentestPlugin) {
|
|
1456
|
+
try {
|
|
1457
|
+
await pentestPlugin.hooks?.['hover:mode:activate']?.({
|
|
1458
|
+
devRoot,
|
|
1459
|
+
broadcast: broadcastPluginEvent,
|
|
1460
|
+
modeId: 'qa',
|
|
1461
|
+
setChromeProxy(proxy) { residentChromeProxy = proxy; },
|
|
1462
|
+
setMcpServerEnv(id, env) { mcpEnvOverrides.set(id, env); },
|
|
1463
|
+
});
|
|
1464
|
+
await pentestPlugin.hooks?.['hover:run:start']?.({ devRoot, broadcast: broadcastPluginEvent });
|
|
1465
|
+
}
|
|
1466
|
+
catch (err) {
|
|
1467
|
+
process.stderr.write(`[hover/qa] pentest compose (run:start) failed: ${err instanceof Error ? err.message : String(err)}\n`);
|
|
1468
|
+
}
|
|
1469
|
+
}
|
|
1470
|
+
// Screenshot previews: this run's MCP output dir (same path buildMcpConfig
|
|
1471
|
+
// uses) + a flag tracking whether the last tool_use was a screenshot, so
|
|
1472
|
+
// we can surface the freshly-written png to the chat as a tool_result lands.
|
|
1473
|
+
// (runShotDir is the run folder's screenshots/, computed at run start.)
|
|
1474
|
+
let pendingShot = null;
|
|
1475
|
+
let lastShotPath = null;
|
|
809
1476
|
const runResult = await runSession({
|
|
810
1477
|
agentId: invokedAgentId,
|
|
811
1478
|
prompt: text,
|
|
812
1479
|
sessionId: resumeSessionId,
|
|
813
1480
|
mcpConfig,
|
|
814
|
-
// cwd = devRoot so the agent
|
|
815
|
-
//
|
|
816
|
-
cwd
|
|
1481
|
+
// Memory setting: "shared" (default) → cwd = devRoot, so the agent
|
|
1482
|
+
// gets the project's CLAUDE.md + Claude Code auto-memory. "isolated"
|
|
1483
|
+
// → a throwaway temp cwd, so NONE of the user's CLAUDE.md / memory
|
|
1484
|
+
// leaks into the test agent.
|
|
1485
|
+
cwd: msg.payload?.isolateContext === true ? isolatedAgentCwd() : devRoot,
|
|
817
1486
|
appendSystemPrompt,
|
|
818
1487
|
// mcp__playwright covers every browser tool; active-mode plugin MCP
|
|
819
1488
|
// servers are appended. (Save-as-Skill retired → no Skill tool.)
|
|
820
1489
|
allowedToolsExtra: activePluginMcpIds,
|
|
1490
|
+
// Normal mode: deny the Playwright interaction tools so the agent
|
|
1491
|
+
// must use the grounded mcp__hover-control__* actuation tools.
|
|
1492
|
+
disallowedToolsExtra: groundedActuation ? GROUNDED_ACTUATION_DENY : undefined,
|
|
821
1493
|
maxBudgetUsd,
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
1494
|
+
// QA runs are bounded by the chosen intensity's STEP ceiling
|
|
1495
|
+
// (--max-turns); the prompt paces against the same number.
|
|
1496
|
+
maxTurns: runMode === 'qa' ? QA_INTENSITY[runIntensity].maxSteps : undefined,
|
|
1497
|
+
model: effectiveModel,
|
|
1498
|
+
effort: currentEffort,
|
|
1499
|
+
// BYOK: inject the protocol's auth env (key + base URL) into the
|
|
1500
|
+
// matching CLI. Otherwise, Local LLM (qwen host): point qwen at the
|
|
1501
|
+
// user's OpenAI-compatible endpoint via env (the endpoint's key, if
|
|
1502
|
+
// any, falls back to the ambient OPENAI_API_KEY / a placeholder).
|
|
1503
|
+
env: currentByok
|
|
1504
|
+
? byokEnvFor(currentByok)
|
|
1505
|
+
: invokedAgentId === 'qwen' && currentLocalBaseUrl
|
|
1506
|
+
? { OPENAI_BASE_URL: currentLocalBaseUrl, OPENAI_API_KEY: process.env.OPENAI_API_KEY || 'local' }
|
|
1507
|
+
: undefined,
|
|
1508
|
+
signal: run.abort.signal,
|
|
825
1509
|
}, (ev) => {
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
`Original spec left unchanged.`,
|
|
841
|
-
},
|
|
842
|
-
});
|
|
1510
|
+
// Cost/turns/tokens for the session ledger ride the session_end
|
|
1511
|
+
// event — snoop them off the stream. Also track the running `usage`
|
|
1512
|
+
// totals so an aborted/errored run still records partial spend.
|
|
1513
|
+
if (ev.kind === 'session_end') {
|
|
1514
|
+
sessionEnd = { turns: ev.turns, costUsd: ev.costUsd, tokens: ev.tokens };
|
|
1515
|
+
// Structured-first: parse the agent's JSON findings block, hand the
|
|
1516
|
+
// editor the clean summary + structured findings (so the Findings
|
|
1517
|
+
// card renders from data, not a Markdown scrape). All modes.
|
|
1518
|
+
if (typeof ev.summary === 'string' && ev.summary) {
|
|
1519
|
+
const parsed = parseFindings(ev.summary);
|
|
1520
|
+
runParsed = parsed; // keep for the ledger record + QA report
|
|
1521
|
+
ev.summary = parsed.summary;
|
|
1522
|
+
ev.findings = parsed.findings;
|
|
1523
|
+
}
|
|
843
1524
|
}
|
|
844
|
-
else {
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
1525
|
+
else if (ev.kind === 'usage') {
|
|
1526
|
+
sessionEnd = {
|
|
1527
|
+
turns: ev.turns ?? sessionEnd.turns,
|
|
1528
|
+
costUsd: ev.costUsd ?? sessionEnd.costUsd,
|
|
1529
|
+
tokens: ev.tokens ?? sessionEnd.tokens,
|
|
1530
|
+
};
|
|
1531
|
+
}
|
|
1532
|
+
// Screenshot preview: a take_screenshot tool_use writes a png by the
|
|
1533
|
+
// time its tool_result lands — resolve the freshest png in the run's
|
|
1534
|
+
// output dir and surface it to the chat. Best-effort, never throws.
|
|
1535
|
+
if (ev.kind === 'tool_use') {
|
|
1536
|
+
const bare = String(ev.tool ?? '').replace(/^mcp__.*?__/, '');
|
|
1537
|
+
// browser_take_screenshot (Playwright, plugin modes) OR take_screenshot
|
|
1538
|
+
// (hover-control, grounded modes — viewport only, never resizes the
|
|
1539
|
+
// page). Both write a PNG into the run's shot dir; we surface the
|
|
1540
|
+
// freshest one in the chat.
|
|
1541
|
+
if (bare === 'browser_take_screenshot' || bare === 'take_screenshot') {
|
|
1542
|
+
// browser_take_screenshot may be full-page; take_screenshot is
|
|
1543
|
+
// always viewport. Carry `full` so the chat can collapse a
|
|
1544
|
+
// full+viewport burst and keep the full-page one.
|
|
1545
|
+
pendingShot = { full: Boolean(ev.input?.fullPage) };
|
|
1546
|
+
}
|
|
1547
|
+
}
|
|
1548
|
+
else if (ev.kind === 'tool_result' && pendingShot) {
|
|
1549
|
+
const full = pendingShot.full;
|
|
1550
|
+
pendingShot = null;
|
|
1551
|
+
const shot = newestPng(runShotDir);
|
|
1552
|
+
// Dedupe exact repeats by path (a duplicated tool_use/result resolves
|
|
1553
|
+
// to the same freshest png); distinct full/viewport shots have
|
|
1554
|
+
// distinct paths and are coalesced downstream by the chat instead.
|
|
1555
|
+
if (shot && shot !== lastShotPath && !run.cancelled) {
|
|
1556
|
+
lastShotPath = shot;
|
|
1557
|
+
emitToRun({ type: 'screenshot', payload: { path: shot, full } });
|
|
857
1558
|
}
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
1559
|
+
}
|
|
1560
|
+
// Stream to whichever ws is attached NOW — survives the widget
|
|
1561
|
+
// reconnecting mid-run (emitToRun is a no-op during a reconnect gap).
|
|
1562
|
+
if (run.cancelled)
|
|
1563
|
+
return;
|
|
1564
|
+
emitToRun({ type: 'event', payload: ev });
|
|
1565
|
+
});
|
|
1566
|
+
// Append to the `.hover/sessions/` ledger (best-effort, never throws).
|
|
1567
|
+
// `saved`/`specSlug` are patched in later by markSessionSaved when the
|
|
1568
|
+
// user crystallizes — save-spec arrives as a separate WS message.
|
|
1569
|
+
await recordSession(run.cancelled ? 'aborted' : runResult.isError ? 'error' : 'completed', runResult.steps.filter((s) => s.kind === 'step').length, {
|
|
1570
|
+
summary: runResult.summary,
|
|
1571
|
+
errorReason: runResult.isError ? runResult.summary : undefined,
|
|
1572
|
+
steps: runResult.steps,
|
|
1573
|
+
});
|
|
1574
|
+
// QA Stage 4: resolve the agent's recorded candidate flows to their real
|
|
1575
|
+
// recorded steps and offer them as one-click "Crystallize" cards. Steps
|
|
1576
|
+
// are the actual hover-control actuations (record==replay), so each
|
|
1577
|
+
// candidate crystallizes to a clean, runnable spec. Candidates are
|
|
1578
|
+
// functional regression artifacts — the pentest phase produces a findings
|
|
1579
|
+
// report, not specs, so it never offers them (and avoids duplicating the
|
|
1580
|
+
// verify phase's candidates).
|
|
1581
|
+
if (runMode === 'qa' && !pentestActiveThisRun && !run.cancelled) {
|
|
1582
|
+
// Fallback: the agent may finish a clean flow but never call
|
|
1583
|
+
// record_candidate (compliance is unreliable, esp. on short directed
|
|
1584
|
+
// tasks). If it recorded none, offer the whole completed run's grounded
|
|
1585
|
+
// actuations as ONE candidate — crystallization shouldn't depend on the
|
|
1586
|
+
// agent remembering to mark it. Deterministic; the user renames at the
|
|
1587
|
+
// Crystallize prompt. Skipped on error runs and when nothing was acted.
|
|
1588
|
+
if (runCandidates.length === 0 && !runResult.isError) {
|
|
1589
|
+
const grounded = runResult.steps.filter(isCrystallizableStep);
|
|
1590
|
+
if (grounded.some(isRealAction)) {
|
|
1591
|
+
runCandidates.push({ name: 'Recorded flow', steps: grounded });
|
|
864
1592
|
}
|
|
865
1593
|
}
|
|
1594
|
+
const resolved = finalizeCandidates(runCandidates);
|
|
1595
|
+
if (resolved.length)
|
|
1596
|
+
emitToRun({ type: 'qa-candidates', payload: { candidates: resolved } });
|
|
1597
|
+
}
|
|
1598
|
+
// Forward a recon-discovered reset recipe to the extension (it owns
|
|
1599
|
+
// .hover/environments.json), keyed to this run's env. The extension
|
|
1600
|
+
// persists it onto the env record (piece C); harmless if unhandled.
|
|
1601
|
+
if (runResetRecipe && runEnv && !run.cancelled) {
|
|
1602
|
+
emitToRun({ type: 'reset-recipe', payload: { envId: runEnv.id, recipe: runResetRecipe } });
|
|
866
1603
|
}
|
|
867
1604
|
}
|
|
868
1605
|
catch (err) {
|
|
@@ -872,32 +1609,46 @@ export async function startService(opts) {
|
|
|
872
1609
|
// widget to reconcile two terminal events for one run. CDP isn't
|
|
873
1610
|
// suspect either — the user just stopped — so skip preflight
|
|
874
1611
|
// invalidation too.
|
|
875
|
-
if (!cancelled) {
|
|
1612
|
+
if (!run.cancelled) {
|
|
876
1613
|
const message = err instanceof Error ? err.message : String(err);
|
|
877
1614
|
const errorEvent = {
|
|
878
1615
|
kind: 'session_end',
|
|
879
1616
|
isError: true,
|
|
880
1617
|
summary: message,
|
|
881
1618
|
};
|
|
882
|
-
|
|
1619
|
+
emitToRun({ type: 'event', payload: errorEvent });
|
|
1620
|
+
await recordSession('error', 0, { errorReason: message });
|
|
883
1621
|
// Force the next command to re-probe CDP. The error could be from
|
|
884
1622
|
// Chrome dying, MCP spawning a stray Chromium, the user closing
|
|
885
1623
|
// their debug window — anything that would make a cached "all
|
|
886
|
-
// healthy" result lie.
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
: cdpUrl;
|
|
893
|
-
invalidatePreflight(invalCdpUrl);
|
|
1624
|
+
// healthy" result lie.
|
|
1625
|
+
invalidatePreflight(cdpUrl);
|
|
1626
|
+
}
|
|
1627
|
+
else {
|
|
1628
|
+
// User-initiated cancel — still worth a ledger row (spend view).
|
|
1629
|
+
await recordSession('aborted', 0, { errorReason: 'Cancelled by the user.' });
|
|
894
1630
|
}
|
|
895
1631
|
}
|
|
896
1632
|
finally {
|
|
897
|
-
|
|
898
|
-
|
|
1633
|
+
if (run.graceTimer)
|
|
1634
|
+
clearTimeout(run.graceTimer);
|
|
1635
|
+
activeRun = null;
|
|
899
1636
|
}
|
|
900
|
-
|
|
1637
|
+
// QA two-pass: a verify run with a pentest phase queued behind it. Now that
|
|
1638
|
+
// this (verify) run has finished and activeRun is clear, kick off the
|
|
1639
|
+
// pentest phase as a fresh re-entry — UNLESS the user cancelled. Each phase
|
|
1640
|
+
// is its own agent session (fresh context), so this is the token-cheap way
|
|
1641
|
+
// to sequence them; the pentest phase carries __phase2 so it can't re-split.
|
|
1642
|
+
if (pendingPhase2 && !run.cancelled) {
|
|
1643
|
+
const next = pendingPhase2;
|
|
1644
|
+
pendingPhase2 = null;
|
|
1645
|
+
void onClientMessage(Buffer.from(JSON.stringify(next)));
|
|
1646
|
+
}
|
|
1647
|
+
else {
|
|
1648
|
+
pendingPhase2 = null;
|
|
1649
|
+
}
|
|
1650
|
+
};
|
|
1651
|
+
ws.on('message', onClientMessage);
|
|
901
1652
|
});
|
|
902
1653
|
// ───────────────────────── service:start + single Chrome ─────────────────
|
|
903
1654
|
// Fire plugin `hover:service:start` hooks BEFORE launching Chrome, so a
|
|
@@ -941,6 +1692,7 @@ export async function startService(opts) {
|
|
|
941
1692
|
url: launchUrl,
|
|
942
1693
|
port: launchPort,
|
|
943
1694
|
proxy: residentChromeProxy ?? undefined,
|
|
1695
|
+
userDataDir,
|
|
944
1696
|
})
|
|
945
1697
|
.then((r) => {
|
|
946
1698
|
if (!r.ok) {
|
|
@@ -954,6 +1706,20 @@ export async function startService(opts) {
|
|
|
954
1706
|
return {
|
|
955
1707
|
port,
|
|
956
1708
|
async close() {
|
|
1709
|
+
// Kill any in-flight run FIRST. The run is held at service scope and is
|
|
1710
|
+
// only torn down by aborting its signal (invoke.ts SIGTERMs the agent
|
|
1711
|
+
// child on abort). wss.close() below stops the listener but does NOT
|
|
1712
|
+
// terminate established client sockets and no ws.on('close') fires — so
|
|
1713
|
+
// without this the agent child would keep driving the debug Chrome as an
|
|
1714
|
+
// orphan after the dev server is gone, and a pending grace timer would
|
|
1715
|
+
// fire abort() 15s into the void.
|
|
1716
|
+
if (activeRun) {
|
|
1717
|
+
if (activeRun.graceTimer)
|
|
1718
|
+
clearTimeout(activeRun.graceTimer);
|
|
1719
|
+
activeRun.cancelled = true;
|
|
1720
|
+
activeRun.abort.abort();
|
|
1721
|
+
activeRun = null;
|
|
1722
|
+
}
|
|
957
1723
|
// Deactivate the active mode first, then run every plugin's
|
|
958
1724
|
// shutdown hook (regardless of which mode is active — a plugin may
|
|
959
1725
|
// own background state even outside its mode). Best-effort: log
|
|
@@ -981,6 +1747,26 @@ export async function startService(opts) {
|
|
|
981
1747
|
await new Promise((res, rej) => {
|
|
982
1748
|
wss.close(err => (err ? rej(err) : res()));
|
|
983
1749
|
});
|
|
1750
|
+
// Multi-host model: a per-session host owns its own Chrome (distinct
|
|
1751
|
+
// userDataDir + CDP port). Tear that Chrome down with the host so the
|
|
1752
|
+
// slot's CDP port frees up and a session reusing the slot gets a fresh
|
|
1753
|
+
// browser — not the previous session's logged-in profile. The legacy
|
|
1754
|
+
// single-Chrome model (no userDataDir) deliberately leaves its Chrome
|
|
1755
|
+
// running, reused across runs / dev-server restarts.
|
|
1756
|
+
if (userDataDir) {
|
|
1757
|
+
const launchPort = (() => {
|
|
1758
|
+
try {
|
|
1759
|
+
return Number(new URL(cdpUrl).port) || 9222;
|
|
1760
|
+
}
|
|
1761
|
+
catch {
|
|
1762
|
+
return 9222;
|
|
1763
|
+
}
|
|
1764
|
+
})();
|
|
1765
|
+
try {
|
|
1766
|
+
await closeDebugChrome(launchPort);
|
|
1767
|
+
}
|
|
1768
|
+
catch { /* best-effort */ }
|
|
1769
|
+
}
|
|
984
1770
|
},
|
|
985
1771
|
};
|
|
986
1772
|
}
|