@hover-dev/core 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -2
- package/dist/agents/aider.d.ts +16 -0
- package/dist/agents/aider.d.ts.map +1 -0
- package/dist/agents/aider.js +169 -0
- package/dist/agents/cursor.d.ts +18 -0
- package/dist/agents/cursor.d.ts.map +1 -0
- package/dist/agents/cursor.js +229 -0
- package/dist/agents/gemini.d.ts +17 -0
- package/dist/agents/gemini.d.ts.map +1 -0
- package/dist/agents/gemini.js +197 -0
- package/dist/agents/qwen.d.ts +17 -0
- package/dist/agents/qwen.d.ts.map +1 -0
- package/dist/agents/qwen.js +183 -0
- package/dist/agents/registry.d.ts +7 -5
- package/dist/agents/registry.d.ts.map +1 -1
- package/dist/agents/registry.js +15 -5
- package/dist/plugin-api.d.ts +10 -0
- package/dist/plugin-api.d.ts.map +1 -1
- package/dist/scripts/bench-multi-tab.d.ts +2 -0
- package/dist/scripts/bench-multi-tab.d.ts.map +1 -0
- package/dist/scripts/bench-multi-tab.js +192 -0
- package/dist/service/cdpHint.d.ts.map +1 -1
- package/dist/service/cdpHint.js +39 -0
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -17,7 +17,9 @@ The local Node service. Owns:
|
|
|
17
17
|
| `detect.ts` | `detectAgents()`, `resolveBinForAgent()`, `resolveOnPath()` — PATH scanning |
|
|
18
18
|
| `argv.ts` | `buildArgv()` — protocol-aware argv construction, throws `UnsupportedAgentProtocolError` for `acp` / `pi-rpc` |
|
|
19
19
|
| `invoke.ts` | `invokeAgent()` — async-iterable spawning + stdout streaming |
|
|
20
|
-
| `claude.ts` | Claude Code descriptor: `claude -p`, stream-json parser, sandbox flags |
|
|
20
|
+
| `claude.ts` | Claude Code descriptor: `claude -p`, stream-json parser, hard sandbox flags |
|
|
21
|
+
| `codex.ts` | OpenAI Codex CLI descriptor: `codex exec --json`, JSONL parser, soft sandbox (`--sandbox read-only`) |
|
|
22
|
+
| `cursor.ts` | Cursor CLI descriptor (v0.9): stream-JSON / NDJSON parser, soft sandbox |
|
|
21
23
|
|
|
22
24
|
To add an agent: implement an `AgentDescriptor`, register it in `registry.ts`. Done.
|
|
23
25
|
|
|
@@ -43,7 +45,7 @@ pnpm smoke http://localhost:5173/ "log in then add a todo named 'verify hover'"
|
|
|
43
45
|
Environment variables:
|
|
44
46
|
|
|
45
47
|
- `HOVER_CDP` — CDP URL (default `http://localhost:9222`)
|
|
46
|
-
- `HOVER_AGENT` — agent id (omit to auto-detect; tries the user's stated preference, then the first installed agent in registry order — `claude` → `codex` today)
|
|
48
|
+
- `HOVER_AGENT` — agent id (omit to auto-detect; tries the user's stated preference, then the first installed agent in registry order — `claude` → `codex` → `cursor` today)
|
|
47
49
|
- `HOVER_MODEL` — model for the agent (default `sonnet`, much cheaper than opus)
|
|
48
50
|
|
|
49
51
|
## Sandboxing (what the smoke test enforces)
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { AgentDescriptor, ParserState } from './types.js';
|
|
2
|
+
export declare const aiderAgent: AgentDescriptor;
|
|
3
|
+
/**
|
|
4
|
+
* Test-only escape hatches, same pattern as cursor.ts / codex.ts.
|
|
5
|
+
*/
|
|
6
|
+
export declare const __testing: {
|
|
7
|
+
freshState: () => ParserState;
|
|
8
|
+
resetCounters: (state: ParserState) => void;
|
|
9
|
+
getState: (state: ParserState) => {
|
|
10
|
+
runningLines: number;
|
|
11
|
+
runningSessionId: string | undefined;
|
|
12
|
+
collectedText: string[];
|
|
13
|
+
sawErrorEvent: boolean;
|
|
14
|
+
};
|
|
15
|
+
};
|
|
16
|
+
//# sourceMappingURL=aider.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"aider.d.ts","sourceRoot":"","sources":["../../src/agents/aider.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAA8B,WAAW,EAAE,MAAM,YAAY,CAAC;AAsH3F,eAAO,MAAM,UAAU,EAAE,eA0GxB,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,SAAS;sBACJ,WAAW;2BACJ,WAAW;sBAChB,WAAW;;;;;;CAS9B,CAAC"}
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
/**
 * Lazily installs the aider parser fields on a caller-supplied state bag.
 *
 * A numeric `runningLines` marks the bag as already initialised; otherwise
 * all four fields are (re)set to their starting values. The same object is
 * returned so callers can chain on it.
 *
 * @param state mutable parser state object shared across parseEvent calls
 * @returns the same `state` object, now carrying the aider fields
 */
function aiderState(state) {
    const alreadyInitialised = typeof state.runningLines === 'number';
    if (alreadyInitialised) {
        return state;
    }
    return Object.assign(state, {
        runningLines: 0,
        collectedText: [],
        sawErrorEvent: false,
        runningSessionId: undefined,
    });
}
|
|
10
|
+
/**
 * Puts every aider parser field back to its starting value.
 *
 * `collectedText` is replaced with a brand-new array rather than emptied in
 * place, so earlier references to the old array keep their contents.
 *
 * @param s an aider parser state object
 */
function resetAiderCounters(s) {
    Object.assign(s, {
        runningLines: 0,
        collectedText: [],
        sawErrorEvent: false,
        runningSessionId: undefined,
    });
}
|
|
16
|
+
/**
 * Instruction preface glued in front of the user's prompt. Aider exposes no
 * system-prompt flag, so these sentences travel as the leading user-message
 * text (cursor.ts takes the same approach). The sentences are joined with a
 * single space.
 */
const AIDER_PROMPT_PREFACE = [
    'You are operating in Hover, a browser-testing tool.',
    'Use ONLY the MCP playwright tools (prefixed `mcp__playwright__` / `mcp__hover-playwright__`) to drive the browser.',
    'Do NOT use shell, file-edit, web-search, or any other built-in tool.',
    'Do NOT navigate to a URL the user is already on; check the page state via `browser_snapshot` first.',
    'When the task is complete, emit a short summary and stop.',
].reduce((soFar, sentence) => `${soFar} ${sentence}`);
|
|
28
|
+
/**
 * Decides whether an aider stdout line is chatter that should be dropped
 * instead of being surfaced as a text event.
 *
 * Blank (whitespace-only) lines are always noise. Beyond that the list is
 * deliberately conservative: only the banner / status patterns below are
 * skipped, and everything else falls through as real output.
 *
 * @param line one raw stdout line
 * @returns true when the line should be discarded
 */
function isNoiseLine(line) {
    const text = line.trim();
    if (text.length === 0) {
        return true;
    }
    // Banner and status lines aider prints around the actual answer.
    const noisePatterns = [
        /^Aider v\d/i,
        /^Main model:/i,
        /^Weak model:/i,
        /^Git repo:/i,
        /^Repo-map:/i,
        /^VSCode terminal detected/i,
        /^Use \/help/i,
        /^Tokens:.*sent.*received/i,
        /^─{3,}$/, // horizontal rule
    ];
    return noisePatterns.some(pattern => pattern.test(text));
}
|
|
58
|
+
/**
 * Reports whether a line looks like an aider error / API failure, i.e. its
 * trimmed text starts (case-insensitively) with one of the known markers.
 *
 * @param line one raw stdout line
 * @returns true when the line begins with an error marker
 */
function detectErrorLine(line) {
    const errorPrefix = /^(error|fatal|api error|litellm.*error)/i;
    const trimmed = line.trim();
    return errorPrefix.test(trimmed);
}
|
|
62
|
+
/**
 * Agent descriptor for the Aider CLI: builds its argv, turns its plain-text
 * stdout into events line by line, and synthesizes the closing session_end.
 */
export const aiderAgent = {
    id: 'aider',
    binName: 'aider',
    protocol: 'argv',
    streamFormat: 'plain-text',
    sandboxStrength: 'soft',
    display: {
        label: 'Aider',
        tagline: 'Aider — soft sandbox, plain-text stream, no MCP support',
        homepage: 'https://aider.chat',
        installHint: 'pipx install aider-chat',
    },
    /**
     * Builds the aider command-line arguments for one run.
     *
     * Reads `opts.prompt`, `opts.appendSystemPrompt` and `opts.model`.
     * `opts.sessionId` is ignored: aider's `--restore-chat-history` is a
     * boolean with no way to pick a specific past session, so it is never
     * passed. There are likewise no aider equivalents for --max-budget-usd,
     * --allowedTools, --mcp-config or --append-system-prompt.
     *
     * @param opts invocation options
     * @returns the argv array (without the binary name)
     */
    buildArgs(opts) {
        // No system-prompt flag exists, so the Hover preface (plus any
        // non-blank caller-supplied addition) is prepended to the prompt.
        const extra = opts.appendSystemPrompt;
        const hasExtra = Boolean(extra) && extra.trim().length > 0;
        const preface = hasExtra
            ? `${AIDER_PROMPT_PREFACE} ${extra}`
            : AIDER_PROMPT_PREFACE;
        const argv = [
            '--message', `${preface}\n\n${opts.prompt}`,
            // Auto-confirm every prompt so the run cannot hang.
            '--yes-always',
            // Intended to give whole lines rather than a character stream,
            // which suits the per-line parseEvent loop.
            '--no-stream',
            // Defang git side-effects: aider auto-commits its edits by
            // default, a hazard if cwd happens to be a stale repo.
            '--no-auto-commits',
            '--no-git',
        ];
        if (opts.model) {
            argv.push('--model', opts.model);
        }
        return argv;
    },
    /**
     * Converts one stdout line into zero or more events.
     *
     * The first line seen for a given state (even a blank or noise line)
     * yields a synthetic `session_start`, because aider reports no session
     * id of its own. Noise lines add nothing further. Error-looking lines
     * set the error flag and are surfaced as text; every other line is
     * counted, remembered for the final summary, and surfaced as text.
     * Aider has no per-tool events, so tool_use / tool_result never appear.
     *
     * @param line one raw stdout line
     * @param state mutable parser state, reused across calls for one run
     * @returns the events produced by this line
     */
    parseEvent(line, state = {}) {
        const st = aiderState(state);
        const events = [];
        if (!st.runningSessionId) {
            // The random suffix matters: two states created within the same
            // millisecond would otherwise share an id.
            const suffix = Math.random().toString(36).slice(2, 8);
            st.runningSessionId = `aider-${Date.now().toString(36)}-${suffix}`;
            events.push({ kind: 'session_start', sessionId: st.runningSessionId });
        }
        if (isNoiseLine(line)) {
            return events;
        }
        const text = line.trim();
        if (detectErrorLine(line)) {
            st.sawErrorEvent = true;
        }
        else {
            st.runningLines += 1;
            st.collectedText.push(text);
        }
        events.push({ kind: 'text', text });
        return events;
    },
    /**
     * Synthesizes the `session_end` event once the child process has
     * exited; aider prints no terminal event of its own.
     *
     * `turns` is the number of non-noise, non-error lines seen. `summary`
     * is the last such line, or undefined when none were collected.
     * `costUsd` is deliberately left out: the "Tokens:" status line is
     * free-form text, not something to derive a number from.
     *
     * @param exitCode process exit code, or null/undefined when unknown
     * @param state the parser state used for this run
     * @returns the session_end event
     */
    onStreamEnd(exitCode, state = {}) {
        const st = aiderState(state);
        const texts = st.collectedText;
        const summary = texts.length > 0 ? texts[texts.length - 1] : undefined;
        const exitedBadly = exitCode != null && exitCode !== 0;
        return {
            kind: 'session_end',
            turns: st.runningLines,
            isError: st.sawErrorEvent || exitedBadly,
            summary,
        };
    },
};
|
|
154
|
+
/**
 * Test-only hooks into the aider parser state (same pattern as cursor.ts /
 * codex.ts): create an empty state, reset one, or take a snapshot of one.
 */
export const __testing = {
    /** Returns a new, uninitialised state object. */
    freshState() {
        return {};
    },
    /** Initialises the state if needed, then resets every field. */
    resetCounters(state) {
        resetAiderCounters(aiderState(state));
    },
    /** Returns a snapshot of the state; `collectedText` is a copy. */
    getState(state) {
        const st = aiderState(state);
        const { runningLines, runningSessionId, sawErrorEvent } = st;
        return {
            runningLines,
            runningSessionId,
            collectedText: st.collectedText.slice(),
            sawErrorEvent,
        };
    },
};
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { AgentDescriptor, ParserState } from './types.js';
|
|
2
|
+
export declare const cursorAgent: AgentDescriptor;
|
|
3
|
+
/**
|
|
4
|
+
* Test-only escape hatches. Same pattern as codex.ts so the tests can drive
|
|
5
|
+
* the parser without going through invokeAgent.
|
|
6
|
+
*/
|
|
7
|
+
export declare const __testing: {
|
|
8
|
+
freshState: () => ParserState;
|
|
9
|
+
resetCounters: (state: ParserState) => void;
|
|
10
|
+
getState: (state: ParserState) => {
|
|
11
|
+
runningTurns: number;
|
|
12
|
+
runningSessionId: string | undefined;
|
|
13
|
+
runningModel: string | undefined;
|
|
14
|
+
lastAssistantText: string | undefined;
|
|
15
|
+
sawErrorEvent: boolean;
|
|
16
|
+
};
|
|
17
|
+
};
|
|
18
|
+
//# sourceMappingURL=cursor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cursor.d.ts","sourceRoot":"","sources":["../../src/agents/cursor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAA8B,WAAW,EAAE,MAAM,YAAY,CAAC;AAoL3F,eAAO,MAAM,WAAW,EAAE,eAuJzB,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,SAAS;sBACJ,WAAW;2BACJ,WAAW;sBAChB,WAAW;;;;;;;CAU9B,CAAC"}
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
/**
 * Lazily installs the cursor parser fields on a caller-supplied state bag
 * and returns that same object.
 *
 * The scalar fields are initialised when `runningTurns` is not yet a number.
 * The `toolNameByCallId` Map is checked separately: `runningTurns` is also
 * the sentinel used by other agent parsers, so a state object can already
 * carry a numeric `runningTurns` without ever having received this parser's
 * Map. Without the separate check, later `.set()` / `.clear()` calls on the
 * missing Map would throw.
 *
 * @param state mutable parser state object shared across parseEvent calls
 * @returns the same `state` object, now carrying the cursor fields
 */
function cursorState(state) {
    if (typeof state.runningTurns !== 'number') {
        state.runningTurns = 0;
        state.runningSessionId = undefined;
        state.runningModel = undefined;
        state.lastAssistantText = undefined;
        state.sawErrorEvent = false;
    }
    // Independent of the scalar sentinel above; an existing Map is kept.
    if (!(state.toolNameByCallId instanceof Map)) {
        state.toolNameByCallId = new Map();
    }
    return state;
}
|
|
12
|
+
/**
 * Puts every cursor parser field back to its starting value. The
 * `toolNameByCallId` Map is emptied in place (the instance is kept).
 *
 * @param s an initialised cursor parser state object
 */
function resetCursorCounters(s) {
    Object.assign(s, {
        runningTurns: 0,
        runningSessionId: undefined,
        runningModel: undefined,
        lastAssistantText: undefined,
        sawErrorEvent: false,
    });
    s.toolNameByCallId.clear();
}
|
|
20
|
+
/**
 * Best-effort extraction of a tool name and its input from a Cursor
 * `tool_call` envelope.
 *
 * The envelope wraps each call in a sub-object whose key ends in `ToolCall`
 * (`shellToolCall`, `mcpToolCall`, ...). Resolution order for the name:
 *   1. a non-empty string `tool` field on the sub-object, else
 *   2. a non-empty string `name` field on the sub-object, else
 *   3. the wrapper key with its `ToolCall` suffix removed, else
 *   4. `'unknown'` when no wrapper key exists.
 * A leading `mcp__playwright__` / `mcp__hover-playwright__` prefix is then
 * stripped so names line up with what the other agents emit.
 *
 * The input is the first truthy of the sub-object's `input`, `arguments`
 * or `args`; failing that, the sub-object itself.
 *
 * @param tc the `tool_call` value from a stream event (may be missing)
 * @returns `{ tool, input }`
 */
function extractToolName(tc) {
    if (!tc) {
        return { tool: 'unknown', input: undefined };
    }
    const key = Object.keys(tc).find(k => k.endsWith('ToolCall'));
    const payload = (key ? tc[key] : undefined) ?? undefined;
    const isRecord = Boolean(payload) && typeof payload === 'object';
    // Non-empty string stored under `field` on the payload, or null.
    const stringField = (field) => {
        if (!isRecord || !(field in payload)) {
            return null;
        }
        const value = payload[field];
        return typeof value === 'string' && value ? value : null;
    };
    let rawName = stringField('tool') || stringField('name');
    if (!rawName) {
        rawName = key ? key.replace(/ToolCall$/, '') : 'unknown';
    }
    const tool = [/^mcp__playwright__/, /^mcp__hover-playwright__/]
        .reduce((name, prefix) => name.replace(prefix, ''), rawName);
    let input = payload;
    if (isRecord) {
        for (const field of ['input', 'arguments', 'args']) {
            if (field in payload && payload[field]) {
                input = payload[field];
                break;
            }
        }
    }
    return { tool, input };
}
|
|
52
|
+
/**
 * Reports whether a completed Cursor `tool_call` envelope signals failure.
 *
 * Looks at the sub-object stored under the first key ending in `ToolCall`
 * and treats the call as failed when that sub-object has `is_error === true`
 * or a string `status` containing "error" or "fail" (case-insensitive).
 *
 * @param tc the `tool_call` value from a stream event (may be missing)
 * @returns true when the envelope indicates an error
 */
function detectToolError(tc) {
    const usable = Boolean(tc) && typeof tc === 'object';
    if (!usable) {
        return false;
    }
    const key = Object.keys(tc).find(k => k.endsWith('ToolCall'));
    const payload = key ? tc[key] : undefined;
    if (!payload) {
        return false;
    }
    const flagged = payload.is_error === true;
    const badStatus = typeof payload.status === 'string' && /error|fail/i.test(payload.status);
    return flagged || badStatus;
}
|
|
65
|
+
/**
 * Instruction preface glued in front of the user's prompt. Cursor has no CLI
 * flag comparable to claude's --append-system-prompt or codex's
 * developer_instructions, so these sentences are sent as the leading part of
 * the prompt instead. The sentences are joined with a single space.
 */
const CURSOR_PROMPT_PREFACE = [
    'You are operating in Hover, a browser-testing tool.',
    'Use ONLY the MCP playwright tools (prefixed `mcp__playwright__` / `mcp__hover-playwright__`) to drive the browser.',
    'Do NOT use shell, file-edit, web-search, or any other built-in tool.',
    'Do NOT navigate to a URL the user is already on; check the page state via `browser_snapshot` first.',
    'When the task is complete, emit a short summary and stop.',
].reduce((soFar, sentence) => `${soFar} ${sentence}`);
|
|
77
|
+
/**
 * Agent descriptor for the Cursor CLI (`cursor-agent`): builds its argv,
 * parses its stream-json (NDJSON) stdout into events, and provides a
 * fallback session_end for runs that exit without a `result` event.
 */
export const cursorAgent = {
    id: 'cursor',
    binName: 'cursor-agent',
    protocol: 'argv',
    streamFormat: 'json-lines',
    sandboxStrength: 'soft',
    display: {
        label: 'Cursor',
        tagline: 'Cursor — soft sandbox (no built-in tool deny-list)',
        homepage: 'https://cursor.com/docs/cli/overview',
        installHint: 'curl https://cursor.com/install -fsS | bash',
    },
    /**
     * Builds the cursor-agent command-line arguments for one run.
     *
     * Reads `opts.prompt`, `opts.appendSystemPrompt`, `opts.model` and
     * `opts.sessionId`. Nothing here forwards an MCP config, a budget, or
     * an allow/deny tool list; per the original notes Cursor has no
     * per-invocation flags for those (MCP servers are configured in
     * ~/.cursor/mcp.json or a repo-local .cursor/mcp.json).
     *
     * @param opts invocation options
     * @returns the argv array (without the binary name)
     */
    buildArgs(opts) {
        // Cursor exposes no system-prompt flag, so the Hover preface (plus
        // any non-blank caller-supplied addition) is prepended to the prompt.
        const preface = opts.appendSystemPrompt && opts.appendSystemPrompt.trim().length > 0
            ? `${CURSOR_PROMPT_PREFACE} ${opts.appendSystemPrompt}`
            : CURSOR_PROMPT_PREFACE;
        const finalPrompt = `${preface}\n\n${opts.prompt}`;
        const args = ['-p', finalPrompt];
        // NDJSON streaming output.
        args.push('--output-format', 'stream-json');
        // Non-interactive: auto-approve commands and MCP tools so the run
        // doesn't hang waiting for permission.
        args.push('--force');
        if (opts.model) {
            args.push('--model', opts.model);
        }
        if (opts.sessionId) {
            // Always pass an explicit chat id; a bare --resume would pick
            // the latest session, which is not what we want here.
            args.push('--resume', opts.sessionId);
        }
        return args;
    },
    /**
     * Converts one stdout line into zero or more events.
     *
     * Blank lines yield nothing. Lines that are not JSON, or that parse to
     * something other than an object (for example the literal `null`, which
     * would otherwise crash on the first property access), are passed
     * through as `raw` events. Recognised envelopes:
     *   - `system` / `init`      → resets the state, emits `session_start`
     *                              when a session id is present
     *   - `tool_call` / `started`   → `tool_use`
     *   - `tool_call` / `completed` → `tool_result`
     *   - `assistant`            → `usage` (turn counter) plus one `text`
     *                              per non-empty text block
     *   - `result`               → `session_end`
     *   - any type containing "error" → flags the error, emits `text`
     * Anything else is ignored.
     *
     * @param line one raw stdout line
     * @param state mutable parser state, reused across calls for one run
     * @returns the events produced by this line
     */
    parseEvent(line, state = {}) {
        if (!line.trim())
            return [];
        let ev;
        try {
            ev = JSON.parse(line);
        }
        catch {
            return [{ kind: 'raw', line }];
        }
        // JSON.parse also accepts bare primitives; `null` in particular
        // would throw on `ev.type`. Treat non-objects like any other
        // non-event line.
        if (ev === null || typeof ev !== 'object')
            return [{ kind: 'raw', line }];
        const s = cursorState(state);
        const out = [];
        if (ev.type === 'system' && ev.subtype === 'init') {
            resetCursorCounters(s);
            s.runningModel = ev.model;
            if (ev.session_id) {
                s.runningSessionId = ev.session_id;
                out.push({ kind: 'session_start', sessionId: ev.session_id, model: ev.model });
            }
            return out;
        }
        if (ev.type === 'tool_call' && ev.subtype === 'started') {
            const { tool, input } = extractToolName(ev.tool_call);
            if (ev.call_id)
                s.toolNameByCallId.set(ev.call_id, tool);
            out.push({ kind: 'tool_use', tool, input });
            return out;
        }
        if (ev.type === 'tool_call' && ev.subtype === 'completed') {
            const isError = detectToolError(ev.tool_call);
            out.push({ kind: 'tool_result', isError });
            return out;
        }
        if (ev.type === 'assistant') {
            s.runningTurns += 1;
            // Emit a usage event so the widget can advance its turn counter.
            // costUsd is intentionally omitted — we don't fabricate a number.
            out.push({ kind: 'usage', turns: s.runningTurns });
            for (const block of ev.message?.content ?? []) {
                // `block?.` keeps a null entry from aborting the whole line.
                if (block?.type === 'text') {
                    const text = block.text?.trim();
                    if (text) {
                        s.lastAssistantText = text;
                        out.push({ kind: 'text', text });
                    }
                }
            }
            return out;
        }
        if (ev.type === 'result') {
            // The result event IS the session_end; forward it directly.
            const isError = ev.is_error === true ||
                (typeof ev.subtype === 'string' && /error|fail/i.test(ev.subtype));
            if (isError)
                s.sawErrorEvent = true;
            out.push({
                kind: 'session_end',
                turns: s.runningTurns,
                // costUsd intentionally undefined — no $ figure is available.
                isError,
                summary: ev.result ?? s.lastAssistantText,
            });
            return out;
        }
        // Surface mid-stream error envelopes as text so the widget shows the
        // problem instead of silently hanging.
        if (ev.type && /error/i.test(ev.type)) {
            s.sawErrorEvent = true;
            // No documented `message` field on these — best-effort.
            const msg = ev.result ?? `[cursor] ${ev.type}`;
            out.push({ kind: 'text', text: msg });
            return out;
        }
        return [];
    },
    /**
     * Fallback `session_end` for a child that exits without ever emitting a
     * `result` event (e.g. crash, signal). Built from the accumulated state:
     * the turn count, the last assistant text as summary, and an error flag
     * that is set when an error event was seen or the exit code is non-zero.
     *
     * @param exitCode process exit code, or null/undefined when unknown
     * @param state the parser state used for this run
     * @returns the session_end event
     */
    onStreamEnd(exitCode, state = {}) {
        const s = cursorState(state);
        return {
            kind: 'session_end',
            turns: s.runningTurns,
            // costUsd intentionally undefined — see parseEvent note.
            isError: s.sawErrorEvent || (exitCode != null && exitCode !== 0),
            summary: s.lastAssistantText,
        };
    },
};
|
|
212
|
+
/**
 * Test-only hooks into the cursor parser state (same pattern as codex.ts),
 * letting tests drive the parser without going through invokeAgent.
 */
export const __testing = {
    /** Returns a new, uninitialised state object. */
    freshState() {
        return {};
    },
    /** Initialises the state if needed, then resets every field. */
    resetCounters(state) {
        resetCursorCounters(cursorState(state));
    },
    /** Returns a snapshot of the scalar fields of the state. */
    getState(state) {
        const { runningTurns, runningSessionId, runningModel, lastAssistantText, sawErrorEvent, } = cursorState(state);
        return {
            runningTurns,
            runningSessionId,
            runningModel,
            lastAssistantText,
            sawErrorEvent,
        };
    },
};
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { AgentDescriptor, ParserState } from './types.js';
|
|
2
|
+
export declare const geminiAgent: AgentDescriptor;
|
|
3
|
+
/**
|
|
4
|
+
* Test-only escape hatches, same pattern as cursor.ts / codex.ts.
|
|
5
|
+
*/
|
|
6
|
+
export declare const __testing: {
|
|
7
|
+
freshState: () => ParserState;
|
|
8
|
+
resetCounters: (state: ParserState) => void;
|
|
9
|
+
getState: (state: ParserState) => {
|
|
10
|
+
runningTurns: number;
|
|
11
|
+
runningSessionId: string | undefined;
|
|
12
|
+
runningModel: string | undefined;
|
|
13
|
+
lastAssistantText: string | undefined;
|
|
14
|
+
sawErrorEvent: boolean;
|
|
15
|
+
};
|
|
16
|
+
};
|
|
17
|
+
//# sourceMappingURL=gemini.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"gemini.d.ts","sourceRoot":"","sources":["../../src/agents/gemini.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAA8B,WAAW,EAAE,MAAM,YAAY,CAAC;AA4K3F,eAAO,MAAM,WAAW,EAAE,eAiJzB,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,SAAS;sBACJ,WAAW;2BACJ,WAAW;sBAChB,WAAW;;;;;;;CAU9B,CAAC"}
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
/**
 * Lazily installs the gemini parser fields on a caller-supplied state bag
 * and returns that same object.
 *
 * The scalar fields are initialised when `runningTurns` is not yet a number.
 * The `toolNameByUseId` Map is checked separately: `runningTurns` is also
 * the sentinel used by other agent parsers, so a state object can already
 * carry a numeric `runningTurns` without ever having received this parser's
 * Map. Without the separate check, later `.set()` / `.clear()` calls on the
 * missing Map would throw.
 *
 * @param state mutable parser state object shared across parseEvent calls
 * @returns the same `state` object, now carrying the gemini fields
 */
function geminiState(state) {
    if (typeof state.runningTurns !== 'number') {
        state.runningTurns = 0;
        state.runningSessionId = undefined;
        state.runningModel = undefined;
        state.lastAssistantText = undefined;
        state.sawErrorEvent = false;
    }
    // Independent of the scalar sentinel above; an existing Map is kept.
    if (!(state.toolNameByUseId instanceof Map)) {
        state.toolNameByUseId = new Map();
    }
    return state;
}
|
|
12
|
+
/**
 * Puts every gemini parser field back to its starting value. The
 * `toolNameByUseId` Map is emptied in place (the instance is kept).
 *
 * @param s an initialised gemini parser state object
 */
function resetGeminiCounters(s) {
    Object.assign(s, {
        runningTurns: 0,
        runningSessionId: undefined,
        runningModel: undefined,
        lastAssistantText: undefined,
        sawErrorEvent: false,
    });
    s.toolNameByUseId.clear();
}
|
|
20
|
+
/**
 * Removes a leading `mcp__playwright__` and then a leading
 * `mcp__hover-playwright__` from a tool name, so names match the normalised
 * form the other agents emit. Names without either prefix come back as-is.
 *
 * @param raw tool name as reported by the CLI
 * @returns the name without the MCP server prefix
 */
function stripMcpPrefix(raw) {
    const prefixes = [/^mcp__playwright__/, /^mcp__hover-playwright__/];
    let name = raw;
    for (const prefix of prefixes) {
        name = name.replace(prefix, '');
    }
    return name;
}
|
|
25
|
+
/**
 * Pulls the assistant text out of a `message` event.
 *
 * `content` is handled in two shapes, since it is unclear which one a given
 * gemini build ships:
 *   - a plain string → returned trimmed;
 *   - an array of content blocks → the trimmed `text` of every
 *     `{ type: 'text', text: string }` block, joined with newlines.
 * Blocks that are null/undefined or of any other type are skipped rather
 * than crashing the parse of the whole line.
 *
 * @param ev a parsed `message` event
 * @returns the extracted text, or undefined when there is no non-blank text
 */
function extractMessageText(ev) {
    const content = ev.content;
    if (typeof content === 'string') {
        const t = content.trim();
        return t.length > 0 ? t : undefined;
    }
    if (Array.isArray(content)) {
        const parts = content
            // `b != null` guards against null entries, on which `b.type` would throw.
            .filter(b => b != null && b.type === 'text' && typeof b.text === 'string')
            .map(b => b.text.trim())
            .filter(t => t.length > 0);
        return parts.length > 0 ? parts.join('\n') : undefined;
    }
    return undefined;
}
|
|
44
|
+
/**
 * Instruction preface glued in front of the user's prompt (see buildArgs,
 * which prepends it). The sentences are joined with a single space.
 */
const GEMINI_PROMPT_PREFACE = [
    'You are operating in Hover, a browser-testing tool.',
    'Use ONLY the MCP playwright tools (prefixed `mcp__playwright__` / `mcp__hover-playwright__`) to drive the browser.',
    'Do NOT use shell, file-edit, web-search, or any other built-in tool.',
    'Do NOT navigate to a URL the user is already on; check the page state via `browser_snapshot` first.',
    'When the task is complete, emit a short summary and stop.',
].reduce((soFar, sentence) => `${soFar} ${sentence}`);
|
|
51
|
+
/**
 * Agent descriptor for the Gemini CLI: builds its argv, parses its
 * stream-json (NDJSON) stdout into events, and provides a fallback
 * session_end for runs that exit without a `result` event.
 */
export const geminiAgent = {
    id: 'gemini',
    binName: 'gemini',
    protocol: 'argv',
    streamFormat: 'json-lines',
    sandboxStrength: 'soft',
    display: {
        label: 'Gemini',
        tagline: 'Google Gemini — soft sandbox (no built-in tool deny-list)',
        homepage: 'https://github.com/google-gemini/gemini-cli',
        installHint: 'npm install -g @google/gemini-cli',
    },
    /**
     * Builds the gemini command-line arguments for one run.
     *
     * Reads `opts.prompt`, `opts.appendSystemPrompt`, `opts.model` and
     * `opts.sessionId`. Nothing here forwards an MCP config, a budget, or
     * an allow/deny tool list; per the original notes MCP servers are set
     * up via `gemini mcp add` at install time and there are no matching
     * per-invocation flags.
     *
     * @param opts invocation options
     * @returns the argv array (without the binary name)
     */
    buildArgs(opts) {
        // No --append-system-prompt flag exists, so the Hover preface (plus
        // any non-blank caller-supplied addition) is prepended to the prompt
        // — same pattern as cursor.ts / aider.ts.
        const preface = opts.appendSystemPrompt && opts.appendSystemPrompt.trim().length > 0
            ? `${GEMINI_PROMPT_PREFACE} ${opts.appendSystemPrompt}`
            : GEMINI_PROMPT_PREFACE;
        const finalPrompt = `${preface}\n\n${opts.prompt}`;
        const args = ['-p', finalPrompt];
        // NDJSON streaming output.
        args.push('--output-format', 'stream-json');
        // Auto-approve all tool calls so the run doesn't hang. This is the
        // newer spelling of the deprecated --yolo flag.
        args.push('--approval-mode', 'yolo');
        if (opts.model) {
            args.push('--model', opts.model);
        }
        if (opts.sessionId) {
            // Only ever an explicit id, never the literal 'latest'.
            args.push('--resume', opts.sessionId);
        }
        return args;
    },
    /**
     * Converts one stdout line into zero or more events.
     *
     * Blank lines yield nothing. Lines that are not JSON, or that parse to
     * something other than an object (for example the literal `null`, which
     * would otherwise crash on the first property access), are passed
     * through as `raw` events. Recognised event types:
     *   - `init`        → resets the state, emits `session_start` when a
     *                     session id is present
     *   - `message`     → for assistant (or role-less) messages: `usage`
     *                     (turn counter) plus `text` when there is any
     *   - `tool_use`    → `tool_use` with the MCP prefix stripped
     *   - `tool_result` → `tool_result`
     *   - `result`      → `session_end`
     *   - `error`       → flags the error, emits `text`
     * Anything else is ignored.
     *
     * @param line one raw stdout line
     * @param state mutable parser state, reused across calls for one run
     * @returns the events produced by this line
     */
    parseEvent(line, state = {}) {
        if (!line.trim())
            return [];
        let ev;
        try {
            ev = JSON.parse(line);
        }
        catch {
            return [{ kind: 'raw', line }];
        }
        // JSON.parse also accepts bare primitives; `null` in particular
        // would throw on `ev.type`. Treat non-objects like any other
        // non-event line.
        if (ev === null || typeof ev !== 'object')
            return [{ kind: 'raw', line }];
        const s = geminiState(state);
        const out = [];
        if (ev.type === 'init') {
            resetGeminiCounters(s);
            s.runningModel = ev.model;
            if (ev.session_id) {
                s.runningSessionId = ev.session_id;
                out.push({ kind: 'session_start', sessionId: ev.session_id, model: ev.model });
            }
            return out;
        }
        if (ev.type === 'message') {
            // Only assistant messages count as turns; echoes of the user's
            // own message (role:'user') are skipped.
            if (ev.role === 'assistant' || ev.role === undefined) {
                s.runningTurns += 1;
                out.push({ kind: 'usage', turns: s.runningTurns });
                const text = extractMessageText(ev);
                if (text) {
                    s.lastAssistantText = text;
                    out.push({ kind: 'text', text });
                }
            }
            return out;
        }
        if (ev.type === 'tool_use') {
            const rawName = ev.name ?? '';
            const tool = stripMcpPrefix(rawName);
            if (ev.id)
                s.toolNameByUseId.set(ev.id, tool);
            out.push({ kind: 'tool_use', tool, input: ev.input });
            return out;
        }
        if (ev.type === 'tool_result') {
            const isError = ev.is_error === true;
            out.push({ kind: 'tool_result', isError });
            return out;
        }
        if (ev.type === 'result') {
            // A turn count in result.stats wins over our running count.
            const turns = ev.stats?.turns ?? ev.stats?.model?.turns ?? s.runningTurns;
            const isError = ev.is_error === true || ev.error != null;
            if (isError)
                s.sawErrorEvent = true;
            out.push({
                kind: 'session_end',
                turns,
                // costUsd intentionally undefined — no $ figure is available.
                isError,
                summary: ev.response ?? s.lastAssistantText,
            });
            return out;
        }
        if (ev.type === 'error') {
            s.sawErrorEvent = true;
            const msg = ev.message ?? ev.error?.message ?? `[gemini] error`;
            out.push({ kind: 'text', text: msg });
            return out;
        }
        return [];
    },
    /**
     * Fallback `session_end` for a child that exits without ever emitting a
     * `result` event (e.g. crash, signal). Built from the accumulated state:
     * the turn count, the last assistant text as summary, and an error flag
     * that is set when an error event was seen or the exit code is non-zero.
     *
     * @param exitCode process exit code, or null/undefined when unknown
     * @param state the parser state used for this run
     * @returns the session_end event
     */
    onStreamEnd(exitCode, state = {}) {
        const s = geminiState(state);
        return {
            kind: 'session_end',
            turns: s.runningTurns,
            // costUsd intentionally undefined — see parseEvent note.
            isError: s.sawErrorEvent || (exitCode != null && exitCode !== 0),
            summary: s.lastAssistantText,
        };
    },
};
|
|
181
|
+
/**
 * Test-only escape hatches, same pattern as cursor.ts / codex.ts.
 *
 * - `freshState` hands back a new empty state object.
 * - `resetCounters` resets the gemini parser fields stored on `state`.
 * - `getState` returns a plain copy of the scalar parser fields.
 */
export const __testing = {
    freshState: () => ({}),
    resetCounters: (state) => resetGeminiCounters(geminiState(state)),
    getState: (state) => {
        const {
            runningTurns,
            runningSessionId,
            runningModel,
            lastAssistantText,
            sawErrorEvent,
        } = geminiState(state);
        return {
            runningTurns,
            runningSessionId,
            runningModel,
            lastAssistantText,
            sawErrorEvent,
        };
    },
};
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { AgentDescriptor, ParserState } from './types.js';
|
|
2
|
+
export declare const qwenAgent: AgentDescriptor;
|
|
3
|
+
/**
 * Test-only escape hatches, same pattern as cursor.ts / codex.ts.
 */
export declare const __testing: {
    /** Returns a new, empty parser state object. */
    freshState: () => ParserState;
    /** Resets the qwen parser fields held on `state`. */
    resetCounters: (state: ParserState) => void;
    /** Returns a plain copy of the scalar qwen parser fields on `state`. */
    getState: (state: ParserState) => {
        runningTurns: number;
        runningSessionId: string | undefined;
        runningModel: string | undefined;
        lastAssistantText: string | undefined;
        sawErrorEvent: boolean;
    };
};
|
|
17
|
+
//# sourceMappingURL=qwen.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"qwen.d.ts","sourceRoot":"","sources":["../../src/agents/qwen.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAA8B,WAAW,EAAE,MAAM,YAAY,CAAC;AAiK3F,eAAO,MAAM,SAAS,EAAE,eAqJvB,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,SAAS;sBACJ,WAAW;2BACJ,WAAW;sBAChB,WAAW;;;;;;;CAU9B,CAAC"}
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
function qwenState(state) {
|
|
2
|
+
if (typeof state.runningTurns !== 'number') {
|
|
3
|
+
state.runningTurns = 0;
|
|
4
|
+
state.runningSessionId = undefined;
|
|
5
|
+
state.runningModel = undefined;
|
|
6
|
+
state.lastAssistantText = undefined;
|
|
7
|
+
state.sawErrorEvent = false;
|
|
8
|
+
state.toolNameByUseId = new Map();
|
|
9
|
+
}
|
|
10
|
+
return state;
|
|
11
|
+
}
|
|
12
|
+
function resetQwenCounters(s) {
|
|
13
|
+
s.runningTurns = 0;
|
|
14
|
+
s.runningSessionId = undefined;
|
|
15
|
+
s.runningModel = undefined;
|
|
16
|
+
s.lastAssistantText = undefined;
|
|
17
|
+
s.sawErrorEvent = false;
|
|
18
|
+
s.toolNameByUseId.clear();
|
|
19
|
+
}
|
|
20
|
+
/**
 * Removes a leading `mcp__playwright__` and then a leading
 * `mcp__hover-playwright__` from a tool name, so the result matches the
 * normalised names claude / codex / cursor emit. The two prefixes are
 * checked in that order, each against the start of whatever remains.
 */
function stripMcpPrefix(raw) {
    let name = raw;
    for (const prefix of ['mcp__playwright__', 'mcp__hover-playwright__']) {
        if (name.startsWith(prefix)) {
            name = name.slice(prefix.length);
        }
    }
    return name;
}
|
|
25
|
+
const QWEN_PROMPT_PREFACE = [
|
|
26
|
+
'You are operating in Hover, a browser-testing tool.',
|
|
27
|
+
'Use ONLY the MCP playwright tools (prefixed `mcp__playwright__` / `mcp__hover-playwright__`) to drive the browser.',
|
|
28
|
+
'Do NOT use shell, file-edit, web-search, or any other built-in tool.',
|
|
29
|
+
'Do NOT navigate to a URL the user is already on; check the page state via `browser_snapshot` first.',
|
|
30
|
+
'When the task is complete, emit a short summary and stop.',
|
|
31
|
+
].join(' ');
|
|
32
|
+
export const qwenAgent = {
|
|
33
|
+
id: 'qwen',
|
|
34
|
+
binName: 'qwen',
|
|
35
|
+
protocol: 'argv',
|
|
36
|
+
streamFormat: 'json-lines',
|
|
37
|
+
sandboxStrength: 'soft',
|
|
38
|
+
display: {
|
|
39
|
+
label: 'Qwen Code',
|
|
40
|
+
tagline: 'Qwen Code — soft sandbox (no built-in tool deny-list)',
|
|
41
|
+
homepage: 'https://github.com/QwenLM/qwen-code',
|
|
42
|
+
installHint: 'npm install -g @qwen-code/qwen-code@latest',
|
|
43
|
+
},
|
|
44
|
+
buildArgs(opts) {
|
|
45
|
+
const args = ['-p', opts.prompt];
|
|
46
|
+
// NDJSON streaming output.
|
|
47
|
+
args.push('--output-format', 'stream-json');
|
|
48
|
+
// Auto-approve all tool calls so the run doesn't hang. The newer
|
|
49
|
+
// canonical form is --approval-mode=yolo; --yolo is deprecated but
|
|
50
|
+
// still works in 2026-05. We use the modern form.
|
|
51
|
+
args.push('--approval-mode', 'yolo');
|
|
52
|
+
if (opts.model) {
|
|
53
|
+
args.push('--model', opts.model);
|
|
54
|
+
}
|
|
55
|
+
if (opts.sessionId) {
|
|
56
|
+
// --resume <sessionId> is the documented headless form. --continue
|
|
57
|
+
// (no arg) picks the most recent — NOT what we want when a specific
|
|
58
|
+
// session was passed.
|
|
59
|
+
args.push('--resume', opts.sessionId);
|
|
60
|
+
}
|
|
61
|
+
// Qwen has a real --append-system-prompt flag — use it instead of
|
|
62
|
+
// prepending to the user prompt. Concatenate the standing Hover-mode
|
|
63
|
+
// preface with whatever the caller appended.
|
|
64
|
+
const sysPrompt = opts.appendSystemPrompt && opts.appendSystemPrompt.trim().length > 0
|
|
65
|
+
? `${QWEN_PROMPT_PREFACE} ${opts.appendSystemPrompt}`
|
|
66
|
+
: QWEN_PROMPT_PREFACE;
|
|
67
|
+
args.push('--append-system-prompt', sysPrompt);
|
|
68
|
+
// MCP servers configured in ~/.qwen/settings.json — no per-invocation
|
|
69
|
+
// --mcp-config equivalent. Same constraint as cursor / codex.
|
|
70
|
+
// No equivalent for --max-budget-usd or --allowedTools / --disallowedTools.
|
|
71
|
+
return args;
|
|
72
|
+
},
|
|
73
|
+
parseEvent(line, state = {}) {
|
|
74
|
+
if (!line.trim())
|
|
75
|
+
return [];
|
|
76
|
+
let ev;
|
|
77
|
+
try {
|
|
78
|
+
ev = JSON.parse(line);
|
|
79
|
+
}
|
|
80
|
+
catch {
|
|
81
|
+
return [{ kind: 'raw', line }];
|
|
82
|
+
}
|
|
83
|
+
const s = qwenState(state);
|
|
84
|
+
const out = [];
|
|
85
|
+
if (ev.type === 'system' && ev.subtype === 'session_start') {
|
|
86
|
+
resetQwenCounters(s);
|
|
87
|
+
s.runningModel = ev.model;
|
|
88
|
+
if (ev.session_id) {
|
|
89
|
+
s.runningSessionId = ev.session_id;
|
|
90
|
+
out.push({ kind: 'session_start', sessionId: ev.session_id, model: ev.model });
|
|
91
|
+
}
|
|
92
|
+
return out;
|
|
93
|
+
}
|
|
94
|
+
if (ev.type === 'assistant' && ev.message) {
|
|
95
|
+
s.runningTurns += 1;
|
|
96
|
+
// Emit a usage event so the widget can advance its turn counter.
|
|
97
|
+
// costUsd intentionally omitted — qwen doesn't publish $ in stream.
|
|
98
|
+
out.push({ kind: 'usage', turns: s.runningTurns });
|
|
99
|
+
for (const block of ev.message.content ?? []) {
|
|
100
|
+
if (block.type === 'text') {
|
|
101
|
+
const text = block.text?.trim();
|
|
102
|
+
if (text) {
|
|
103
|
+
s.lastAssistantText = text;
|
|
104
|
+
out.push({ kind: 'text', text });
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
else if (block.type === 'tool_use') {
|
|
108
|
+
const rawName = block.name ?? '';
|
|
109
|
+
const tool = stripMcpPrefix(rawName);
|
|
110
|
+
if (block.id)
|
|
111
|
+
s.toolNameByUseId.set(block.id, tool);
|
|
112
|
+
out.push({ kind: 'tool_use', tool, input: block.input });
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
return out;
|
|
116
|
+
}
|
|
117
|
+
// tool_result blocks are wrapped in `user` messages (Anthropic Messages
|
|
118
|
+
// convention). We surface them as tool_result events.
|
|
119
|
+
if (ev.type === 'user' && ev.message) {
|
|
120
|
+
for (const block of ev.message.content ?? []) {
|
|
121
|
+
if (block.type === 'tool_result') {
|
|
122
|
+
const isError = block.is_error === true;
|
|
123
|
+
out.push({ kind: 'tool_result', isError });
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
return out;
|
|
127
|
+
}
|
|
128
|
+
if (ev.type === 'result') {
|
|
129
|
+
const isError = ev.is_error === true ||
|
|
130
|
+
(typeof ev.subtype === 'string' && /error|fail/i.test(ev.subtype));
|
|
131
|
+
if (isError)
|
|
132
|
+
s.sawErrorEvent = true;
|
|
133
|
+
out.push({
|
|
134
|
+
kind: 'session_end',
|
|
135
|
+
turns: s.runningTurns,
|
|
136
|
+
// costUsd intentionally undefined — qwen doesn't publish $.
|
|
137
|
+
isError,
|
|
138
|
+
summary: ev.result ?? s.lastAssistantText,
|
|
139
|
+
});
|
|
140
|
+
return out;
|
|
141
|
+
}
|
|
142
|
+
// Qwen emits various error envelopes mid-stream; surface them as text.
|
|
143
|
+
if (ev.type && /error/i.test(ev.type)) {
|
|
144
|
+
s.sawErrorEvent = true;
|
|
145
|
+
const msg = ev.error?.message ?? ev.text ?? ev.result ?? `[qwen] ${ev.type}`;
|
|
146
|
+
out.push({ kind: 'text', text: msg });
|
|
147
|
+
return out;
|
|
148
|
+
}
|
|
149
|
+
return [];
|
|
150
|
+
},
|
|
151
|
+
/**
|
|
152
|
+
* Qwen's `result` event already produces a session_end via parseEvent;
|
|
153
|
+
* this fallback is for the case where the child exits without emitting a
|
|
154
|
+
* `result` (e.g. crash, signal). Mirrors codex.ts / cursor.ts shape.
|
|
155
|
+
*/
|
|
156
|
+
onStreamEnd(exitCode, state = {}) {
|
|
157
|
+
const s = qwenState(state);
|
|
158
|
+
return {
|
|
159
|
+
kind: 'session_end',
|
|
160
|
+
turns: s.runningTurns,
|
|
161
|
+
// costUsd intentionally undefined — see parseEvent note.
|
|
162
|
+
isError: s.sawErrorEvent || (exitCode != null && exitCode !== 0),
|
|
163
|
+
summary: s.lastAssistantText,
|
|
164
|
+
};
|
|
165
|
+
},
|
|
166
|
+
};
|
|
167
|
+
/**
 * Test-only escape hatches, same pattern as cursor.ts / codex.ts.
 *
 * - `freshState` hands back a new empty state object.
 * - `resetCounters` resets the qwen parser fields stored on `state`.
 * - `getState` returns a plain copy of the scalar parser fields.
 */
export const __testing = {
    freshState: () => ({}),
    resetCounters: (state) => resetQwenCounters(qwenState(state)),
    getState: (state) => {
        const {
            runningTurns,
            runningSessionId,
            runningModel,
            lastAssistantText,
            sawErrorEvent,
        } = qwenState(state);
        return {
            runningTurns,
            runningSessionId,
            runningModel,
            lastAssistantText,
            sawErrorEvent,
        };
    },
};
|
|
@@ -2,13 +2,15 @@ import type { AgentDescriptor } from './types.js';
|
|
|
2
2
|
/**
|
|
3
3
|
* Registry of agents Hover can drive.
|
|
4
4
|
*
|
|
5
|
-
* To add support for another agent (e.g.
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
5
|
+
* To add support for another agent (e.g. cline, continue, kilo), implement
|
|
6
|
+
* its AgentDescriptor in its own file and register it here. The rest of the
|
|
7
|
+
* system — detect, argv, invoke, service, widget — works without further
|
|
8
|
+
* changes.
|
|
9
9
|
*
|
|
10
10
|
* Insertion order is the order shown in the widget's agent dropdown, so put
|
|
11
|
-
* the recommended primary first.
|
|
11
|
+
* the recommended primary first. The two hard-sandbox / first-party agents
|
|
12
|
+
* (claude, codex) lead; the soft-sandbox third-party agents follow in the
|
|
13
|
+
* order they were added.
|
|
12
14
|
*/
|
|
13
15
|
export declare const AGENTS: Record<string, AgentDescriptor>;
|
|
14
16
|
export declare function getAgent(id: string): AgentDescriptor | undefined;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"registry.d.ts","sourceRoot":"","sources":["../../src/agents/registry.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"registry.d.ts","sourceRoot":"","sources":["../../src/agents/registry.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAQlD;;;;;;;;;;;;GAYG;AACH,eAAO,MAAM,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,eAAe,CAOlD,CAAC;AAEF,wBAAgB,QAAQ,CAAC,EAAE,EAAE,MAAM,GAAG,eAAe,GAAG,SAAS,CAEhE;AAED,+DAA+D;AAC/D,wBAAgB,UAAU,IAAI,eAAe,EAAE,CAE9C"}
|
package/dist/agents/registry.js
CHANGED
|
@@ -1,19 +1,29 @@
|
|
|
1
1
|
import { claudeAgent } from './claude.js';
|
|
2
2
|
import { codexAgent } from './codex.js';
|
|
3
|
+
import { cursorAgent } from './cursor.js';
|
|
4
|
+
import { aiderAgent } from './aider.js';
|
|
5
|
+
import { geminiAgent } from './gemini.js';
|
|
6
|
+
import { qwenAgent } from './qwen.js';
|
|
3
7
|
/**
|
|
4
8
|
* Registry of agents Hover can drive.
|
|
5
9
|
*
|
|
6
|
-
* To add support for another agent (e.g.
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
+
* To add support for another agent (e.g. cline, continue, kilo), implement
|
|
11
|
+
* its AgentDescriptor in its own file and register it here. The rest of the
|
|
12
|
+
* system — detect, argv, invoke, service, widget — works without further
|
|
13
|
+
* changes.
|
|
10
14
|
*
|
|
11
15
|
* Insertion order is the order shown in the widget's agent dropdown, so put
|
|
12
|
-
* the recommended primary first.
|
|
16
|
+
* the recommended primary first. The two hard-sandbox / first-party agents
|
|
17
|
+
* (claude, codex) lead; the soft-sandbox third-party agents follow in the
|
|
18
|
+
* order they were added.
|
|
13
19
|
*/
|
|
14
20
|
export const AGENTS = {
|
|
15
21
|
[claudeAgent.id]: claudeAgent,
|
|
16
22
|
[codexAgent.id]: codexAgent,
|
|
23
|
+
[cursorAgent.id]: cursorAgent,
|
|
24
|
+
[aiderAgent.id]: aiderAgent,
|
|
25
|
+
[geminiAgent.id]: geminiAgent,
|
|
26
|
+
[qwenAgent.id]: qwenAgent,
|
|
17
27
|
};
|
|
18
28
|
export function getAgent(id) {
|
|
19
29
|
return AGENTS[id];
|
package/dist/plugin-api.d.ts
CHANGED
|
@@ -139,6 +139,16 @@ export interface HoverPluginManifest {
|
|
|
139
139
|
* here so the widget side can be tree-shaken to skip handlers for
|
|
140
140
|
* events that no loaded plugin will ever produce. */
|
|
141
141
|
widgetEventTypes?: string[];
|
|
142
|
+
/** Absolute path to a JS module that runs inside the widget's Shadow
|
|
143
|
+
* DOM. The host reads this file at bundle-assembly time, inlines it
|
|
144
|
+
* as a `<script type="module">` after the widget core, and exposes
|
|
145
|
+
* `window.__HOVER_WIDGET__` for the module to register itself.
|
|
146
|
+
*
|
|
147
|
+
* Plugin authors typically resolve this via `import.meta` or
|
|
148
|
+
* `fileURLToPath(new URL('./widget.js', import.meta.url))` from
|
|
149
|
+
* inside their server-side entry. If absent, the plugin contributes
|
|
150
|
+
* no widget code (server-side-only plugin). */
|
|
151
|
+
widgetEntry?: string;
|
|
142
152
|
hooks?: HoverHooks;
|
|
143
153
|
}
|
|
144
154
|
/**
|
package/dist/plugin-api.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"plugin-api.d.ts","sourceRoot":"","sources":["../src/plugin-api.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH;;;;GAIG;AACH,MAAM,MAAM,eAAe,GAAG,CAAC,CAAC;AAChC,eAAO,MAAM,mBAAmB,EAAE,eAAmB,CAAC;AAMtD,MAAM,WAAW,eAAe;IAC9B,uEAAuE;IACvE,EAAE,EAAE,MAAM,CAAC;IACX,4DAA4D;IAC5D,KAAK,EAAE,MAAM,CAAC;IACd,iDAAiD;IACjD,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;gEAC4D;IAC5D,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED,MAAM,WAAW,oBAAoB;IACnC;gDAC4C;IAC5C,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC7B;8DAC0D;IAC1D,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED,MAAM,WAAW,sBAAsB;IACrC,qDAAqD;IACrD,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB;iFAC6E;IAC7E,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;2EACuE;IACvE,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;yDAEqD;IACrD,KAAK,CAAC,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;IACvC,6EAA6E;IAC7E,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED,MAAM,WAAW,+BAA+B;IAC9C,IAAI,EAAE,MAAM,CAAC;IACb;uDACmD;IACnD,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AAMD,MAAM,WAAW,cAAc;IAC7B;sEACkE;IAClE,CAAC,KAAK,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,CAAC,EAAE,OAAO,CAAA;KAAE,GAAG,IAAI,CAAC;CACpD;AAED,MAAM,WAAW,gBAAgB;IAC/B;;yBAEqB;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,qDAAqD;IACrD,SAAS,EAAE,cAAc,CAAC;CAC3B;AAED;;2EAE2E;AAC3E,MAAM,WAAW,eAAgB,SAAQ,gBAAgB;IACvD,MAAM,EAAE,MAAM,CAAC;IACf;6DACyD;IACzD,cAAc,CAAC,KAAK,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,GAAG,IAAI,CAAC;IACnE;;;;;sDAKkD;IAClD,eAAe,CAAC,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,CAAC;CAChE;AAED;oDACoD;AACpD,MAAM,WAAW,iBAAkB,SAAQ,gBAAgB;IACzD,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;gEACgE;AAChE,MAAM,MAAM,WAAW,GAAG,gBAAgB,CAAC;AAE3C,MAAM,WAAW,UAAU;IACzB,qBAAqB,CAAC,EAAE,CAAC,GAAG,EAAE,eAAe,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACvE,uBAAuB,CAAC,EAAE,CAAC,GAAG,EAAE,iBAAiB,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC3E,wBAAwB,CAAC,EAAE,CAAC,GAAG,EAAE,WAAW,KAAK,IAAI,GAAG,OAAO,CAA
C,IAAI,CAAC,CAAC;CACvE;AAMD,MAAM,WAAW,mBAAmB;IAClC,8DAA8D;IAC9D,UAAU,EAAE,eAAe,CAAC;IAE5B,6DAA6D;IAC7D,IAAI,EAAE,MAAM,CAAC;IAEb,uDAAuD;IACvD,IAAI,CAAC,EAAE,eAAe,CAAC;IAEvB,qEAAqE;IACrE,UAAU,CAAC,EAAE,oBAAoB,EAAE,CAAC;IAEpC,uDAAuD;IACvD,WAAW,CAAC,EAAE,sBAAsB,CAAC;IAErC;+BAC2B;IAC3B,qBAAqB,CAAC,EAAE,+BAA+B,EAAE,CAAC;IAE1D;;0DAEsD;IACtD,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;IAE5B,KAAK,CAAC,EAAE,UAAU,CAAC;CACpB;AAMD;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,iBAAiB,CAAC,KAAK,GAAG,IAAI,EAC5C,OAAO,EAAE,CAAC,IAAI,EAAE,KAAK,KAAK,mBAAmB,GAC5C,CAAC,IAAI,EAAE,KAAK,KAAK,mBAAmB,CAYtC"}
|
|
1
|
+
{"version":3,"file":"plugin-api.d.ts","sourceRoot":"","sources":["../src/plugin-api.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH;;;;GAIG;AACH,MAAM,MAAM,eAAe,GAAG,CAAC,CAAC;AAChC,eAAO,MAAM,mBAAmB,EAAE,eAAmB,CAAC;AAMtD,MAAM,WAAW,eAAe;IAC9B,uEAAuE;IACvE,EAAE,EAAE,MAAM,CAAC;IACX,4DAA4D;IAC5D,KAAK,EAAE,MAAM,CAAC;IACd,iDAAiD;IACjD,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;gEAC4D;IAC5D,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED,MAAM,WAAW,oBAAoB;IACnC;gDAC4C;IAC5C,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC7B;8DAC0D;IAC1D,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED,MAAM,WAAW,sBAAsB;IACrC,qDAAqD;IACrD,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB;iFAC6E;IAC7E,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;2EACuE;IACvE,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;yDAEqD;IACrD,KAAK,CAAC,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;IACvC,6EAA6E;IAC7E,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED,MAAM,WAAW,+BAA+B;IAC9C,IAAI,EAAE,MAAM,CAAC;IACb;uDACmD;IACnD,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AAMD,MAAM,WAAW,cAAc;IAC7B;sEACkE;IAClE,CAAC,KAAK,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,CAAC,EAAE,OAAO,CAAA;KAAE,GAAG,IAAI,CAAC;CACpD;AAED,MAAM,WAAW,gBAAgB;IAC/B;;yBAEqB;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,qDAAqD;IACrD,SAAS,EAAE,cAAc,CAAC;CAC3B;AAED;;2EAE2E;AAC3E,MAAM,WAAW,eAAgB,SAAQ,gBAAgB;IACvD,MAAM,EAAE,MAAM,CAAC;IACf;6DACyD;IACzD,cAAc,CAAC,KAAK,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,GAAG,IAAI,CAAC;IACnE;;;;;sDAKkD;IAClD,eAAe,CAAC,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,CAAC;CAChE;AAED;oDACoD;AACpD,MAAM,WAAW,iBAAkB,SAAQ,gBAAgB;IACzD,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;gEACgE;AAChE,MAAM,MAAM,WAAW,GAAG,gBAAgB,CAAC;AAE3C,MAAM,WAAW,UAAU;IACzB,qBAAqB,CAAC,EAAE,CAAC,GAAG,EAAE,eAAe,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACvE,uBAAuB,CAAC,EAAE,CAAC,GAAG,EAAE,iBAAiB,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC3E,wBAAwB,CAAC,EAAE,CAAC,GAAG,EAAE,WAAW,KAAK,IAAI,GAAG,OAAO,CAA
C,IAAI,CAAC,CAAC;CACvE;AAMD,MAAM,WAAW,mBAAmB;IAClC,8DAA8D;IAC9D,UAAU,EAAE,eAAe,CAAC;IAE5B,6DAA6D;IAC7D,IAAI,EAAE,MAAM,CAAC;IAEb,uDAAuD;IACvD,IAAI,CAAC,EAAE,eAAe,CAAC;IAEvB,qEAAqE;IACrE,UAAU,CAAC,EAAE,oBAAoB,EAAE,CAAC;IAEpC,uDAAuD;IACvD,WAAW,CAAC,EAAE,sBAAsB,CAAC;IAErC;+BAC2B;IAC3B,qBAAqB,CAAC,EAAE,+BAA+B,EAAE,CAAC;IAE1D;;0DAEsD;IACtD,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;IAE5B;;;;;;;;oDAQgD;IAChD,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB,KAAK,CAAC,EAAE,UAAU,CAAC;CACpB;AAMD;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,iBAAiB,CAAC,KAAK,GAAG,IAAI,EAC5C,OAAO,EAAE,CAAC,IAAI,EAAE,KAAK,KAAK,mBAAmB,GAC5C,CAAC,IAAI,EAAE,KAAK,KAAK,mBAAmB,CAYtC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bench-multi-tab.d.ts","sourceRoot":"","sources":["../../src/scripts/bench-multi-tab.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Benchmark agent success rate on the multi-tab "Pay with PayHover" flow.
|
|
3
|
+
*
|
|
4
|
+
* Why this exists — v0.10's central theme is "agent can drive cross-tab
|
|
5
|
+
* flows in the wild." The system-prompt addendum (cdpHint.ts rule 5/6/7)
|
|
6
|
+
* is the lever we're tuning. This script gives us a number to tune
|
|
7
|
+
* against: across N iterations, how often does the agent get from
|
|
8
|
+
* "browse the store" to "Order placed"?
|
|
9
|
+
*
|
|
10
|
+
* Per iteration the agent has to:
|
|
11
|
+
* 1. Browse the e-commerce store, add 1+ items to cart, go to checkout.
|
|
12
|
+
* 2. Fill the shipping form.
|
|
13
|
+
* 3. Pick "Pay with PayHover" (opens a new tab at localhost:5177).
|
|
14
|
+
* 4. Switch to the new tab, fill card number + CVV, click Continue.
|
|
15
|
+
* 5. Wait ~600ms for the simulated 3DS pre-check.
|
|
16
|
+
* 6. Fill the 6-digit OTP (always 123456 in the sandbox).
|
|
17
|
+
* 7. Click Confirm. The provider tab closes itself.
|
|
18
|
+
* 8. Switch back to the original tab, observe the "Order placed" view.
|
|
19
|
+
*
|
|
20
|
+
* Steps 3, 4, 7, 8 are the failure-prone ones.
|
|
21
|
+
*
|
|
22
|
+
* Assumes:
|
|
23
|
+
* - Debug Chrome on :9222 (run `pnpm smoke:chrome`).
|
|
24
|
+
* - e-commerce on :5174 AND payment-provider on :5177 both running
|
|
25
|
+
* (run `pnpm dev:example:e-commerce` and `pnpm dev:example:payment-provider`
|
|
26
|
+
* in two terminals before invoking this).
|
|
27
|
+
*
|
|
28
|
+
* Usage:
|
|
29
|
+
* pnpm --filter @hover-dev/core exec tsx src/scripts/bench-multi-tab.ts [n]
|
|
30
|
+
* pnpm bench-multi-tab [n]
|
|
31
|
+
*
|
|
32
|
+
* `n` defaults to 5. Per-iteration timeout is 5 minutes — multi-tab flows
|
|
33
|
+
* are slow because the agent does a lot of browser_snapshot calls.
|
|
34
|
+
*
|
|
35
|
+
* Output: per-run pass/fail + final summary (success rate, median wall
|
|
36
|
+
* time, median turns, median cost in $). A/B prompt changes by running
|
|
37
|
+
* once on each branch and comparing.
|
|
38
|
+
*/
|
|
39
|
+
import { WebSocket } from 'ws';
|
|
40
|
+
import { startService } from '../service.js';
|
|
41
|
+
/** Prompt sent to the agent on every run; override with HOVER_BENCH_PROMPT. */
const PROMPT = process.env.HOVER_BENCH_PROMPT ??
    [
        'Open http://localhost:5174 (Hover Store).',
        'Add any item to the cart, go to checkout, fill the shipping form with',
        'realistic values, then choose "Pay with PayHover". A new tab opens at',
        'the payment provider — switch to it, fill in card 4242 4242 4242 4242',
        'with CVV 123, click Continue, wait for the OTP step, enter 123456,',
        'click Confirm. The popup will close. Switch back to the original tab',
        'and verify the order shows as placed.',
    ].join(' ');
/** Number of runs when no count is given on the command line. */
const DEFAULT_ITERATIONS = 5;
/**
 * Parses the optional iteration-count CLI argument.
 *
 * @param raw The raw argument, or `undefined` when it was omitted.
 * @returns `DEFAULT_ITERATIONS` when omitted, otherwise the parsed count.
 * @throws RangeError when the argument is not a positive integer. Failing
 *   here is deliberate: a value such as `abc`, `0` or `2.5` would otherwise
 *   produce zero runs with a `NaN%` summary, or a fractional run count.
 */
function parseIterations(raw) {
    if (raw === undefined) {
        return DEFAULT_ITERATIONS;
    }
    const count = Number(raw);
    if (!Number.isInteger(count) || count < 1) {
        throw new RangeError(`[bench-multi-tab] iteration count must be a positive integer, got ${JSON.stringify(raw)}`);
    }
    return count;
}
const ITERATIONS = parseIterations(process.argv[2]);
/** Per-iteration wall-clock budget: 5 minutes. */
const PER_RUN_TIMEOUT_MS = 5 * 60 * 1000;
|
|
53
|
+
/**
 * Runs one benchmark iteration and reports its outcome.
 *
 * Starts a fresh service on an ephemeral port (`port: 0`), connects to it
 * over WebSocket, sends PROMPT as a `command` message and waits for the
 * agent's `session_end` event. The service is closed before the returned
 * promise resolves.
 *
 * Failures after startup — the per-run timeout, a socket error, or the
 * socket closing before `session_end` — are reported as `ok: false` with a
 * `reason` rather than as a rejection.
 *
 * @param idx Zero-based iteration index (used for progress output only).
 * @returns Promise of `{ ok, wallMs, turns, costUsd, reason }`, where
 *   `turns` is the number of `tool_use` events observed and `costUsd` is
 *   `null` unless `session_end` carried a numeric cost.
 */
async function singleRun(idx) {
    process.stderr.write(`\n[bench-multi-tab] run ${idx + 1}/${ITERATIONS}\n`);
    const service = await startService({
        port: 0,
        agentId: 'claude',
        model: 'sonnet',
        cdpUrl: 'http://localhost:9222',
        devRoot: process.cwd(),
    });
    return new Promise((resolve) => {
        const ws = new WebSocket(`ws://127.0.0.1:${service.port}`);
        const t0 = performance.now();
        let turns = 0;
        let costUsd = null;
        let resolved = false;
        // Single exit path: the first caller wins, later calls are no-ops.
        const finish = (ok, reason) => {
            if (resolved)
                return;
            resolved = true;
            clearTimeout(timeout);
            const result = {
                ok,
                wallMs: performance.now() - t0,
                turns,
                costUsd,
                reason,
            };
            try {
                ws.close(1000);
            }
            catch { /* already closed */ }
            service.close()
                .catch((err) => {
                    // Shutdown is best-effort: the run's outcome is already
                    // decided, so report the failure instead of leaving an
                    // unhandled rejection behind.
                    const detail = err instanceof Error ? err.message : String(err);
                    process.stderr.write(`[bench-multi-tab] service close failed: ${detail}\n`);
                })
                .finally(() => resolve(result));
        };
        const timeout = setTimeout(() => {
            finish(false, `timed out after ${PER_RUN_TIMEOUT_MS / 1000}s`);
        }, PER_RUN_TIMEOUT_MS);
        ws.on('open', () => {
            ws.send(JSON.stringify({ type: 'command', payload: { text: PROMPT } }));
        });
        ws.on('message', (raw) => {
            let msg;
            try {
                msg = JSON.parse(raw.toString());
            }
            catch {
                // Not JSON — nothing this benchmark can interpret.
                return;
            }
            if (msg === null || typeof msg !== 'object' || msg.type !== 'event')
                return;
            const ev = msg.payload;
            if (ev === null || typeof ev !== 'object')
                return;
            if (ev.kind === 'tool_use') {
                turns += 1;
                if (process.env.HOVER_BENCH_VERBOSE === '1') {
                    // Normalised tool_use events carry the tool name in
                    // `tool`; `name` is kept as a fallback.
                    process.stderr.write(` [turn ${turns}] ${ev.tool ?? ev.name ?? '<tool>'}\n`);
                }
            }
            if (ev.kind === 'session_end') {
                if (typeof ev.costUsd === 'number')
                    costUsd = ev.costUsd;
                finish(!ev.isError, ev.isError ? 'agent reported error' : undefined);
            }
        });
        ws.on('error', (err) => {
            finish(false, `WS error: ${err.message}`);
        });
        // If the server drops the connection before session_end, fail now
        // instead of idling until the per-run timeout. After finish() has
        // run, the close it triggers lands here as a no-op.
        ws.on('close', (code) => {
            finish(false, `socket closed before session_end (code ${code})`);
        });
    });
}
|
|
134
|
+
/**
 * Median of a list of numbers; `0` for an empty list.
 *
 * The input is copied before sorting, so the caller's array is left in its
 * original order. For an even count the two middle values are averaged.
 */
function median(xs) {
    const ascending = Array.from(xs).sort((lo, hi) => lo - hi);
    const count = ascending.length;
    if (count === 0) {
        return 0;
    }
    const upperMiddle = Math.floor(count / 2);
    if (count % 2 !== 0) {
        return ascending[upperMiddle];
    }
    return (ascending[upperMiddle - 1] + ascending[upperMiddle]) / 2;
}
|
|
143
|
+
/** Formats a millisecond duration as seconds with one decimal, e.g. `2.5s`. */
function fmtMs(ms) {
    const seconds = ms / 1000;
    return seconds.toFixed(1) + 's';
}
|
|
146
|
+
/**
 * Formats a dollar amount with four decimals, e.g. `$0.1234`.
 * A `null` or `undefined` amount is rendered as an en dash.
 */
function fmtUsd(usd) {
    if (usd == null) {
        return '–';
    }
    return '$' + usd.toFixed(4);
}
|
|
149
|
+
/**
 * Benchmark driver: runs `singleRun` ITERATIONS times in sequence, prints a
 * line per run and a final summary to stderr, then exits the process.
 *
 * A run whose `singleRun` promise rejects is recorded as a failed run with
 * zeroed metrics and the rejection message as its reason. Medians are
 * computed over passing runs only.
 *
 * Exit status: 1 when no run passed (including when no runs executed at
 * all), 0 otherwise.
 */
async function main() {
    process.stderr.write(`[bench-multi-tab] ${ITERATIONS} iterations, per-run timeout ${PER_RUN_TIMEOUT_MS / 1000}s\n`);
    process.stderr.write(`[bench-multi-tab] prompt: ${PROMPT.slice(0, 80)}…\n`);
    const results = [];
    for (let i = 0; i < ITERATIONS; i++) {
        try {
            const r = await singleRun(i);
            results.push(r);
            const status = r.ok ? '✓ PASS' : '✗ FAIL';
            process.stderr.write(`[bench-multi-tab] run ${i + 1}: ${status} · ${fmtMs(r.wallMs)} · ${r.turns} turns · ${fmtUsd(r.costUsd)}${r.reason ? ` · ${r.reason}` : ''}\n`);
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            results.push({ ok: false, wallMs: 0, turns: 0, costUsd: null, reason: msg });
            process.stderr.write(`[bench-multi-tab] run ${i + 1}: ✗ FAIL · setup error · ${msg}\n`);
        }
    }
    const passes = results.filter((r) => r.ok);
    // Guard the division: with zero executed runs 0/0 would print "NaN%".
    const successRate = results.length === 0 ? 0 : passes.length / results.length;
    process.stderr.write('\n[bench-multi-tab] summary\n');
    process.stderr.write(` success rate: ${(successRate * 100).toFixed(0)}% (${passes.length}/${results.length})\n`);
    if (passes.length > 0) {
        process.stderr.write(` median wall: ${fmtMs(median(passes.map((r) => r.wallMs)))}\n`);
        process.stderr.write(` median turns: ${median(passes.map((r) => r.turns)).toFixed(0)}\n`);
        // Cost is only reported when at least one passing run carried one.
        const costs = passes.map((r) => r.costUsd).filter((c) => c != null);
        if (costs.length > 0) {
            process.stderr.write(` median cost: ${fmtUsd(median(costs))}\n`);
        }
    }
    if (passes.length < results.length) {
        process.stderr.write(`\n failures:\n`);
        results.forEach((r, i) => {
            if (!r.ok)
                process.stderr.write(` run ${i + 1}: ${r.reason ?? 'unknown'}\n`);
        });
    }
    // Exit non-zero if EVERY run failed — useful for CI plumbing later. A
    // partial-pass run still exits 0 so we collect signal across branches.
    process.exit(passes.length === 0 ? 1 : 0);
}
// Entry point: anything main() itself fails to handle is fatal.
main().catch((err) => {
    process.stderr.write(`[bench-multi-tab] fatal: ${err instanceof Error ? err.stack ?? err.message : String(err)}\n`);
    process.exit(1);
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cdpHint.d.ts","sourceRoot":"","sources":["../../src/service/cdpHint.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH,UAAU,GAAG;IAAG,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE;AAa7C,wBAAgB,YAAY,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,MAAM,
|
|
1
|
+
{"version":3,"file":"cdpHint.d.ts","sourceRoot":"","sources":["../../src/service/cdpHint.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH,UAAU,GAAG;IAAG,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE;AAa7C,wBAAgB,YAAY,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,MAAM,CAmIhD;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,MAAM,CAYtD"}
|
package/dist/service/cdpHint.js
CHANGED
|
@@ -101,6 +101,45 @@ export function buildCdpHint(tabs) {
|
|
|
101
101
|
` 4. To see the current page state, call browser_snapshot first. Only`,
|
|
102
102
|
` navigate if you actually need a different URL.`,
|
|
103
103
|
``,
|
|
104
|
+
`Multi-tab + cross-origin flows (Stripe Checkout, OAuth login, "Pay with X" popups):`,
|
|
105
|
+
``,
|
|
106
|
+
` 5. When you click something that may open a new tab (target=_blank, a`,
|
|
107
|
+
` window.open trigger, a "Pay with …" / "Sign in with …" button), the`,
|
|
108
|
+
` popup tab is where the next user-visible step happens — but your tools`,
|
|
109
|
+
` stay anchored to the prior tab until you switch. After such a click:`,
|
|
110
|
+
``,
|
|
111
|
+
` a) Call browser_tabs(action='list') to see if a new tab appeared.`,
|
|
112
|
+
` A new entry at a different origin is the popup.`,
|
|
113
|
+
` b) Call browser_tabs(action='select', idx=<popup idx>) to focus it,`,
|
|
114
|
+
` then browser_snapshot the new tab and proceed.`,
|
|
115
|
+
` c) When the popup closes (it usually does so on success/cancel —`,
|
|
116
|
+
` window.close() or after a redirect chain), browser_tabs(list)`,
|
|
117
|
+
` will no longer show it. The current page may be invalid; call`,
|
|
118
|
+
` browser_tabs(action='select', idx=0) to refocus the original tab,`,
|
|
119
|
+
` then browser_snapshot it. The original tab's DOM may have updated`,
|
|
120
|
+
` via a postMessage handler (e.g. it should now show a "Success" or`,
|
|
121
|
+
` "Payment complete" state).`,
|
|
122
|
+
` d) If the original tab's snapshot looks unchanged (still showing the`,
|
|
123
|
+
` checkout form / login button), the postMessage handler may not`,
|
|
124
|
+
` have fired yet or may not exist. Wait once with`,
|
|
125
|
+
` browser_wait_for_text("<expected success copy>", timeout=3000)`,
|
|
126
|
+
` before concluding the flow is broken.`,
|
|
127
|
+
``,
|
|
128
|
+
` 6. OAuth-style redirect chains: when a tab redirects through several`,
|
|
129
|
+
` origins (myapp → identity provider → /callback?code=… → myapp), watch`,
|
|
130
|
+
` browser_tabs after each browser_snapshot — the same tab idx can switch`,
|
|
131
|
+
` origin underneath you. The URL in browser_tabs(list) is authoritative.`,
|
|
132
|
+
``,
|
|
133
|
+
` 7. Cross-origin cookie/session updates: after the popup closes and you're`,
|
|
134
|
+
` back on the original tab, the server-set session cookie may be present`,
|
|
135
|
+
` in the browser but the React state hasn't yet picked it up. The most`,
|
|
136
|
+
` likely cause is a missing or slow postMessage handler — NOT a real`,
|
|
137
|
+
` bug yet. Try browser_wait_for_text once for the expected logged-in`,
|
|
138
|
+
` copy with a 3s timeout. If nothing shows, report it as a Finding`,
|
|
139
|
+
` ("Original tab did not update after popup closed — likely missing`,
|
|
140
|
+
` postMessage listener or auth refresh"); do NOT browser_navigate to`,
|
|
141
|
+
` same-origin to force a refresh (rule #2 still applies).`,
|
|
142
|
+
``,
|
|
104
143
|
`Narration format — affects how the widget renders your run for the user:`,
|
|
105
144
|
``,
|
|
106
145
|
` Before each LOGICAL STEP (a coherent unit of work like "Open the login`,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hover-dev/core",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.10.0",
|
|
4
4
|
"description": "Hover's local Node service: agent invocation, Playwright CDP preflight, WebSocket bridge.",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"author": "Hyperyond",
|
|
@@ -75,6 +75,7 @@
|
|
|
75
75
|
"verify-spec": "tsx src/scripts/verify-spec.ts",
|
|
76
76
|
"ws-smoke": "tsx src/scripts/ws-smoke.ts",
|
|
77
77
|
"bench-ttfb": "tsx src/scripts/bench-ttfb.ts",
|
|
78
|
+
"bench-multi-tab": "tsx src/scripts/bench-multi-tab.ts",
|
|
78
79
|
"test": "vitest run",
|
|
79
80
|
"test:watch": "vitest"
|
|
80
81
|
},
|