@hover-dev/core 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -68,11 +68,22 @@ export type InvokeEvent = {
68
68
  kind: 'usage';
69
69
  costUsd?: number;
70
70
  turns?: number;
71
- } | {
71
+ }
72
+ /** End-of-session event. Three terminal states the widget renders distinctly:
73
+ *
74
+ * - normal completion: `isError: false`, no `cancelled` flag
75
+ * - agent / runtime failure: `isError: true`, no `cancelled` flag
76
+ * - user-initiated stop: `cancelled: true` (and we leave `isError: false`
77
+ * so downstream "did the agent fail?" predicates don't conflate
78
+ * "user pressed Stop" with "agent crashed mid-run"). The widget
79
+ * renders this as a neutral "Stopped" state, not a red Failed card.
80
+ */
81
+ | {
72
82
  kind: 'session_end';
73
83
  turns?: number;
74
84
  costUsd?: number;
75
85
  isError?: boolean;
86
+ cancelled?: boolean;
76
87
  summary?: string;
77
88
  } | {
78
89
  kind: 'raw';
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/agents/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,MAAM,MAAM,aAAa,GACrB,MAAM,GACN,OAAO,GACP,KAAK,GACL,QAAQ,CAAC;AAEb,MAAM,MAAM,YAAY,GACpB,aAAa,GACb,KAAK,GACL,YAAY,GACZ,YAAY,CAAC;AAEjB,qBAAa,6BAA8B,SAAQ,KAAK;gBAC1C,OAAO,EAAE,MAAM;CAI5B;AAED,qBAAa,sBAAuB,SAAQ,KAAK;aACnB,OAAO,EAAE,MAAM;gBAAf,OAAO,EAAE,MAAM;CAI5C;AAED,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;yCAGqC;IACrC,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B;6EACyE;IACzE,MAAM,CAAC,EAAE,WAAW,CAAC;CACtB;AAED;;;GAGG;AACH,MAAM,MAAM,WAAW,GACnB;IAAE,IAAI,EAAE,eAAe,CAAC;IAAC,SAAS,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE,GAC5D;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GACtD;IAAE,IAAI,EAAE,UAAU,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,OAAO,CAAC;IAAC,eAAe,CAAC,EAAE,MAAM,CAAA;CAAE,GAC5E;IAAE,IAAI,EAAE,aAAa,CAAC;IAAC,OAAO,CAAC,EAAE,OAAO,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAA;CAAE,GAC5D;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE;AAChC;;;qEAGqE;GACnE;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE,GACnD;IAAE,IAAI,EAAE,aAAa,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,OAAO,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAA;CAAE,GAC9F;IAAE,IAAI,EAAE,KAAK,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC;AAElC;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,MAAM,eAAe,GAAG,MAAM,GAAG,MAAM,CAAC;AAE9C;;;GAGG;AACH,MAAM,WAAW,YAAY;IAC3B,oEAAoE;IACpE,KAAK,EAAE,MAAM,CAAC;IACd,8CAA8C;IAC9C,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;4DACwD;IACxD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;mEAC+D;IAC/D,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;;;;;;;;;;;;GAcG;AACH,MAAM,MAAM,WAAW,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAElD,MAAM,WAAW,eAAe;IAC9B,EAAE
,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,aAAa,CAAC;IACxB,YAAY,EAAE,YAAY,CAAC;IAC3B,eAAe,EAAE,eAAe,CAAC;IACjC,OAAO,EAAE,YAAY,CAAC;IACtB,SAAS,CAAC,IAAI,EAAE,aAAa,GAAG,MAAM,EAAE,CAAC;IACzC;;;;;;OAMG;IACH,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,WAAW,GAAG,WAAW,EAAE,CAAC;IAC7D;;;;;;;;;OASG;IACH,WAAW,CAAC,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI,EAAE,KAAK,CAAC,EAAE,WAAW,GAAG,WAAW,GAAG,IAAI,CAAC;CAChF"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/agents/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,MAAM,MAAM,aAAa,GACrB,MAAM,GACN,OAAO,GACP,KAAK,GACL,QAAQ,CAAC;AAEb,MAAM,MAAM,YAAY,GACpB,aAAa,GACb,KAAK,GACL,YAAY,GACZ,YAAY,CAAC;AAEjB,qBAAa,6BAA8B,SAAQ,KAAK;gBAC1C,OAAO,EAAE,MAAM;CAI5B;AAED,qBAAa,sBAAuB,SAAQ,KAAK;aACnB,OAAO,EAAE,MAAM;gBAAf,OAAO,EAAE,MAAM;CAI5C;AAED,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;yCAGqC;IACrC,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B;6EACyE;IACzE,MAAM,CAAC,EAAE,WAAW,CAAC;CACtB;AAED;;;GAGG;AACH,MAAM,MAAM,WAAW,GACnB;IAAE,IAAI,EAAE,eAAe,CAAC;IAAC,SAAS,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE,GAC5D;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GACtD;IAAE,IAAI,EAAE,UAAU,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,OAAO,CAAC;IAAC,eAAe,CAAC,EAAE,MAAM,CAAA;CAAE,GAC5E;IAAE,IAAI,EAAE,aAAa,CAAC;IAAC,OAAO,CAAC,EAAE,OAAO,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAA;CAAE,GAC5D;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE;AAChC;;;qEAGqE;GACnE;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE;AACrD;;;;;;;;GAQG;GACD;IAAE,IAAI,EAAE,aAAa,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,OAAO,CAAC;IAAC,SAAS,CAAC,EAAE,OAAO,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAA;CAAE,GACnH;IAAE,IAAI,EAAE,KAAK,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC;AAElC;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,MAAM,eAAe,GAAG,MAAM,GAAG,MAAM,CAAC;AAE9C;;;GAGG;AACH,MAAM,WAAW,YAAY;IAC3B,oEAAoE;IACpE,KAAK,EAAE,MAAM,CAAC;IACd,8CAA8C;IAC9C,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;4DACwD;IACxD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;mEAC+D;IAC/D,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;;;;;;;;;;;;GAcG;AACH,MAAM,MAAM,WAAW,GAAG,MAAM,CAAC,MAAM,EAAE
,OAAO,CAAC,CAAC;AAElD,MAAM,WAAW,eAAe;IAC9B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,aAAa,CAAC;IACxB,YAAY,EAAE,YAAY,CAAC;IAC3B,eAAe,EAAE,eAAe,CAAC;IACjC,OAAO,EAAE,YAAY,CAAC;IACtB,SAAS,CAAC,IAAI,EAAE,aAAa,GAAG,MAAM,EAAE,CAAC;IACzC;;;;;;OAMG;IACH,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,WAAW,GAAG,WAAW,EAAE,CAAC;IAC7D;;;;;;;;;OASG;IACH,WAAW,CAAC,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI,EAAE,KAAK,CAAC,EAAE,WAAW,GAAG,WAAW,GAAG,IAAI,CAAC;CAChF"}
@@ -12,7 +12,7 @@
12
12
  * trigger a launch.
13
13
  */
14
14
  import { chromium } from 'playwright-core';
15
- import { preflightCDP } from './preflight.js';
15
+ import { getPreflight } from './preflightCache.js';
16
16
  import { findCdpPid, raiseChromeWindow } from './raiseWindow.js';
17
17
  /**
18
18
  * Parse a page URL down to its origin (protocol + host + port). We compare
@@ -33,7 +33,7 @@ export async function checkCdpStatus(cdpUrl, pageUrl) {
33
33
  // Treat unparseable page URLs as no-cdp so the UI nudges a relaunch.
34
34
  return { state: 'no-cdp', reason: `unparseable page URL: ${pageUrl}` };
35
35
  }
36
- const cdp = await preflightCDP(cdpUrl);
36
+ const cdp = await getPreflight(cdpUrl);
37
37
  if (!cdp.ok) {
38
38
  return { state: 'no-cdp', reason: cdp.reason };
39
39
  }
@@ -0,0 +1,27 @@
1
+ import { preflightCDP } from './preflight.js';
2
+ /**
3
+ * Per-cdpUrl preflight cache.
4
+ *
5
+ * Why this exists: Hover preflights the CDP endpoint (`/json/version` +
6
+ * `/json/list`) before every agent command AND on every widget connect's
7
+ * `check-cdp` ping. Each `/json/list` round-trip is ~30–80 ms on local
8
+ * Chrome; doing it twice for the same request — once from the command
9
+ * path, once from the widget's CDP banner — adds up, and the widget pings
10
+ * frequently because Vite HMR cycles the WebSocket connection.
11
+ *
12
+ * The cache is shared across both paths and keyed by `cdpUrl` so multiple
13
+ * Hover services (one per example app in this monorepo, each with its own
14
+ * CDP endpoint someday) don't share entries. 30 s TTL — Chrome's tab list
15
+ * doesn't drift faster than that during a dev session, and any failure
16
+ * (Chrome killed, --remote-debugging-port closed) invalidates via
17
+ * `invalidatePreflight()` on the next failed agent invocation.
18
+ *
19
+ * Successful preflights are cached; failures are not (so the user gets
20
+ * immediate feedback the next time they fix the underlying issue —
21
+ * starting Chrome, fixing the wrong port).
22
+ */
23
+ type Result = Awaited<ReturnType<typeof preflightCDP>>;
24
+ export declare function getPreflight(cdpUrl: string): Promise<Result>;
25
+ export declare function invalidatePreflight(cdpUrl: string): void;
26
+ export {};
27
+ //# sourceMappingURL=preflightCache.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"preflightCache.d.ts","sourceRoot":"","sources":["../../src/playwright/preflightCache.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAE9C;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,KAAK,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,OAAO,YAAY,CAAC,CAAC,CAAC;AAMvD,wBAAsB,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAalE;AAED,wBAAgB,mBAAmB,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAExD"}
@@ -0,0 +1,21 @@
1
+ import { preflightCDP } from './preflight.js';
2
+ const TTL_MS = 30_000;
3
+ const cache = new Map();
4
+ export async function getPreflight(cdpUrl) {
5
+ const now = Date.now();
6
+ const hit = cache.get(cdpUrl);
7
+ if (hit && hit.result.ok && now - hit.at < TTL_MS) {
8
+ return hit.result;
9
+ }
10
+ const result = await preflightCDP(cdpUrl);
11
+ if (result.ok) {
12
+ cache.set(cdpUrl, { result, at: now });
13
+ }
14
+ else {
15
+ cache.delete(cdpUrl);
16
+ }
17
+ return result;
18
+ }
19
+ export function invalidatePreflight(cdpUrl) {
20
+ cache.delete(cdpUrl);
21
+ }
@@ -0,0 +1,27 @@
1
+ /**
2
+ * Resolve a ready-to-use MCP config file path that points at the local
3
+ * `@playwright/mcp` package via an absolute Node-resolved path.
4
+ *
5
+ * Why this exists: Hover originally shipped a static `mcp.config.json`
6
+ * with `"command": "npx", "args": ["-y", "@playwright/mcp@latest", …]`.
7
+ * That meant every `claude -p` invocation kicked off a registry lookup
8
+ * for `@latest` plus a tarball metadata round-trip before the MCP server
9
+ * even started — adding 300 ms - 2 s of dead air to first-token latency
10
+ * on every command (verified via `time npx -y @playwright/mcp@latest`).
11
+ *
12
+ * The fix is to (a) declare `@playwright/mcp` as a real dependency of
13
+ * `@hover-dev/core` so npm resolves it locally at install time, and
14
+ * (b) write a synthetic config file pointing `node <abs-path>/cli.js`
15
+ * at the resolved location. No registry hit on the hot path.
16
+ *
17
+ * The config file is written to `<tmpdir>/hover/mcp-config-<port>.json`,
18
+ * which lets multiple Hover services (one per example app) coexist
19
+ * without stepping on each other's CDP endpoint.
20
+ */
21
+ export declare function resolveMcpConfig(opts: {
22
+ /** CDP URL passed to the MCP server's `--cdp-endpoint` flag. */
23
+ cdpUrl: string;
24
+ /** Service port — used to namespace the temp config file. */
25
+ port: number;
26
+ }): string;
27
+ //# sourceMappingURL=resolveMcpConfig.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"resolveMcpConfig.d.ts","sourceRoot":"","sources":["../../src/playwright/resolveMcpConfig.ts"],"names":[],"mappings":"AAMA;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE;IACrC,gEAAgE;IAChE,MAAM,EAAE,MAAM,CAAC;IACf,6DAA6D;IAC7D,IAAI,EAAE,MAAM,CAAC;CACd,GAAG,MAAM,CAsCT"}
@@ -0,0 +1,62 @@
1
+ import { createRequire } from 'node:module';
2
+ import { mkdirSync, writeFileSync } from 'node:fs';
3
+ import { dirname, resolve } from 'node:path';
4
+ import { tmpdir } from 'node:os';
5
+ import process from 'node:process';
6
+ /**
7
+ * Resolve a ready-to-use MCP config file path that points at the local
8
+ * `@playwright/mcp` package via an absolute Node-resolved path.
9
+ *
10
+ * Why this exists: Hover originally shipped a static `mcp.config.json`
11
+ * with `"command": "npx", "args": ["-y", "@playwright/mcp@latest", …]`.
12
+ * That meant every `claude -p` invocation kicked off a registry lookup
13
+ * for `@latest` plus a tarball metadata round-trip before the MCP server
14
+ * even started — adding 300 ms - 2 s of dead air to first-token latency
15
+ * on every command (verified via `time npx -y @playwright/mcp@latest`).
16
+ *
17
+ * The fix is to (a) declare `@playwright/mcp` as a real dependency of
18
+ * `@hover-dev/core` so npm resolves it locally at install time, and
19
+ * (b) write a synthetic config file pointing `node <abs-path>/cli.js`
20
+ * at the resolved location. No registry hit on the hot path.
21
+ *
22
+ * The config file is written to `<tmpdir>/hover/mcp-config-<port>.json`,
23
+ * which lets multiple Hover services (one per example app) coexist
24
+ * without stepping on each other's CDP endpoint.
25
+ */
26
export function resolveMcpConfig(opts) {
    // Module resolution is anchored at `<cwd>/package.json` rather than at
    // `import.meta.url`. Per the original authors' note, under Next 16's
    // Turbopack `import.meta.url` can be a virtual "[project]/..." URL that
    // does not map to a real file, which makes the resolved path unusable.
    const anchor = resolve(process.cwd(), 'package.json');
    const fromProject = createRequire(anchor);
    // Locate the installed `@playwright/mcp` by resolving its package.json;
    // the directory containing that file is the package root.
    const mcpRoot = dirname(fromProject.resolve('@playwright/mcp/package.json'));
    // Invoke the package's `cli.js` directly with the current Node binary,
    // so no bin shim on PATH (and no `npx` lookup) is involved.
    const cliPath = resolve(mcpRoot, 'cli.js');
    const payload = JSON.stringify({
        mcpServers: {
            playwright: {
                command: process.execPath,
                args: [cliPath, '--cdp-endpoint', opts.cdpUrl],
            },
        },
    }, null, 2);
    // One file per service port: `<tmpdir>/hover/mcp-config-<port>.json`.
    const targetDir = resolve(tmpdir(), 'hover');
    mkdirSync(targetDir, { recursive: true });
    const targetFile = resolve(targetDir, `mcp-config-${opts.port}.json`);
    writeFileSync(targetFile, payload, 'utf-8');
    return targetFile;
}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=bench-ttfb.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bench-ttfb.d.ts","sourceRoot":"","sources":["../../src/scripts/bench-ttfb.ts"],"names":[],"mappings":""}
@@ -0,0 +1,127 @@
1
+ /**
2
+ * Benchmark "time to first tool_use" for the LLM-driven loop.
3
+ *
4
+ * Assumes:
5
+ * - A debug Chrome is running on :9222 (start with `pnpm smoke:chrome`).
6
+ * - A dev server is running so the agent has something to drive
7
+ * (`pnpm dev:example:basic-app`).
8
+ *
9
+ * Per iteration:
10
+ * - Start a fresh Hover service (cold — kills any prior service to avoid
11
+ * cached MCP process state across iterations).
12
+ * - WS-connect, send a fixed command, mark t0 right before send().
13
+ * - Mark t1 on the first tool_use event from the agent.
14
+ * - Report (t1 - t0) in milliseconds. Close service + WS.
15
+ *
16
+ * pnpm --filter @hover-dev/core exec tsx src/scripts/bench-ttfb.ts <n>
17
+ *
18
+ * `n` defaults to 5. Prints individual timings + median + min/max.
19
+ */
20
+ import { WebSocket } from 'ws';
21
+ import { startService } from '../service.js';
22
+ const PROMPT = process.env.HOVER_BENCH_PROMPT ?? 'Take a snapshot of the page.';
23
+ const ITERATIONS = Number(process.argv[2] ?? 5);
24
/**
 * Run one cold benchmark iteration and measure time to the first `tool_use`.
 *
 * Starts a fresh service via `startService`, opens a WebSocket to it, sends
 * the fixed `PROMPT` as a `command` message, and waits for the first
 * `tool_use` event.
 *
 * @returns Promise resolving to the elapsed milliseconds between sending the
 *          command and receiving the first `tool_use` event.
 *          Rejects when: no `tool_use` arrives within 60 s; a `session_end`
 *          arrives first; or the WebSocket emits `error`.
 *
 * The promise settles exactly once, and only after `service.close()` has
 * finished, so the next iteration never overlaps with this one's teardown.
 */
async function singleRun() {
    const service = await startService({
        // Use 0 to let the kernel pick — avoids cross-iter EADDRINUSE races.
        port: 0,
        agentId: 'claude',
        model: 'sonnet',
        cdpUrl: 'http://localhost:9222',
        devRoot: process.cwd(),
    });
    return new Promise((resolve, reject) => {
        const ws = new WebSocket(`ws://127.0.0.1:${service.port}`);
        let t0 = 0;
        let settled = false;
        // Single exit path: every outcome (success, session_end, socket
        // error, timeout) goes through here, so teardown runs once and late
        // events cannot trigger a second `service.close()`.
        const finish = (settle, closeSocket = true) => {
            if (settled)
                return;
            settled = true;
            clearTimeout(timeout);
            if (closeSocket)
                ws.close(1000);
            service.close()
                .catch(err => {
                    // A failed teardown must not surface as an unhandled
                    // rejection (fatal by default in current Node); report
                    // it and still settle the run with its real outcome.
                    const why = err instanceof Error ? err.message : String(err);
                    process.stderr.write(`[bench] service.close() failed: ${why}\n`);
                })
                .then(settle);
        };
        const timeout = setTimeout(() => {
            finish(() => reject(new Error('timed out waiting for first tool_use after 60s')));
        }, 60_000);
        ws.on('open', () => {
            // t0 is taken immediately before send() so connection setup is
            // excluded from the measurement.
            t0 = performance.now();
            ws.send(JSON.stringify({ type: 'command', payload: { text: PROMPT } }));
        });
        ws.on('message', raw => {
            const text = raw.toString();
            let msg;
            try {
                msg = JSON.parse(text);
            }
            catch {
                // Non-JSON frames are not events; ignore them.
                return;
            }
            if (process.env.HOVER_BENCH_VERBOSE === '1') {
                process.stderr.write(` [event] ${text.slice(0, 200)}\n`);
            }
            // Optional chaining: valid JSON may still be `null` or lack a
            // payload, which must not throw inside the event handler.
            if (settled || msg?.type !== 'event')
                return;
            const ev = msg.payload;
            if (ev?.kind === 'tool_use') {
                const ms = performance.now() - t0;
                finish(() => resolve(ms));
                return;
            }
            if (ev?.kind === 'session_end') {
                // Ran without any tool_use — agent went text-only or errored.
                // Reject so the bench surfaces the issue instead of recording
                // a misleadingly tiny "first tool_use" timing.
                const reason = ev.isError ? 'session_end (error)' : 'session_end without tool_use';
                finish(() => reject(new Error(reason)));
            }
        });
        ws.on('error', err => {
            // The socket is already failing; as before, do not call close()
            // on it here — only tear the service down.
            finish(() => reject(err), false);
        });
    });
}
91
/**
 * Median of a list of numbers.
 *
 * Works on a sorted copy, so `xs` itself is left untouched. For an even
 * count the result is the mean of the two middle values; for an empty list
 * the arithmetic yields `NaN`.
 *
 * @param xs Numbers to summarise.
 * @returns The median value.
 */
function median(xs) {
    const ordered = Array.from(xs);
    ordered.sort((lo, hi) => lo - hi);
    const half = Math.floor(ordered.length / 2);
    if (ordered.length % 2 !== 0) {
        return ordered[half];
    }
    return (ordered[half - 1] + ordered[half]) / 2;
}
98
/**
 * Benchmark driver: runs `singleRun()` `ITERATIONS` times in sequence,
 * prints each timing (or the failure message), then prints min / median /
 * max over the successful runs. Exits with status 1 when no run succeeded.
 */
async function main() {
    console.log(`prompt: ${JSON.stringify(PROMPT)}`);
    console.log(`iterations: ${ITERATIONS}`);
    console.log('');
    const timings = [];
    // Small gap between runs so any process-cleanup tail can flush.
    const pause = () => new Promise(done => setTimeout(done, 500));
    for (let attempt = 1; attempt <= ITERATIONS; attempt += 1) {
        try {
            const elapsed = await singleRun();
            timings.push(elapsed);
            const shown = elapsed.toFixed(0).padStart(5);
            console.log(` run ${attempt}: ${shown} ms`);
        }
        catch (err) {
            // A failed run is reported and skipped; it does not abort the
            // remaining iterations.
            const why = err instanceof Error ? err.message : String(err);
            console.error(` run ${attempt}: FAILED — ${why}`);
        }
        await pause();
    }
    if (timings.length === 0) {
        console.error('\nNo successful runs.');
        process.exit(1);
    }
    console.log('');
    const fastest = Math.min(...timings);
    console.log(`min: ${fastest.toFixed(0)} ms`);
    const middle = median(timings);
    console.log(`median: ${middle.toFixed(0)} ms`);
    const slowest = Math.max(...timings);
    console.log(`max: ${slowest.toFixed(0)} ms`);
}
124
+ main().catch(err => {
125
+ console.error(err);
126
+ process.exit(1);
127
+ });
@@ -12,9 +12,37 @@
12
12
  *
13
13
  * Lives in its own file because this string is the most-tuned text in the
14
14
  * repo and the easiest to break with a typo. Tests can import directly.
15
+ *
16
+ * Two-tier split (since v0.4.x perf pass):
17
+ * - `buildCdpHint(tabs)` returns the full rules + narration block.
18
+ * Used on the *first* turn of a session (no `--resume`).
19
+ * - `buildCdpHintResume(tabs)` returns ONLY the volatile tab list +
20
+ * likely-active tab URL (the nav-guard is NOT repeated). Used on subsequent turns once `--resume`
21
+ * re-anchors the agent to the prior turn's full system prompt —
22
+ * the stable rules are already in context, so re-sending them
23
+ * fragments Anthropic's prompt cache and bills ~500 extra input
24
+ * tokens per turn for zero behavioural change.
15
25
  */
16
- export declare function buildCdpHint(tabs: {
26
+ interface Tab {
17
27
  url: string;
18
28
  title?: string;
19
- }[]): string;
29
+ }
30
+ export declare function buildCdpHint(tabs: Tab[]): string;
31
+ /**
32
+ * Volatile-only hint for `--resume` turns: just the tab list snapshot.
33
+ * Empty string when the tab list is empty (nothing to refresh).
34
+ *
35
+ * The rules and narration format from `buildCdpHint` are already
36
+ * established in the prior turn's context; re-sending them here would
37
+ * fragment Anthropic's prompt-cache fingerprint (cache hits require the
38
+ * system prompt to match byte-for-byte across turns) and bill ~500
39
+ * extra input tokens per follow-up turn for no behaviour change.
40
+ *
41
+ * We DO re-send the tab list because it can drift between turns (user
42
+ * opens a second tab, switches focus). The active-origin nav-guard is
43
+ * not repeated — the agent has it from turn 1 and the tab-list update
44
+ * keeps it grounded in the current URL.
45
+ */
46
+ export declare function buildCdpHintResume(tabs: Tab[]): string;
47
+ export {};
20
48
  //# sourceMappingURL=cdpHint.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"cdpHint.d.ts","sourceRoot":"","sources":["../../src/service/cdpHint.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAEH,wBAAgB,YAAY,CAAC,IAAI,EAAE;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE,EAAE,GAAG,MAAM,CAmE5E"}
1
+ {"version":3,"file":"cdpHint.d.ts","sourceRoot":"","sources":["../../src/service/cdpHint.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH,UAAU,GAAG;IAAG,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE;AAa7C,wBAAgB,YAAY,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,MAAM,CA4FhD;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,MAAM,CAYtD"}
@@ -12,10 +12,20 @@
12
12
  *
13
13
  * Lives in its own file because this string is the most-tuned text in the
14
14
  * repo and the easiest to break with a typo. Tests can import directly.
15
+ *
16
+ * Two-tier split (since v0.4.x perf pass):
17
+ * - `buildCdpHint(tabs)` returns the full rules + narration block.
18
+ * Used on the *first* turn of a session (no `--resume`).
19
+ * - `buildCdpHintResume(tabs)` returns ONLY the volatile tab list +
20
+ * likely-active tab URL (the nav-guard is NOT repeated). Used on subsequent turns once `--resume`
21
+ * re-anchors the agent to the prior turn's full system prompt —
22
+ * the stable rules are already in context, so re-sending them
23
+ * fragments Anthropic's prompt cache and bills ~500 extra input
24
+ * tokens per turn for zero behavioural change.
15
25
  */
16
- export function buildCdpHint(tabs) {
26
+ function resolveActiveOrigin(tabs) {
17
27
  if (tabs.length === 0)
18
- return '';
28
+ return null;
19
29
  // Prefer the localhost tab if we have multiple — that's almost always the
20
30
  // dev server the user is testing against.
21
31
  const localhost = tabs.find(t => /localhost|127\.0\.0\.1/.test(t.url));
@@ -25,7 +35,43 @@ export function buildCdpHint(tabs) {
25
35
  activeOrigin = new URL(active.url).origin;
26
36
  }
27
37
  catch { /* malformed url — fall back to no-origin guard */ }
38
+ return { active, activeOrigin };
39
+ }
40
+ export function buildCdpHint(tabs) {
41
+ const resolved = resolveActiveOrigin(tabs);
42
+ if (!resolved)
43
+ return '';
44
+ const { active, activeOrigin } = resolved;
28
45
  return [
46
+ `Your job — read this first:`,
47
+ ``,
48
+ ` You are an end-to-end testing agent. Your standing mission is to drive`,
49
+ ` the user's web app through the browser, EXERCISE its interactive`,
50
+ ` surface, and report bugs or unexpected behaviour.`,
51
+ ``,
52
+ ` If the user's prompt is specific ("log in as alice and add a todo"),`,
53
+ ` do that and verify the outcome.`,
54
+ ``,
55
+ ` If the user's prompt is vague or short ("test", "check", "see if it`,
56
+ ` works", "find bugs", or a single word), DO NOT ask for clarification`,
57
+ ` and DO NOT just take a snapshot and call it done. Run a real`,
58
+ ` exploratory test pass:`,
59
+ ``,
60
+ ` 1. browser_snapshot to learn the app's structure.`,
61
+ ` 2. Identify the main interactive surfaces (forms, buttons, links,`,
62
+ ` inputs, navigation). Plan 2–5 distinct user flows to exercise.`,
63
+ ` 3. Drive each flow end-to-end. Submit forms with real-ish input,`,
64
+ ` click through navigation, exercise lists / counters / toggles.`,
65
+ ` Try a couple of edge cases — empty submissions, invalid input,`,
66
+ ` boundary values — and observe the response.`,
67
+ ` 4. Note anything that looks broken, inconsistent, slow, or`,
68
+ ` confusing in the final summary's "## Findings" section.`,
69
+ ``,
70
+ ` A short "App is running fine" reply after one snapshot is NOT an`,
71
+ ` acceptable result. Either the app actually works and you ran several`,
72
+ ` flows to confirm it, or you found something interesting — those are`,
73
+ ` the only two valid outcomes of a vague prompt.`,
74
+ ``,
29
75
  `The user's Chrome currently has these tabs open:`,
30
76
  ...tabs.map(t => ` - ${t.url}${t.title ? ` (${t.title})` : ''}`),
31
77
  ``,
@@ -84,3 +130,32 @@ export function buildCdpHint(tabs) {
84
130
  ` final summary so they group cleanly. Mid-run, just narrate the next step.`,
85
131
  ].join('\n');
86
132
  }
133
+ /**
134
+ * Volatile-only hint for `--resume` turns: just the tab list snapshot.
135
+ * Empty string when the tab list is empty (nothing to refresh).
136
+ *
137
+ * The rules and narration format from `buildCdpHint` are already
138
+ * established in the prior turn's context; re-sending them here would
139
+ * fragment Anthropic's prompt-cache fingerprint (cache hits require the
140
+ * system prompt to match byte-for-byte across turns) and bill ~500
141
+ * extra input tokens per follow-up turn for no behaviour change.
142
+ *
143
+ * We DO re-send the tab list because it can drift between turns (user
144
+ * opens a second tab, switches focus). The active-origin nav-guard is
145
+ * not repeated — the agent has it from turn 1 and the tab-list update
146
+ * keeps it grounded in the current URL.
147
+ */
148
+ export function buildCdpHintResume(tabs) {
149
+ const resolved = resolveActiveOrigin(tabs);
150
+ if (!resolved)
151
+ return '';
152
+ const { active } = resolved;
153
+ return [
154
+ `(Resumed session — full nav + narration rules already in context.)`,
155
+ ``,
156
+ `Current Chrome tabs:`,
157
+ ...tabs.map(t => ` - ${t.url}${t.title ? ` (${t.title})` : ''}`),
158
+ ``,
159
+ `Likely active dev tab: ${active.url}`,
160
+ ].join('\n');
161
+ }
@@ -1 +1 @@
1
- {"version":3,"file":"service.d.ts","sourceRoot":"","sources":["../src/service.ts"],"names":[],"mappings":"AAmEA,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,gFAAgF;IAChF,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB;;;6EAGyE;IACzE,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,aAAa;IAC5B;4EACwE;IACxE,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACxB;AAiDD,wBAAsB,YAAY,CAAC,IAAI,EAAE,cAAc,GAAG,OAAO,CAAC,aAAa,CAAC,CAwV/E"}
1
+ {"version":3,"file":"service.d.ts","sourceRoot":"","sources":["../src/service.ts"],"names":[],"mappings":"AA+DA,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,gFAAgF;IAChF,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB;;;6EAGyE;IACzE,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,aAAa;IAC5B;4EACwE;IACxE,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACxB;AAiDD,wBAAsB,YAAY,CAAC,IAAI,EAAE,cAAc,GAAG,OAAO,CAAC,aAAa,CAAC,CAiV/E"}
package/dist/service.js CHANGED
@@ -35,20 +35,17 @@
35
35
  * server → client (in addition to those documented in the file body):
36
36
  * { type: 'agents', payload: { current: string, available: AgentAvailability[] } }
37
37
  */
38
- import { dirname, resolve } from 'node:path';
39
- import { fileURLToPath } from 'node:url';
40
38
  import { WebSocketServer, WebSocket } from 'ws';
41
39
  import { invokeAgent } from './agents/invoke.js';
42
40
  import { listAgentAvailability, pickPrimaryAgent, } from './agents/detect.js';
43
41
  import { getAgent } from './agents/registry.js';
44
- import { preflightCDP } from './playwright/preflight.js';
42
+ import { getPreflight, invalidatePreflight } from './playwright/preflightCache.js';
43
+ import { resolveMcpConfig } from './playwright/resolveMcpConfig.js';
45
44
  import { listSkills } from './skills/writeSkill.js';
46
45
  import { send } from './service/types.js';
47
- import { buildCdpHint } from './service/cdpHint.js';
46
+ import { buildCdpHint, buildCdpHintResume } from './service/cdpHint.js';
48
47
  import { handleCheckCdp, handleLaunchChrome, handleFocusDebug, } from './service/cdpHandlers.js';
49
48
  import { handleSaveArtifact, SKILL_CONFIG, SPEC_CONFIG, CASE_CSV_CONFIG, } from './service/saveHandlers.js';
50
- const HERE = dirname(fileURLToPath(import.meta.url));
51
- const DEFAULT_MCP_CONFIG = resolve(HERE, '..', 'mcp.config.json');
52
49
  // ClientMessage + send moved to ./service/types.ts so the cdp + save
53
50
  // handler modules can share them. See those files for the wire shape.
54
51
  const PROTOCOL_VERSION = 1;
@@ -121,11 +118,15 @@ export async function startService(opts) {
121
118
  // so the user can hit Stop when they've seen enough. Pass maxBudgetUsd
122
119
  // explicitly (or via the Vite plugin option) if a hard ceiling is needed.
123
120
  const maxBudgetUsd = opts.maxBudgetUsd;
124
- const mcpConfig = opts.mcpConfig ?? DEFAULT_MCP_CONFIG;
125
121
  const cdpUrl = opts.cdpUrl ?? 'http://localhost:9222';
126
122
  const devRoot = opts.devRoot ?? process.cwd();
127
123
  const wss = await pickAndBind('127.0.0.1', requestedPort, PORT_RETRIES);
128
124
  const port = wss.address().port;
125
+ // Resolve a CDP-pinned MCP config pointing at our local
126
+ // `@playwright/mcp` install. See resolveMcpConfig.ts for the rationale
127
+ // (avoids `npx -y @playwright/mcp@latest`'s registry round-trip on
128
+ // every command — 300 ms - 2 s of hot-path latency).
129
+ const mcpConfig = opts.mcpConfig ?? resolveMcpConfig({ cdpUrl, port });
129
130
  // Surface post-listen errors instead of crashing the host process.
130
131
  wss.on('error', err => {
131
132
  process.stderr.write(`[hover] WebSocketServer error: ${err.message}\n`);
@@ -141,38 +142,10 @@ export async function startService(opts) {
141
142
  }
142
143
  return agentAvailabilityCache;
143
144
  };
144
- // Cache the CDP preflight result for a short window. preflightCDP() does
145
- // two HTTP roundtrips to Chrome's debug endpoint (/json/version +
146
- // /json/list); on a multi-turn session that's 100-300ms of latency before
147
- // every follow-up directly observable as a pause between hitting send
148
- // and the agent starting work. A 5s TTL is comfortably shorter than the
149
- // time it takes a user to type+send another message, but long enough that
150
- // back-to-back commands skip the roundtrip. Failures are NOT cached so
151
- // the user gets immediate feedback when they fix the underlying issue
152
- // (e.g. start Chrome). Cache is invalidated whenever an invocation fails
153
- // (defensive: if MCP somehow spawned its own Chromium we want the next
154
- // preflight to re-probe).
155
- const PREFLIGHT_TTL_MS = 5000;
156
- let cachedPreflight = null;
157
- let cachedPreflightAt = 0;
158
- const getPreflight = async () => {
159
- const now = Date.now();
160
- if (cachedPreflight?.ok && now - cachedPreflightAt < PREFLIGHT_TTL_MS) {
161
- return cachedPreflight;
162
- }
163
- const result = await preflightCDP(cdpUrl);
164
- if (result.ok) {
165
- cachedPreflight = result;
166
- cachedPreflightAt = now;
167
- }
168
- else {
169
- cachedPreflight = null;
170
- }
171
- return result;
172
- };
173
- const invalidatePreflight = () => {
174
- cachedPreflight = null;
175
- };
145
+ // The CDP preflight cache (shared between this service's command path
146
+ // and the widget's `check-cdp` ping via `cdpStatus.checkCdpStatus`)
147
+ // lives in ./playwright/preflightCache.ts. 30-second TTL, keyed by
148
+ // cdpUrl. See that file for the rationale.
176
149
  const broadcastAgents = async () => {
177
150
  const available = await getAvailability(false);
178
151
  const payload = { current: currentAgentId, available };
@@ -209,11 +182,18 @@ export async function startService(opts) {
209
182
  // Send a synthetic session_end so the widget resets to idle immediately.
210
183
  // The for-await loop below short-circuits on `cancelled`, so no events
211
184
  // from the dying child will arrive after this.
185
+ //
186
+ // `cancelled: true` is the load-bearing field — it lets the widget
187
+ // distinguish "user pressed Stop" from "agent crashed". `isError`
188
+ // stays false because the agent didn't fail: the user chose to
189
+ // end the run. The widget renders this as a neutral "Stopped"
190
+ // state rather than a red Failed card.
212
191
  send(ws, {
213
192
  type: 'event',
214
193
  payload: {
215
194
  kind: 'session_end',
216
- isError: true,
195
+ isError: false,
196
+ cancelled: true,
217
197
  summary: 'cancelled by user',
218
198
  },
219
199
  });
@@ -324,7 +304,7 @@ export async function startService(opts) {
324
304
  // Playwright MCP server would silently launch its own Chromium —
325
305
  // and Hover's premise is to drive the user's existing Chrome (with
326
306
  // their dev state, cookies, devtools open), never spawn a fresh one.
327
- const cdp = await getPreflight();
307
+ const cdp = await getPreflight(cdpUrl);
328
308
  if (!cdp.ok) {
329
309
  send(ws, {
330
310
  type: 'event',
@@ -342,7 +322,15 @@ export async function startService(opts) {
342
322
  // a wasteful full-page reload that also destroys the Hover widget
343
323
  // momentarily (the widget re-injects + recovers, but the agent's
344
324
  // own session sometimes gets confused).
345
- const appendSystemPrompt = buildCdpHint(cdp.tabs);
325
+ // First turn pays the full rules + narration block; follow-up
326
+ // turns (`resumeSessionId` set) get only the volatile tab list.
327
+ // The static rules are already in the prior turn's context, and
328
+ // re-sending them fragments Anthropic's prompt-cache fingerprint
329
+ // (cache hits require byte-identical system prompts across turns).
330
+ // See cdpHint.ts for the why.
331
+ const appendSystemPrompt = resumeSessionId
332
+ ? buildCdpHintResume(cdp.tabs)
333
+ : buildCdpHint(cdp.tabs);
346
334
  // Snapshot the agent id so a switch-agent message during the run
347
335
  // can't smear two agents across one invocation. (We also gate
348
336
  // switch-agent on `busy`, but defense in depth.)
@@ -411,7 +399,7 @@ export async function startService(opts) {
411
399
  // Chrome dying, MCP spawning a stray Chromium, the user closing
412
400
  // their debug window — anything that would make a cached "all
413
401
  // healthy" result lie.
414
- invalidatePreflight();
402
+ invalidatePreflight(cdpUrl);
415
403
  }
416
404
  finally {
417
405
  busy = false;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hover-dev/core",
3
- "version": "0.3.1",
3
+ "version": "0.3.3",
4
4
  "description": "Hover's local Node service: agent invocation, Playwright CDP preflight, WebSocket bridge.",
5
5
  "license": "Apache-2.0",
6
6
  "author": "Hyperyond",
@@ -42,10 +42,10 @@
42
42
  },
43
43
  "files": [
44
44
  "dist",
45
- "mcp.config.json",
46
45
  "README.md"
47
46
  ],
48
47
  "dependencies": {
48
+ "@playwright/mcp": "0.0.75",
49
49
  "cross-spawn": "^7.0.6",
50
50
  "playwright-core": "^1.50.0",
51
51
  "ws": "^8.20.1"
@@ -70,6 +70,7 @@
70
70
  "verify-skill": "tsx src/scripts/verify-skill.ts",
71
71
  "verify-spec": "tsx src/scripts/verify-spec.ts",
72
72
  "ws-smoke": "tsx src/scripts/ws-smoke.ts",
73
+ "bench-ttfb": "tsx src/scripts/bench-ttfb.ts",
73
74
  "test": "vitest run",
74
75
  "test:watch": "vitest"
75
76
  },
package/mcp.config.json DELETED
@@ -1,12 +0,0 @@
1
- {
2
- "mcpServers": {
3
- "playwright": {
4
- "command": "npx",
5
- "args": [
6
- "-y",
7
- "@playwright/mcp@latest",
8
- "--cdp-endpoint", "http://localhost:9222"
9
- ]
10
- }
11
- }
12
- }