@hover-dev/core 0.16.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -55
- package/dist/agentDirectives.d.ts +55 -0
- package/dist/agentDirectives.d.ts.map +1 -0
- package/dist/agentDirectives.js +276 -0
- package/dist/engine.d.ts +28 -0
- package/dist/engine.d.ts.map +1 -0
- package/dist/engine.js +27 -0
- package/dist/memory/businessMemory.d.ts +29 -0
- package/dist/memory/businessMemory.d.ts.map +1 -0
- package/dist/memory/businessMemory.js +125 -0
- package/dist/playwright/launchChrome.d.ts +18 -0
- package/dist/playwright/launchChrome.d.ts.map +1 -1
- package/dist/playwright/launchChrome.js +46 -3
- package/dist/qa/candidates.d.ts +32 -0
- package/dist/qa/candidates.d.ts.map +1 -0
- package/dist/qa/candidates.js +20 -0
- package/dist/qa/intensity.d.ts +33 -0
- package/dist/qa/intensity.d.ts.map +1 -0
- package/dist/qa/intensity.js +25 -0
- package/dist/qa/qaReport.d.ts +19 -0
- package/dist/qa/qaReport.d.ts.map +1 -0
- package/dist/qa/qaReport.js +50 -0
- package/dist/sessions/sessions.d.ts +125 -0
- package/dist/sessions/sessions.d.ts.map +1 -0
- package/dist/sessions/sessions.js +175 -0
- package/dist/specs/authFixture.d.ts +30 -0
- package/dist/specs/authFixture.d.ts.map +1 -0
- package/dist/specs/authFixture.js +145 -0
- package/dist/specs/detectSharedFlows.d.ts +1 -1
- package/dist/specs/detectSharedFlows.d.ts.map +1 -1
- package/dist/specs/detectSharedFlows.js +20 -21
- package/dist/specs/generatePageObject.d.ts +1 -1
- package/dist/specs/generatePageObject.d.ts.map +1 -1
- package/dist/specs/healPrompt.d.ts +19 -0
- package/dist/specs/healPrompt.d.ts.map +1 -0
- package/dist/specs/healPrompt.js +48 -0
- package/dist/specs/humanSteps.d.ts +4 -8
- package/dist/specs/humanSteps.d.ts.map +1 -1
- package/dist/specs/humanSteps.js +6 -1
- package/dist/specs/optimizeSpec.d.ts +15 -8
- package/dist/specs/optimizeSpec.d.ts.map +1 -1
- package/dist/specs/optimizeSpec.js +71 -41
- package/dist/specs/pageObjectManifest.d.ts +3 -1
- package/dist/specs/pageObjectManifest.d.ts.map +1 -1
- package/dist/specs/pageObjectManifest.js +24 -19
- package/dist/specs/replayGrounded.d.ts +45 -0
- package/dist/specs/replayGrounded.d.ts.map +1 -0
- package/dist/specs/replayGrounded.js +155 -0
- package/dist/specs/runFailures.d.ts +34 -0
- package/dist/specs/runFailures.d.ts.map +1 -0
- package/dist/specs/runFailures.js +93 -0
- package/dist/specs/seeds.d.ts +16 -15
- package/dist/specs/seeds.d.ts.map +1 -1
- package/dist/specs/seeds.js +86 -54
- package/dist/specs/sidecar.d.ts +34 -6
- package/dist/specs/sidecar.d.ts.map +1 -1
- package/dist/specs/sidecar.js +79 -9
- package/dist/specs/specStep.d.ts +21 -0
- package/dist/specs/specStep.d.ts.map +1 -0
- package/dist/specs/specStep.js +1 -0
- package/dist/specs/text.d.ts +8 -6
- package/dist/specs/text.d.ts.map +1 -1
- package/dist/specs/text.js +10 -7
- package/dist/specs/writeSpec.d.ts +62 -1
- package/dist/specs/writeSpec.d.ts.map +1 -1
- package/dist/specs/writeSpec.js +596 -21
- package/package.json +9 -29
- package/dist/agents/aider.d.ts +0 -16
- package/dist/agents/aider.d.ts.map +0 -1
- package/dist/agents/aider.js +0 -161
- package/dist/agents/argv.d.ts +0 -11
- package/dist/agents/argv.d.ts.map +0 -1
- package/dist/agents/argv.js +0 -23
- package/dist/agents/claude.d.ts +0 -3
- package/dist/agents/claude.d.ts.map +0 -1
- package/dist/agents/claude.js +0 -195
- package/dist/agents/codex.d.ts +0 -19
- package/dist/agents/codex.d.ts.map +0 -1
- package/dist/agents/codex.js +0 -216
- package/dist/agents/cursor.d.ts +0 -18
- package/dist/agents/cursor.d.ts.map +0 -1
- package/dist/agents/cursor.js +0 -220
- package/dist/agents/detect.d.ts +0 -46
- package/dist/agents/detect.d.ts.map +0 -1
- package/dist/agents/detect.js +0 -80
- package/dist/agents/gemini.d.ts +0 -17
- package/dist/agents/gemini.d.ts.map +0 -1
- package/dist/agents/gemini.js +0 -186
- package/dist/agents/index.d.ts +0 -6
- package/dist/agents/index.d.ts.map +0 -1
- package/dist/agents/index.js +0 -5
- package/dist/agents/invoke.d.ts +0 -12
- package/dist/agents/invoke.d.ts.map +0 -1
- package/dist/agents/invoke.js +0 -96
- package/dist/agents/qwen.d.ts +0 -17
- package/dist/agents/qwen.d.ts.map +0 -1
- package/dist/agents/qwen.js +0 -172
- package/dist/agents/registry.d.ts +0 -19
- package/dist/agents/registry.d.ts.map +0 -1
- package/dist/agents/registry.js +0 -34
- package/dist/agents/shared.d.ts +0 -28
- package/dist/agents/shared.d.ts.map +0 -1
- package/dist/agents/shared.js +0 -35
- package/dist/agents/types.d.ts +0 -186
- package/dist/agents/types.d.ts.map +0 -1
- package/dist/agents/types.js +0 -23
- package/dist/index.d.ts +0 -3
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js +0 -2
- package/dist/mcp/sourceFence.d.ts +0 -23
- package/dist/mcp/sourceFence.d.ts.map +0 -1
- package/dist/mcp/sourceFence.js +0 -75
- package/dist/mcp/sourceServer.d.ts +0 -3
- package/dist/mcp/sourceServer.d.ts.map +0 -1
- package/dist/mcp/sourceServer.js +0 -116
- package/dist/playwright/cdpStatus.d.ts +0 -29
- package/dist/playwright/cdpStatus.d.ts.map +0 -1
- package/dist/playwright/cdpStatus.js +0 -119
- package/dist/playwright/preflight.d.ts +0 -31
- package/dist/playwright/preflight.d.ts.map +0 -1
- package/dist/playwright/preflight.js +0 -82
- package/dist/playwright/preflightCache.d.ts +0 -27
- package/dist/playwright/preflightCache.d.ts.map +0 -1
- package/dist/playwright/preflightCache.js +0 -21
- package/dist/playwright/raiseWindow.d.ts +0 -10
- package/dist/playwright/raiseWindow.d.ts.map +0 -1
- package/dist/playwright/raiseWindow.js +0 -158
- package/dist/playwright/resolveMcpConfig.d.ts +0 -55
- package/dist/playwright/resolveMcpConfig.d.ts.map +0 -1
- package/dist/playwright/resolveMcpConfig.js +0 -66
- package/dist/plugin-api.d.ts +0 -235
- package/dist/plugin-api.d.ts.map +0 -1
- package/dist/plugin-api.js +0 -52
- package/dist/runSession.d.ts +0 -42
- package/dist/runSession.d.ts.map +0 -1
- package/dist/runSession.js +0 -81
- package/dist/scripts/bench-multi-tab.d.ts +0 -2
- package/dist/scripts/bench-multi-tab.d.ts.map +0 -1
- package/dist/scripts/bench-multi-tab.js +0 -192
- package/dist/scripts/bench-ttfb.d.ts +0 -2
- package/dist/scripts/bench-ttfb.d.ts.map +0 -1
- package/dist/scripts/bench-ttfb.js +0 -127
- package/dist/scripts/start-chrome.d.ts +0 -3
- package/dist/scripts/start-chrome.d.ts.map +0 -1
- package/dist/scripts/start-chrome.js +0 -23
- package/dist/service/cdpHandlers.d.ts +0 -44
- package/dist/service/cdpHandlers.d.ts.map +0 -1
- package/dist/service/cdpHandlers.js +0 -85
- package/dist/service/cdpHint.d.ts +0 -48
- package/dist/service/cdpHint.d.ts.map +0 -1
- package/dist/service/cdpHint.js +0 -216
- package/dist/service/conventions.d.ts +0 -8
- package/dist/service/conventions.d.ts.map +0 -1
- package/dist/service/conventions.js +0 -42
- package/dist/service/saveHandlers.d.ts +0 -52
- package/dist/service/saveHandlers.d.ts.map +0 -1
- package/dist/service/saveHandlers.js +0 -75
- package/dist/service/types.d.ts +0 -58
- package/dist/service/types.d.ts.map +0 -1
- package/dist/service/types.js +0 -26
- package/dist/service.d.ts +0 -50
- package/dist/service.d.ts.map +0 -1
- package/dist/service.js +0 -1065
- package/dist/skills/writeSkill.d.ts +0 -27
- package/dist/skills/writeSkill.d.ts.map +0 -1
- package/dist/skills/writeSkill.js +0 -13
- package/dist/specs/extractPageObjects.d.ts +0 -18
- package/dist/specs/extractPageObjects.d.ts.map +0 -1
- package/dist/specs/extractPageObjects.js +0 -98
- package/dist/specs/listSpecs.d.ts +0 -52
- package/dist/specs/listSpecs.d.ts.map +0 -1
- package/dist/specs/listSpecs.js +0 -139
- package/dist/specs/optimizationSuggestion.d.ts +0 -26
- package/dist/specs/optimizationSuggestion.d.ts.map +0 -1
- package/dist/specs/optimizationSuggestion.js +0 -28
- package/dist/specs/optimizeSpecWithAgent.d.ts +0 -11
- package/dist/specs/optimizeSpecWithAgent.d.ts.map +0 -1
- package/dist/specs/optimizeSpecWithAgent.js +0 -40
- package/dist/specs/writeCaseCsv.d.ts +0 -28
- package/dist/specs/writeCaseCsv.d.ts.map +0 -1
- package/dist/specs/writeCaseCsv.js +0 -134
package/dist/plugin-api.d.ts.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"plugin-api.d.ts","sourceRoot":"","sources":["../src/plugin-api.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH;;;;GAIG;AACH,MAAM,MAAM,eAAe,GAAG,CAAC,CAAC;AAChC,eAAO,MAAM,mBAAmB,EAAE,eAAmB,CAAC;AAMtD,MAAM,WAAW,eAAe;IAC9B,uEAAuE;IACvE,EAAE,EAAE,MAAM,CAAC;IACX,4DAA4D;IAC5D,KAAK,EAAE,MAAM,CAAC;IACd,iDAAiD;IACjD,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;;+BAE2B;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;gEAC4D;IAC5D,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB;;;;;qDAKiD;IACjD,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,oBAAoB;IACnC;gDAC4C;IAC5C,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC7B;8DAC0D;IAC1D,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED,MAAM,WAAW,sBAAsB;IACrC,qDAAqD;IACrD,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB;iFAC6E;IAC7E,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;2EACuE;IACvE,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;yDAEqD;IACrD,KAAK,CAAC,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;IACvC,6EAA6E;IAC7E,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED,MAAM,WAAW,+BAA+B;IAC9C,IAAI,EAAE,MAAM,CAAC;IACb;uDACmD;IACnD,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AAMD,MAAM,WAAW,cAAc;IAC7B;sEACkE;IAClE,CAAC,KAAK,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,CAAC,EAAE,OAAO,CAAA;KAAE,GAAG,IAAI,CAAC;CACpD;AAED,MAAM,WAAW,gBAAgB;IAC/B;;yBAEqB;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,qDAAqD;IACrD,SAAS,EAAE,cAAc,CAAC;CAC3B;AAED;;2EAE2E;AAC3E,MAAM,WAAW,eAAgB,SAAQ,gBAAgB;IACvD,MAAM,EAAE,MAAM,CAAC;IACf;6DACyD;IACzD,cAAc,CAAC,KAAK,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,GAAG,IAAI,CAAC;IACnE;;;;;sDAKkD;IAClD,eAAe,CAAC,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,CAAC;CAChE;AAED;oDACoD;AACpD,MAAM,WAAW,iBAAkB,SAAQ,gBAAgB;IACzD,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;;;;;kDAMkD;AAClD,MAAM,WAAW,eAAgB,SAAQ,gBAAgB;IACvD;oEACgE;IAChE,cAAc,CAAC,KAAK,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,GAAG,IAAI,CAAC;IACnE;sCACkC;IAClC,eAAe,CAAC,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,CAAC;CAChE;AAED;gEACgE;AAChE,MAAM,MAAM,WAAW,GAAG,gBAAgB,CAAC;AAE3C,MAAM,WAAW,UAAU;IACzB,qBAAqB,CAAC,EAAE,CAAC,GAAG,EAAE,eAAe,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACvE,qBAAqB,CAAC,EAAE,CAAC,GAAG,EAAE,eAAe,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACvE,uBAAuB,CAAC,EAAE,CAAC,GAAG,EAAE,iBAAiB,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC3E,wBAAwB,CAAC,EAAE,CAAC,GAAG,EAAE,WAAW,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACvE;AAMD,MAAM,WAAW,mBAAmB;IAClC,8DAA8D;IAC9D,UAAU,EAAE,eAAe,CAAC;IAE5B,6DAA6D;IAC7D,IAAI,EAAE,MAAM,CAAC;IAEb,uDAAuD;IACvD,IAAI,CAAC,EAAE,eAAe,CAAC;IAEvB,qEAAqE;IACrE,UAAU,CAAC,EAAE,oBAAoB,EAAE,CAAC;IAEpC,uDAAuD;IACvD,WAAW,CAAC,EAAE,sBAAsB,CAAC;IAErC;+BAC2B;IAC3B,qBAAqB,CAAC,EAAE,+BAA+B,EAAE,CAAC;IAE1D;;0DAEsD;IACtD,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;IAE5B;;;;;;;;oDAQgD;IAChD,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB;;;;;;;6BAOyB;IACzB,YAAY,CAAC,EAAE,sBAAsB,EAAE,CAAC;IAExC,KAAK,CAAC,EAAE,UAAU,CAAC;CACpB;AAED,MAAM,WAAW,sBAAsB;IACrC;;4EAEwE;IACxE,IAAI,EAAE,MAAM,CAAC;IACb,8EAA8E;IAC9E,KAAK,EAAE,MAAM,CAAC;IACd;2EACuE;IACvE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;oEACgE;IAChE,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB;;;4DAGwD;IACxD,MAAM,CAAC,GAAG,EAAE;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,OAAO,CAAA;KAAE,GAAG,OAAO,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CAC7F;AAMD;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,iBAAiB,CAAC,KAAK,GAAG,IAAI,EAC5C,OAAO,EAAE,CAAC,IAAI,EAAE,KAAK,KAAK,mBAAmB,GAC5C,CAAC,IAAI,EAAE,KAAK,KAAK,mBAAmB,CAYtC"}
|
package/dist/plugin-api.js
DELETED
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Hover plugin API — the public contract third-party packages target.
|
|
3
|
-
*
|
|
4
|
-
* Plugins are *mostly declarative*: they ship a manifest describing what
|
|
5
|
-
* resources they contribute (a mode, MCP servers, Chrome flags, agent
|
|
6
|
-
* prompt fragments, widget event schemas). For genuinely time-bound work
|
|
7
|
-
* — booting a sidecar like mockttp when a mode activates, tearing it down
|
|
8
|
-
* when the mode deactivates or the service shuts down — they register
|
|
9
|
-
* namespaced lifecycle hooks.
|
|
10
|
-
*
|
|
11
|
-
* Patterned after Astro Integrations (declarative manifest + namespaced
|
|
12
|
-
* hooks: `astro:config:setup` etc). The `apiVersion` literal lets us
|
|
13
|
-
* evolve the manifest and reject mismatched plugins at load time with a
|
|
14
|
-
* clear error rather than silent breakage.
|
|
15
|
-
*
|
|
16
|
-
* Stability:
|
|
17
|
-
* - `apiVersion: 1` is what this file declares; breaking changes bump.
|
|
18
|
-
* - Adding new optional fields or new hook names is non-breaking.
|
|
19
|
-
* - Plugin authors should import only from this module; deep imports
|
|
20
|
-
* into `@hover-dev/core` internals are not part of the contract.
|
|
21
|
-
*/
|
|
22
|
-
export const CURRENT_API_VERSION = 1;
|
|
23
|
-
// ──────────────────────────────────────────────────────────────────────
|
|
24
|
-
// Author helper
|
|
25
|
-
// ──────────────────────────────────────────────────────────────────────
|
|
26
|
-
/**
|
|
27
|
-
* Branded factory that wraps a plugin manifest factory. The wrapper
|
|
28
|
-
* - asserts `apiVersion` matches this core's version at construction time
|
|
29
|
-
* (catches authors who copy-pasted from a tutorial for a different core),
|
|
30
|
-
* - returns a `(opts) => manifest` so call sites read `securityMode()` /
|
|
31
|
-
* `perfMode({ sampleHz: 100 })` uniformly.
|
|
32
|
-
*
|
|
33
|
-
* Use:
|
|
34
|
-
*
|
|
35
|
-
* export default defineHoverPlugin<MyOpts>((opts) => ({
|
|
36
|
-
* apiVersion: 1,
|
|
37
|
-
* name: '@hover-dev/security',
|
|
38
|
-
* mode: { id: 'security', label: 'Security testing' },
|
|
39
|
-
* ...
|
|
40
|
-
* }));
|
|
41
|
-
*/
|
|
42
|
-
export function defineHoverPlugin(factory) {
|
|
43
|
-
return (opts) => {
|
|
44
|
-
const manifest = factory(opts);
|
|
45
|
-
if (manifest.apiVersion !== CURRENT_API_VERSION) {
|
|
46
|
-
throw new Error(`[hover] plugin "${manifest.name}" targets apiVersion ` +
|
|
47
|
-
`${String(manifest.apiVersion)} but this Hover supports ` +
|
|
48
|
-
`${CURRENT_API_VERSION}. Update either the plugin or @hover-dev/core.`);
|
|
49
|
-
}
|
|
50
|
-
return manifest;
|
|
51
|
-
};
|
|
52
|
-
}
|
package/dist/runSession.d.ts
DELETED
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
import type { InvokeEvent } from './agents/types.js';
|
|
2
|
-
import type { SkillStep } from './skills/writeSkill.js';
|
|
3
|
-
export interface RunSessionOptions {
|
|
4
|
-
prompt: string;
|
|
5
|
-
agentId: string;
|
|
6
|
-
/** CDP URL of the debug Chrome the agent drives. Required unless `mcpConfig`
|
|
7
|
-
* is supplied (the service passes a pre-built config; the CLI passes this). */
|
|
8
|
-
cdpUrl?: string;
|
|
9
|
-
model?: string;
|
|
10
|
-
maxBudgetUsd?: number;
|
|
11
|
-
/** Optional model API key, injected into the spawned CLI's env. */
|
|
12
|
-
apiKey?: string;
|
|
13
|
-
/** Agent cwd (project root) — where Claude Code reads CLAUDE.md and where a
|
|
14
|
-
* `--save` / re-record writes the spec. Defaults to the process cwd. */
|
|
15
|
-
cwd?: string;
|
|
16
|
-
/** Namespaces the temp MCP config filename. Defaults to 51789. */
|
|
17
|
-
port?: number;
|
|
18
|
-
signal?: AbortSignal;
|
|
19
|
-
/** Pre-built MCP config path. The service supplies one (with plugin servers);
|
|
20
|
-
* when omitted, runSession builds a plugin-free Playwright config from
|
|
21
|
-
* `cdpUrl` via resolveMcpConfig. */
|
|
22
|
-
mcpConfig?: string;
|
|
23
|
-
/** Extra hard-sandbox allow-list prefixes — e.g. active-mode plugin MCP
|
|
24
|
-
* server ids the service contributes. Appended to ['mcp__playwright']. */
|
|
25
|
-
allowedToolsExtra?: string[];
|
|
26
|
-
/** Appended to the agent's system prompt (the service folds in cdpHint +
|
|
27
|
-
* conventions + plugin additions + a language directive; the CLI omits it). */
|
|
28
|
-
appendSystemPrompt?: string;
|
|
29
|
-
/** Resume an existing agent session (a follow-up turn). */
|
|
30
|
-
sessionId?: string;
|
|
31
|
-
}
|
|
32
|
-
export interface RunSessionResult {
|
|
33
|
-
/** Captured session as SpecStep[] (`user` → `step`* → `done`), ready to hand
|
|
34
|
-
* straight to `writeSpec`. */
|
|
35
|
-
steps: SkillStep[];
|
|
36
|
-
/** The agent's final summary, if any. */
|
|
37
|
-
summary: string;
|
|
38
|
-
/** True if the run ended in error or was aborted. */
|
|
39
|
-
isError: boolean;
|
|
40
|
-
}
|
|
41
|
-
export declare function runSession(opts: RunSessionOptions, onEvent: (ev: InvokeEvent) => void): Promise<RunSessionResult>;
|
|
42
|
-
//# sourceMappingURL=runSession.d.ts.map
|
package/dist/runSession.d.ts.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"runSession.d.ts","sourceRoot":"","sources":["../src/runSession.ts"],"names":[],"mappings":"AAoBA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAC;AAGxD,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB;oFACgF;IAChF,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,mEAAmE;IACnE,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB;6EACyE;IACzE,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,kEAAkE;IAClE,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB;;yCAEqC;IACrC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;+EAC2E;IAC3E,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC7B;oFACgF;IAChF,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,2DAA2D;IAC3D,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,gBAAgB;IAC/B;mCAC+B;IAC/B,KAAK,EAAE,SAAS,EAAE,CAAC;IACnB,yCAAyC;IACzC,OAAO,EAAE,MAAM,CAAC;IAChB,qDAAqD;IACrD,OAAO,EAAE,OAAO,CAAC;CAClB;AAED,wBAAsB,UAAU,CAC9B,IAAI,EAAE,iBAAiB,EACvB,OAAO,EAAE,CAAC,EAAE,EAAE,WAAW,KAAK,IAAI,GACjC,OAAO,CAAC,gBAAgB,CAAC,CA4D3B"}
|
package/dist/runSession.js
DELETED
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Headless session runner — the invoke + crystallize engine shared by every
|
|
3
|
-
* frontend. The widget reaches it through the WebSocket service; `smoke.ts`
|
|
4
|
-
* and (future) `hover run` call it in-process, no WS server. It spawns the
|
|
5
|
-
* agent against the user's debug Chrome over CDP, streams normalized events to
|
|
6
|
-
* `onEvent`, and accumulates the captured tool calls into a `SpecStep[]` the
|
|
7
|
-
* caller can hand to `writeSpec` — `user` seed → `step` per tool_use → `done`
|
|
8
|
-
* with the final summary (the exact shape the spec pipeline consumes).
|
|
9
|
-
*
|
|
10
|
-
* No WebSocket, no DOM. It drives an *already-running* debug Chrome over CDP;
|
|
11
|
-
* launching Chrome / CDP preflight is the caller's call (the service does it
|
|
12
|
-
* with autoLaunch; the CLI will too). The sandbox (allow/deny tools) mirrors
|
|
13
|
-
* the service exactly, gated on the agent's `sandboxStrength`.
|
|
14
|
-
*
|
|
15
|
-
* The full surface (mcpConfig override, allowedToolsExtra, appendSystemPrompt,
|
|
16
|
-
* sessionId) lets the service delegate to this instead of duplicating the
|
|
17
|
-
* invoke loop; the CLI uses only the small subset (prompt + cdpUrl + model).
|
|
18
|
-
*/
|
|
19
|
-
import { invokeAgent } from './agents/invoke.js';
|
|
20
|
-
import { getAgent } from './agents/registry.js';
|
|
21
|
-
import { resolveMcpConfig } from './playwright/resolveMcpConfig.js';
|
|
22
|
-
export async function runSession(opts, onEvent) {
|
|
23
|
-
const descriptor = getAgent(opts.agentId);
|
|
24
|
-
const isHardSandbox = descriptor?.sandboxStrength === 'hard';
|
|
25
|
-
// Seed with a synthetic `user` step so writeSpec's JSDoc `Original prompt:`
|
|
26
|
-
// line carries the prompt the agent was given (mirrors the service path).
|
|
27
|
-
const steps = [{ kind: 'user', text: opts.prompt }];
|
|
28
|
-
let summary = '';
|
|
29
|
-
let isError = false;
|
|
30
|
-
const mcpConfig = opts.mcpConfig ??
|
|
31
|
-
resolveMcpConfig({
|
|
32
|
-
cdpUrl: opts.cdpUrl ?? 'http://localhost:9222',
|
|
33
|
-
port: opts.port ?? 51789,
|
|
34
|
-
// Resolve @playwright/mcp from the run's cwd, not the dir the CLI was
|
|
35
|
-
// invoked from — `hover run --cwd apps/web` must find the MCP package
|
|
36
|
-
// under the target workspace in a monorepo.
|
|
37
|
-
cwd: opts.cwd,
|
|
38
|
-
});
|
|
39
|
-
for await (const ev of invokeAgent({
|
|
40
|
-
agentId: opts.agentId,
|
|
41
|
-
prompt: opts.prompt,
|
|
42
|
-
sessionId: opts.sessionId,
|
|
43
|
-
mcpConfig,
|
|
44
|
-
cwd: opts.cwd,
|
|
45
|
-
appendSystemPrompt: opts.appendSystemPrompt,
|
|
46
|
-
// Hard sandbox: only Playwright MCP (+ any active-mode plugin servers) is
|
|
47
|
-
// callable, every built-in tool denied — a hijacked prompt can't reach the
|
|
48
|
-
// shell or filesystem. Soft agents (codex, …) enforce their own sandbox via
|
|
49
|
-
// buildArgs, so the lists stay undefined for them — exactly what the
|
|
50
|
-
// service does.
|
|
51
|
-
allowedTools: isHardSandbox
|
|
52
|
-
? ['mcp__playwright', ...(opts.allowedToolsExtra ?? [])]
|
|
53
|
-
: undefined,
|
|
54
|
-
disallowedTools: isHardSandbox
|
|
55
|
-
? (descriptor?.defaultDisallowedTools ? [...descriptor.defaultDisallowedTools] : undefined)
|
|
56
|
-
: undefined,
|
|
57
|
-
maxBudgetUsd: opts.maxBudgetUsd,
|
|
58
|
-
model: opts.model,
|
|
59
|
-
apiKey: opts.apiKey,
|
|
60
|
-
signal: opts.signal,
|
|
61
|
-
})) {
|
|
62
|
-
onEvent(ev);
|
|
63
|
-
if (ev.kind === 'tool_use') {
|
|
64
|
-
steps.push({ kind: 'step', tool: ev.tool, input: ev.input });
|
|
65
|
-
}
|
|
66
|
-
else if (ev.kind === 'session_end') {
|
|
67
|
-
if (ev.summary)
|
|
68
|
-
summary = ev.summary;
|
|
69
|
-
if (ev.isError)
|
|
70
|
-
isError = true;
|
|
71
|
-
}
|
|
72
|
-
}
|
|
73
|
-
// On abort (opts.signal), invokeAgent SIGTERMs the child and no session_end
|
|
74
|
-
// arrives, so the error flag above never gets set. Honour the doc contract
|
|
75
|
-
// ("True if the run ended in error or was aborted") by flipping it here.
|
|
76
|
-
if (opts.signal?.aborted)
|
|
77
|
-
isError = true;
|
|
78
|
-
if (summary)
|
|
79
|
-
steps.push({ kind: 'done', summary });
|
|
80
|
-
return { steps, summary, isError };
|
|
81
|
-
}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"bench-multi-tab.d.ts","sourceRoot":"","sources":["../../src/scripts/bench-multi-tab.ts"],"names":[],"mappings":""}
|
|
@@ -1,192 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Benchmark agent success rate on the multi-tab "Pay with PayHover" flow.
|
|
3
|
-
*
|
|
4
|
-
* Why this exists — v0.10's central theme is "agent can drive cross-tab
|
|
5
|
-
* flows in the wild." The system-prompt addendum (cdpHint.ts rule 5/6/7)
|
|
6
|
-
* is the lever we're tuning. This script gives us a number to tune
|
|
7
|
-
* against: across N iterations, how often does the agent get from
|
|
8
|
-
* "browse the store" to "Order placed"?
|
|
9
|
-
*
|
|
10
|
-
* Per iteration the agent has to:
|
|
11
|
-
* 1. Browse the e-commerce store, add 1+ items to cart, go to checkout.
|
|
12
|
-
* 2. Fill the shipping form.
|
|
13
|
-
* 3. Pick "Pay with PayHover" (opens a new tab at localhost:5177).
|
|
14
|
-
* 4. Switch to the new tab, fill card number + CVV, click Continue.
|
|
15
|
-
* 5. Wait ~600ms for the simulated 3DS pre-check.
|
|
16
|
-
* 6. Fill the 6-digit OTP (always 123456 in the sandbox).
|
|
17
|
-
* 7. Click Confirm. The provider tab closes itself.
|
|
18
|
-
* 8. Switch back to the original tab, observe the "Order placed" view.
|
|
19
|
-
*
|
|
20
|
-
* Steps 3, 4, 7, 8 are the failure-prone ones.
|
|
21
|
-
*
|
|
22
|
-
* Assumes:
|
|
23
|
-
* - Debug Chrome on :9222 (run `pnpm smoke:chrome`).
|
|
24
|
-
* - e-commerce on :5174 AND payment-provider on :5177 both running
|
|
25
|
-
* (run `pnpm dev:example:e-commerce` and `pnpm dev:example:payment-provider`
|
|
26
|
-
* in two terminals before invoking this).
|
|
27
|
-
*
|
|
28
|
-
* Usage:
|
|
29
|
-
* pnpm --filter @hover-dev/core exec tsx src/scripts/bench-multi-tab.ts [n]
|
|
30
|
-
* pnpm bench-multi-tab [n]
|
|
31
|
-
*
|
|
32
|
-
* `n` defaults to 5. Per-iteration timeout is 5 minutes — multi-tab flows
|
|
33
|
-
* are slow because the agent does a lot of browser_snapshot calls.
|
|
34
|
-
*
|
|
35
|
-
* Output: per-run pass/fail + final summary (success rate, median wall
|
|
36
|
-
* time, median turns, median cost in $). A/B prompt changes by running
|
|
37
|
-
* once on each branch and comparing.
|
|
38
|
-
*/
|
|
39
|
-
import { WebSocket } from 'ws';
|
|
40
|
-
import { startService } from '../service.js';
|
|
41
|
-
const PROMPT = process.env.HOVER_BENCH_PROMPT ??
|
|
42
|
-
[
|
|
43
|
-
'Open http://localhost:5174 (Hover Store).',
|
|
44
|
-
'Add any item to the cart, go to checkout, fill the shipping form with',
|
|
45
|
-
'realistic values, then choose "Pay with PayHover". A new tab opens at',
|
|
46
|
-
'the payment provider — switch to it, fill in card 4242 4242 4242 4242',
|
|
47
|
-
'with CVV 123, click Continue, wait for the OTP step, enter 123456,',
|
|
48
|
-
'click Confirm. The popup will close. Switch back to the original tab',
|
|
49
|
-
'and verify the order shows as placed.',
|
|
50
|
-
].join(' ');
|
|
51
|
-
const ITERATIONS = Number(process.argv[2] ?? 5);
|
|
52
|
-
const PER_RUN_TIMEOUT_MS = 5 * 60 * 1000;
|
|
53
|
-
async function singleRun(idx) {
|
|
54
|
-
process.stderr.write(`\n[bench-multi-tab] run ${idx + 1}/${ITERATIONS}\n`);
|
|
55
|
-
const service = await startService({
|
|
56
|
-
port: 0,
|
|
57
|
-
agentId: 'claude',
|
|
58
|
-
model: 'sonnet',
|
|
59
|
-
cdpUrl: 'http://localhost:9222',
|
|
60
|
-
devRoot: process.cwd(),
|
|
61
|
-
});
|
|
62
|
-
return new Promise((resolve) => {
|
|
63
|
-
const ws = new WebSocket(`ws://127.0.0.1:${service.port}`);
|
|
64
|
-
const t0 = performance.now();
|
|
65
|
-
let turns = 0;
|
|
66
|
-
let costUsd = null;
|
|
67
|
-
let resolved = false;
|
|
68
|
-
const finish = (result) => {
|
|
69
|
-
if (resolved)
|
|
70
|
-
return;
|
|
71
|
-
resolved = true;
|
|
72
|
-
try {
|
|
73
|
-
ws.close(1000);
|
|
74
|
-
}
|
|
75
|
-
catch { /* already closed */ }
|
|
76
|
-
service.close().finally(() => resolve(result));
|
|
77
|
-
};
|
|
78
|
-
const timeout = setTimeout(() => {
|
|
79
|
-
finish({
|
|
80
|
-
ok: false,
|
|
81
|
-
wallMs: performance.now() - t0,
|
|
82
|
-
turns,
|
|
83
|
-
costUsd,
|
|
84
|
-
reason: `timed out after ${PER_RUN_TIMEOUT_MS / 1000}s`,
|
|
85
|
-
});
|
|
86
|
-
}, PER_RUN_TIMEOUT_MS);
|
|
87
|
-
ws.on('open', () => {
|
|
88
|
-
ws.send(JSON.stringify({ type: 'command', payload: { text: PROMPT } }));
|
|
89
|
-
});
|
|
90
|
-
ws.on('message', (raw) => {
|
|
91
|
-
let msg;
|
|
92
|
-
try {
|
|
93
|
-
msg = JSON.parse(raw.toString());
|
|
94
|
-
}
|
|
95
|
-
catch {
|
|
96
|
-
return;
|
|
97
|
-
}
|
|
98
|
-
if (msg.type !== 'event')
|
|
99
|
-
return;
|
|
100
|
-
const ev = msg.payload;
|
|
101
|
-
if (ev.kind === 'tool_use') {
|
|
102
|
-
turns += 1;
|
|
103
|
-
if (process.env.HOVER_BENCH_VERBOSE === '1') {
|
|
104
|
-
const ev2 = ev;
|
|
105
|
-
process.stderr.write(` [turn ${turns}] ${ev2.name ?? '<tool>'}\n`);
|
|
106
|
-
}
|
|
107
|
-
}
|
|
108
|
-
if (ev.kind === 'session_end') {
|
|
109
|
-
clearTimeout(timeout);
|
|
110
|
-
const evAny = ev;
|
|
111
|
-
if (typeof evAny.costUsd === 'number')
|
|
112
|
-
costUsd = evAny.costUsd;
|
|
113
|
-
finish({
|
|
114
|
-
ok: !evAny.isError,
|
|
115
|
-
wallMs: performance.now() - t0,
|
|
116
|
-
turns,
|
|
117
|
-
costUsd,
|
|
118
|
-
reason: evAny.isError ? 'agent reported error' : undefined,
|
|
119
|
-
});
|
|
120
|
-
}
|
|
121
|
-
});
|
|
122
|
-
ws.on('error', (err) => {
|
|
123
|
-
clearTimeout(timeout);
|
|
124
|
-
finish({
|
|
125
|
-
ok: false,
|
|
126
|
-
wallMs: performance.now() - t0,
|
|
127
|
-
turns,
|
|
128
|
-
costUsd,
|
|
129
|
-
reason: `WS error: ${err.message}`,
|
|
130
|
-
});
|
|
131
|
-
});
|
|
132
|
-
});
|
|
133
|
-
}
|
|
134
|
-
function median(xs) {
|
|
135
|
-
if (xs.length === 0)
|
|
136
|
-
return 0;
|
|
137
|
-
const sorted = [...xs].sort((a, b) => a - b);
|
|
138
|
-
const mid = Math.floor(sorted.length / 2);
|
|
139
|
-
return sorted.length % 2 === 0
|
|
140
|
-
? (sorted[mid - 1] + sorted[mid]) / 2
|
|
141
|
-
: sorted[mid];
|
|
142
|
-
}
|
|
143
|
-
function fmtMs(ms) {
|
|
144
|
-
return `${(ms / 1000).toFixed(1)}s`;
|
|
145
|
-
}
|
|
146
|
-
function fmtUsd(usd) {
|
|
147
|
-
return usd == null ? '–' : `$${usd.toFixed(4)}`;
|
|
148
|
-
}
|
|
149
|
-
async function main() {
|
|
150
|
-
process.stderr.write(`[bench-multi-tab] ${ITERATIONS} iterations, per-run timeout ${PER_RUN_TIMEOUT_MS / 1000}s\n`);
|
|
151
|
-
process.stderr.write(`[bench-multi-tab] prompt: ${PROMPT.slice(0, 80)}…\n`);
|
|
152
|
-
const results = [];
|
|
153
|
-
for (let i = 0; i < ITERATIONS; i++) {
|
|
154
|
-
try {
|
|
155
|
-
const r = await singleRun(i);
|
|
156
|
-
results.push(r);
|
|
157
|
-
const status = r.ok ? '✓ PASS' : '✗ FAIL';
|
|
158
|
-
process.stderr.write(`[bench-multi-tab] run ${i + 1}: ${status} · ${fmtMs(r.wallMs)} · ${r.turns} turns · ${fmtUsd(r.costUsd)}${r.reason ? ` · ${r.reason}` : ''}\n`);
|
|
159
|
-
}
|
|
160
|
-
catch (err) {
|
|
161
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
162
|
-
results.push({ ok: false, wallMs: 0, turns: 0, costUsd: null, reason: msg });
|
|
163
|
-
process.stderr.write(`[bench-multi-tab] run ${i + 1}: ✗ FAIL · setup error · ${msg}\n`);
|
|
164
|
-
}
|
|
165
|
-
}
|
|
166
|
-
const passes = results.filter((r) => r.ok);
|
|
167
|
-
const successRate = passes.length / results.length;
|
|
168
|
-
process.stderr.write('\n[bench-multi-tab] summary\n');
|
|
169
|
-
process.stderr.write(` success rate: ${(successRate * 100).toFixed(0)}% (${passes.length}/${results.length})\n`);
|
|
170
|
-
if (passes.length > 0) {
|
|
171
|
-
process.stderr.write(` median wall: ${fmtMs(median(passes.map((r) => r.wallMs)))}\n`);
|
|
172
|
-
process.stderr.write(` median turns: ${median(passes.map((r) => r.turns)).toFixed(0)}\n`);
|
|
173
|
-
const costs = passes.map((r) => r.costUsd).filter((c) => c != null);
|
|
174
|
-
if (costs.length > 0) {
|
|
175
|
-
process.stderr.write(` median cost: ${fmtUsd(median(costs))}\n`);
|
|
176
|
-
}
|
|
177
|
-
}
|
|
178
|
-
if (passes.length < results.length) {
|
|
179
|
-
process.stderr.write(`\n failures:\n`);
|
|
180
|
-
results.forEach((r, i) => {
|
|
181
|
-
if (!r.ok)
|
|
182
|
-
process.stderr.write(` run ${i + 1}: ${r.reason ?? 'unknown'}\n`);
|
|
183
|
-
});
|
|
184
|
-
}
|
|
185
|
-
// Exit non-zero if EVERY run failed — useful for CI plumbing later. A
|
|
186
|
-
// partial-pass run still exits 0 so we collect signal across branches.
|
|
187
|
-
process.exit(passes.length === 0 ? 1 : 0);
|
|
188
|
-
}
|
|
189
|
-
main().catch((err) => {
|
|
190
|
-
process.stderr.write(`[bench-multi-tab] fatal: ${err instanceof Error ? err.stack ?? err.message : String(err)}\n`);
|
|
191
|
-
process.exit(1);
|
|
192
|
-
});
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"bench-ttfb.d.ts","sourceRoot":"","sources":["../../src/scripts/bench-ttfb.ts"],"names":[],"mappings":""}
|
|
@@ -1,127 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Benchmark "time to first tool_use" for the LLM-driven loop.
|
|
3
|
-
*
|
|
4
|
-
* Assumes:
|
|
5
|
-
* - A debug Chrome is running on :9222 (start with `pnpm smoke:chrome`).
|
|
6
|
-
* - A dev server is running so the agent has something to drive
|
|
7
|
-
* (`pnpm dev:example:basic-app`).
|
|
8
|
-
*
|
|
9
|
-
* Per iteration:
|
|
10
|
-
* - Start a fresh Hover service (cold — kills any prior service to avoid
|
|
11
|
-
* cached MCP process state across iterations).
|
|
12
|
-
* - WS-connect, send a fixed command, mark t0 right before send().
|
|
13
|
-
* - Mark t1 on the first tool_use event from the agent.
|
|
14
|
-
* - Report (t1 - t0) in milliseconds. Close service + WS.
|
|
15
|
-
*
|
|
16
|
-
* pnpm --filter @hover-dev/core exec tsx src/scripts/bench-ttfb.ts <n>
|
|
17
|
-
*
|
|
18
|
-
* `n` defaults to 5. Prints individual timings + median + min/max.
|
|
19
|
-
*/
|
|
20
|
-
import { WebSocket } from 'ws';
|
|
21
|
-
import { startService } from '../service.js';
|
|
22
|
-
const PROMPT = process.env.HOVER_BENCH_PROMPT ?? 'Take a snapshot of the page.';
|
|
23
|
-
const ITERATIONS = Number(process.argv[2] ?? 5);
|
|
24
|
-
async function singleRun() {
|
|
25
|
-
const service = await startService({
|
|
26
|
-
// Use 0 to let the kernel pick — avoids cross-iter EADDRINUSE races.
|
|
27
|
-
port: 0,
|
|
28
|
-
agentId: 'claude',
|
|
29
|
-
model: 'sonnet',
|
|
30
|
-
cdpUrl: 'http://localhost:9222',
|
|
31
|
-
devRoot: process.cwd(),
|
|
32
|
-
});
|
|
33
|
-
return new Promise((resolve, reject) => {
|
|
34
|
-
const ws = new WebSocket(`ws://127.0.0.1:${service.port}`);
|
|
35
|
-
let t0 = 0;
|
|
36
|
-
let resolved = false;
|
|
37
|
-
const timeout = setTimeout(() => {
|
|
38
|
-
if (!resolved) {
|
|
39
|
-
ws.close(1000);
|
|
40
|
-
service.close();
|
|
41
|
-
reject(new Error('timed out waiting for first tool_use after 60s'));
|
|
42
|
-
}
|
|
43
|
-
}, 60_000);
|
|
44
|
-
ws.on('open', () => {
|
|
45
|
-
t0 = performance.now();
|
|
46
|
-
ws.send(JSON.stringify({ type: 'command', payload: { text: PROMPT } }));
|
|
47
|
-
});
|
|
48
|
-
ws.on('message', raw => {
|
|
49
|
-
let msg;
|
|
50
|
-
try {
|
|
51
|
-
msg = JSON.parse(raw.toString());
|
|
52
|
-
}
|
|
53
|
-
catch {
|
|
54
|
-
return;
|
|
55
|
-
}
|
|
56
|
-
if (process.env.HOVER_BENCH_VERBOSE === '1') {
|
|
57
|
-
process.stderr.write(` [event] ${raw.toString().slice(0, 200)}\n`);
|
|
58
|
-
}
|
|
59
|
-
if (msg.type !== 'event')
|
|
60
|
-
return;
|
|
61
|
-
const ev = msg.payload;
|
|
62
|
-
if (ev.kind === 'tool_use' && !resolved) {
|
|
63
|
-
const t1 = performance.now();
|
|
64
|
-
const ms = t1 - t0;
|
|
65
|
-
resolved = true;
|
|
66
|
-
clearTimeout(timeout);
|
|
67
|
-
ws.close(1000);
|
|
68
|
-
service.close().finally(() => resolve(ms));
|
|
69
|
-
}
|
|
70
|
-
if (ev.kind === 'session_end' && !resolved) {
|
|
71
|
-
// Ran without any tool_use — agent went text-only or errored.
|
|
72
|
-
// Reject so the bench surfaces the issue instead of recording
|
|
73
|
-
// a misleadingly tiny "first tool_use" timing.
|
|
74
|
-
resolved = true;
|
|
75
|
-
clearTimeout(timeout);
|
|
76
|
-
const evAny = ev;
|
|
77
|
-
const reason = evAny.isError ? 'session_end (error)' : 'session_end without tool_use';
|
|
78
|
-
ws.close(1000);
|
|
79
|
-
service.close().finally(() => reject(new Error(reason)));
|
|
80
|
-
}
|
|
81
|
-
});
|
|
82
|
-
ws.on('error', err => {
|
|
83
|
-
if (resolved)
|
|
84
|
-
return;
|
|
85
|
-
resolved = true;
|
|
86
|
-
clearTimeout(timeout);
|
|
87
|
-
service.close().finally(() => reject(err));
|
|
88
|
-
});
|
|
89
|
-
});
|
|
90
|
-
}
|
|
91
|
-
function median(xs) {
|
|
92
|
-
const sorted = [...xs].sort((a, b) => a - b);
|
|
93
|
-
const mid = Math.floor(sorted.length / 2);
|
|
94
|
-
return sorted.length % 2 === 0
|
|
95
|
-
? (sorted[mid - 1] + sorted[mid]) / 2
|
|
96
|
-
: sorted[mid];
|
|
97
|
-
}
|
|
98
|
-
async function main() {
|
|
99
|
-
console.log(`prompt: ${JSON.stringify(PROMPT)}`);
|
|
100
|
-
console.log(`iterations: ${ITERATIONS}`);
|
|
101
|
-
console.log('');
|
|
102
|
-
const results = [];
|
|
103
|
-
for (let i = 1; i <= ITERATIONS; i++) {
|
|
104
|
-
try {
|
|
105
|
-
const ms = await singleRun();
|
|
106
|
-
results.push(ms);
|
|
107
|
-
console.log(` run ${i}: ${ms.toFixed(0).padStart(5)} ms`);
|
|
108
|
-
}
|
|
109
|
-
catch (err) {
|
|
110
|
-
console.error(` run ${i}: FAILED — ${err instanceof Error ? err.message : String(err)}`);
|
|
111
|
-
}
|
|
112
|
-
// Small gap between runs so any process-cleanup tail can flush.
|
|
113
|
-
await new Promise(r => setTimeout(r, 500));
|
|
114
|
-
}
|
|
115
|
-
if (results.length === 0) {
|
|
116
|
-
console.error('\nNo successful runs.');
|
|
117
|
-
process.exit(1);
|
|
118
|
-
}
|
|
119
|
-
console.log('');
|
|
120
|
-
console.log(`min: ${Math.min(...results).toFixed(0)} ms`);
|
|
121
|
-
console.log(`median: ${median(results).toFixed(0)} ms`);
|
|
122
|
-
console.log(`max: ${Math.max(...results).toFixed(0)} ms`);
|
|
123
|
-
}
|
|
124
|
-
main().catch(err => {
|
|
125
|
-
console.error(err);
|
|
126
|
-
process.exit(1);
|
|
127
|
-
});
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"start-chrome.d.ts","sourceRoot":"","sources":["../../src/scripts/start-chrome.ts"],"names":[],"mappings":""}
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
/**
|
|
3
|
-
* "Start debug Chrome on port 9222" CLI.
|
|
4
|
-
*
|
|
5
|
-
* Two entry points:
|
|
6
|
-
* - Repo dev: `pnpm smoke:chrome` → tsx src/scripts/start-chrome.ts
|
|
7
|
-
* - npm consumer: `pnpm exec hover-chrome` → dist/scripts/start-chrome.js
|
|
8
|
-
* (or `npx hover-chrome`, bin exposed by vite-plugin-hover)
|
|
9
|
-
*
|
|
10
|
-
* All actual launch logic lives in ../playwright/launchChrome.ts.
|
|
11
|
-
*/
|
|
12
|
-
import { launchDebugChrome } from '../playwright/launchChrome.js';
|
|
13
|
-
const result = await launchDebugChrome();
|
|
14
|
-
if (!result.ok) {
|
|
15
|
-
console.error(`[hover:chrome] ${result.reason}`);
|
|
16
|
-
process.exit(1);
|
|
17
|
-
}
|
|
18
|
-
if (result.alreadyRunning) {
|
|
19
|
-
console.log(`[hover:chrome] already listening on ${result.port}`);
|
|
20
|
-
}
|
|
21
|
-
else {
|
|
22
|
-
console.log(`[hover:chrome] ready on ${result.port} (data-dir=${result.userDataDir})`);
|
|
23
|
-
}
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* CDP-related WebSocket message handlers.
|
|
3
|
-
*
|
|
4
|
-
* check-cdp → checkCdpStatus → emit cdp-status
|
|
5
|
-
* launch-chrome → emit "launching" placeholder → launchDebugChrome →
|
|
6
|
-
* re-check status → emit cdp-status
|
|
7
|
-
* focus-debug → focusDebugTab → no message on success (the widget the
|
|
8
|
-
* user is about to focus runs its own check-cdp anyway)
|
|
9
|
-
*
|
|
10
|
-
* Extracted from service.ts during the v0.2.x refactor pass so the main
|
|
11
|
-
* file can be a thin orchestrator.
|
|
12
|
-
*/
|
|
13
|
-
import type { WebSocket } from 'ws';
|
|
14
|
-
import { type LaunchOptions } from '../playwright/launchChrome.js';
|
|
15
|
-
import { type ClientMessage } from './types.js';
|
|
16
|
-
/** Extra launch options surfaced from the active mode (security plugin
|
|
17
|
-
* needs a resident proxy + spki). When none are set, behaviour is identical
|
|
18
|
-
* to pre-v0.7 normal-mode launch. */
|
|
19
|
-
export type LaunchExtras = Pick<LaunchOptions, 'proxy'>;
|
|
20
|
-
/**
|
|
21
|
-
* "Is this widget running inside the debug Chrome?" The widget asks this on
|
|
22
|
-
* connect (and after every status-changing event) so it can render itself as
|
|
23
|
-
* either:
|
|
24
|
-
* - same-window → normal, drives the page
|
|
25
|
-
* - wrong-window → disabled, with a "use the other window" notice
|
|
26
|
-
* - no-cdp → enabled but click triggers launch-chrome instead
|
|
27
|
-
*/
|
|
28
|
-
export declare function handleCheckCdp(ws: WebSocket, msg: ClientMessage, cdpUrl: string, extras?: LaunchExtras): Promise<void>;
|
|
29
|
-
/**
|
|
30
|
-
* Launch a debug Chrome navigated to `pageUrl`, then re-check status. The
|
|
31
|
-
* re-check usually returns 'wrong-window' (because the widget asking is in
|
|
32
|
-
* the user's regular Chrome, not the freshly-launched one) — the widget then
|
|
33
|
-
* displays the "use the other window" state.
|
|
34
|
-
*/
|
|
35
|
-
export declare function handleLaunchChrome(ws: WebSocket, msg: ClientMessage, cdpUrl: string, extras?: LaunchExtras): Promise<void>;
|
|
36
|
-
/**
|
|
37
|
-
* bringToFront the debug-Chrome tab matching `pageUrl`'s origin (or open one
|
|
38
|
-
* if none exists). Used by the wrong-window UI's "switch to debug Chrome"
|
|
39
|
-
* button. Doesn't return cdp-status — bringToFront doesn't change anything
|
|
40
|
-
* the widget cares about, and the widget the user is about to focus is a
|
|
41
|
-
* different page (and will run its own check-cdp on its own ws connection).
|
|
42
|
-
*/
|
|
43
|
-
export declare function handleFocusDebug(ws: WebSocket, msg: ClientMessage, cdpUrl: string, extras?: LaunchExtras): Promise<void>;
|
|
44
|
-
//# sourceMappingURL=cdpHandlers.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"cdpHandlers.d.ts","sourceRoot":"","sources":["../../src/service/cdpHandlers.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AAEpC,OAAO,EAAqB,KAAK,aAAa,EAAE,MAAM,+BAA+B,CAAC;AACtF,OAAO,EAAQ,KAAK,aAAa,EAAE,MAAM,YAAY,CAAC;AAEtD;;sCAEsC;AACtC,MAAM,MAAM,YAAY,GAAG,IAAI,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;AAExD;;;;;;;GAOG;AACH,wBAAsB,cAAc,CAClC,EAAE,EAAE,SAAS,EACb,GAAG,EAAE,aAAa,EAClB,MAAM,EAAE,MAAM,EACd,MAAM,CAAC,EAAE,YAAY,GACpB,OAAO,CAAC,IAAI,CAAC,CAQf;AAED;;;;;GAKG;AACH,wBAAsB,kBAAkB,CACtC,EAAE,EAAE,SAAS,EACb,GAAG,EAAE,aAAa,EAClB,MAAM,EAAE,MAAM,EACd,MAAM,CAAC,EAAE,YAAY,GACpB,OAAO,CAAC,IAAI,CAAC,CA4Bf;AAED;;;;;;GAMG;AACH,wBAAsB,gBAAgB,CACpC,EAAE,EAAE,SAAS,EACb,GAAG,EAAE,aAAa,EAClB,MAAM,EAAE,MAAM,EACd,MAAM,CAAC,EAAE,YAAY,GACpB,OAAO,CAAC,IAAI,CAAC,CAUf"}
|