@hover-dev/core 0.19.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/engine.d.ts CHANGED
@@ -19,12 +19,12 @@ export { writeApiSpec } from './specs/writeApiSpec.js';
19
19
  export type { ApiCheck, WriteApiSpecOptions, WriteApiSpecResult } from './specs/writeApiSpec.js';
20
20
  export { replayGroundedSteps, replayOnPage, applyGroundedStep, groundedLocate } from './specs/replayGrounded.js';
21
21
  export type { ReplayResult, ReplayFailure, ReplayStep, GroundedTarget } from './specs/replayGrounded.js';
22
+ export { readSidecar } from './specs/sidecar.js';
23
+ export type { SpecSidecar } from './specs/sidecar.js';
22
24
  export { launchDebugChrome, closeDebugChrome, findChromeBinary } from './playwright/launchChrome.js';
23
25
  export type { LaunchOptions, LaunchResult } from './playwright/launchChrome.js';
24
- export { GROUNDED_ACTUATION_DENY, GROUNDED_ACTUATION_DIRECTIVE } from './agentDirectives.js';
25
26
  export { loadMemory, formatMemoryForPrompt, writeFact, memoryDir } from './memory/businessMemory.js';
26
27
  export type { BusinessFact } from './memory/businessMemory.js';
27
- export { RECON_DIRECTIVE, QA_EXPLORATION_DIRECTIVE } from './agentDirectives.js';
28
28
  export { QA_INTENSITY, DEFAULT_QA_INTENSITY, asQaIntensity, qaBudgetDirective } from './qa/intensity.js';
29
29
  export type { QaIntensity, QaIntensitySpec } from './qa/intensity.js';
30
30
  //# sourceMappingURL=engine.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"engine.d.ts","sourceRoot":"","sources":["../src/engine.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAGH,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACjD,YAAY,EAAE,gBAAgB,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACzF,YAAY,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAErD,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AACvD,YAAY,EAAE,QAAQ,EAAE,mBAAmB,EAAE,kBAAkB,EAAE,MAAM,yBAAyB,CAAC;AAEjG,OAAO,EAAE,mBAAmB,EAAE,YAAY,EAAE,iBAAiB,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AACjH,YAAY,EAAE,YAAY,EAAE,aAAa,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAGzG,OAAO,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AACrG,YAAY,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,8BAA8B,CAAC;AAGhF,OAAO,EAAE,uBAAuB,EAAE,4BAA4B,EAAE,MAAM,sBAAsB,CAAC;AAG7F,OAAO,EAAE,UAAU,EAAE,qBAAqB,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,4BAA4B,CAAC;AACrG,YAAY,EAAE,YAAY,EAAE,MAAM,4BAA4B,CAAC;AAG/D,OAAO,EAAE,eAAe,EAAE,wBAAwB,EAAE,MAAM,sBAAsB,CAAC;AACjF,OAAO,EAAE,YAAY,EAAE,oBAAoB,EAAE,aAAa,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AACzG,YAAY,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC"}
1
+ {"version":3,"file":"engine.d.ts","sourceRoot":"","sources":["../src/engine.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAGH,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACjD,YAAY,EAAE,gBAAgB,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACzF,YAAY,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAErD,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AACvD,YAAY,EAAE,QAAQ,EAAE,mBAAmB,EAAE,kBAAkB,EAAE,MAAM,yBAAyB,CAAC;AAEjG,OAAO,EAAE,mBAAmB,EAAE,YAAY,EAAE,iBAAiB,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AACjH,YAAY,EAAE,YAAY,EAAE,aAAa,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAEzG,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,YAAY,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAGtD,OAAO,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AACrG,YAAY,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,8BAA8B,CAAC;AAGhF,OAAO,EAAE,UAAU,EAAE,qBAAqB,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,4BAA4B,CAAC;AACrG,YAAY,EAAE,YAAY,EAAE,MAAM,4BAA4B,CAAC;AAG/D,OAAO,EAAE,YAAY,EAAE,oBAAoB,EAAE,aAAa,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AACzG,YAAY,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC"}
package/dist/engine.js CHANGED
@@ -16,14 +16,13 @@
16
16
  export { writeSpec } from './specs/writeSpec.js';
17
17
  // API-layer crystallizer — observed/replayed requests → *.api-test.spec.ts.
18
18
  export { writeApiSpec } from './specs/writeApiSpec.js';
19
- // Creation-verification: replay a flow's grounded steps over CDP (no playwright test).
19
+ // Creation-verification + self-heal: replay a flow's grounded steps over CDP (no playwright test).
20
20
  export { replayGroundedSteps, replayOnPage, applyGroundedStep, groundedLocate } from './specs/replayGrounded.js';
21
+ // Spec sidecar (recorded grounded steps) — read by self-heal to replay a saved spec.
22
+ export { readSidecar } from './specs/sidecar.js';
21
23
  // ── debug-Chrome lifecycle ───────────────────────────────────────────────────
22
24
  export { launchDebugChrome, closeDebugChrome, findChromeBinary } from './playwright/launchChrome.js';
23
- // ── grounded-actuation knobs (the deny-list + directive the agent runs under) ─
24
- export { GROUNDED_ACTUATION_DENY, GROUNDED_ACTUATION_DIRECTIVE } from './agentDirectives.js';
25
25
  // ── business memory (ask → remember loop) ────────────────────────────────────
26
26
  export { loadMemory, formatMemoryForPrompt, writeFact, memoryDir } from './memory/businessMemory.js';
27
- // ── autonomous-exploration directives + intensity (the test_app workflow) ─────
28
- export { RECON_DIRECTIVE, QA_EXPLORATION_DIRECTIVE } from './agentDirectives.js';
27
+ // ── QA intensity (step budget; parked until wired into the workflow) ──────────
29
28
  export { QA_INTENSITY, DEFAULT_QA_INTENSITY, asQaIntensity, qaBudgetDirective } from './qa/intensity.js';
@@ -81,10 +81,10 @@ export interface SessionRecord {
81
81
  * "writes out" the call (a known tool-calling glitch, common at end-of-turn /
82
82
  * budget cap) and the parser renders it verbatim into the report + Done card.
83
83
  * This keeps user-facing prose about the APP, not Hover's tooling
84
- * (REPORTING_DIRECTIVE). Defensive + total: any agent can trip this.
84
+ * (the markdown-report contract). Defensive + total: any agent can trip this.
85
85
  */
86
86
  export declare function stripToolCallNoise(text: string): string;
87
- /** Markdown-forced: the agent emits a plain-markdown report (REPORTING_DIRECTIVE)
87
+ /** Markdown-forced: the agent emits a plain-markdown report (the markdown-report contract)
88
88
  * — ONE outcome line, `- ` bullets, and an optional `## Findings` section with
89
89
  * `- **severity** — text` items. Parse the summary + findings from that markdown
90
90
  * only; a stray ```json block (a non-compliant agent) is stripped, never parsed
@@ -19,7 +19,7 @@ function deEsc(s) {
19
19
  return s.replace(/\\r\\n/g, '\n').replace(/\\n/g, '\n').replace(/\\t/g, ' ');
20
20
  }
21
21
  /** Defensive leak-guard, NOT a parse path. The agent is directed to emit a
22
- * plain-markdown report (REPORTING_DIRECTIVE) — no JSON. A non-compliant agent
22
+ * plain-markdown report (the markdown-report contract) — no JSON. A non-compliant agent
23
23
  * that still wraps its report in a ```json block would otherwise leak raw JSON
24
24
  * to the UI, so strip it: recover the `summary` field as prose when present
25
25
  * (tolerating unescaped quotes by matching up to `","findings"`), else drop the
@@ -41,7 +41,7 @@ function stripJsonArtifact(summary) {
41
41
  * "writes out" the call (a known tool-calling glitch, common at end-of-turn /
42
42
  * budget cap) and the parser renders it verbatim into the report + Done card.
43
43
  * This keeps user-facing prose about the APP, not Hover's tooling
44
- * (REPORTING_DIRECTIVE). Defensive + total: any agent can trip this.
44
+ * (the markdown-report contract). Defensive + total: any agent can trip this.
45
45
  */
46
46
  export function stripToolCallNoise(text) {
47
47
  return text
@@ -53,7 +53,7 @@ export function stripToolCallNoise(text) {
53
53
  .replace(/\n{3,}/g, '\n\n')
54
54
  .trim();
55
55
  }
56
- /** Markdown-forced: the agent emits a plain-markdown report (REPORTING_DIRECTIVE)
56
+ /** Markdown-forced: the agent emits a plain-markdown report (the markdown-report contract)
57
57
  * — ONE outcome line, `- ` bullets, and an optional `## Findings` section with
58
58
  * `- **severity** — text` items. Parse the summary + findings from that markdown
59
59
  * only; a stray ```json block (a non-compliant agent) is stripped, never parsed
@@ -1 +1 @@
1
- {"version":3,"file":"humanSteps.d.ts","sourceRoot":"","sources":["../../src/specs/humanSteps.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AACH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAEtD,uEAAuE;AACvE,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,OAAO,GAAG,MAAM,GAAG,IAAI,CAwDxE;AAED;;;;;GAKG;AACH,wBAAgB,UAAU,CAAC,KAAK,EAAE,SAAS,EAAE,GAAG,MAAM,EAAE,CAmBvD"}
1
+ {"version":3,"file":"humanSteps.d.ts","sourceRoot":"","sources":["../../src/specs/humanSteps.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AACH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAEtD,uEAAuE;AACvE,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,OAAO,GAAG,MAAM,GAAG,IAAI,CAoExE;AAED;;;;;GAKG;AACH,wBAAgB,UAAU,CAAC,KAAK,EAAE,SAAS,EAAE,GAAG,MAAM,EAAE,CAmBvD"}
@@ -40,6 +40,18 @@ export function humanStep(tool, rawInput) {
40
40
  const key = String(input.key ?? '');
41
41
  return key ? `Press ${key}` : null;
42
42
  }
43
+ // Grounded control tools (MCP-first) — the target is role+name/testId/text
44
+ // ON the input itself, not an `element` string.
45
+ case 'click_control':
46
+ return `Click ${describeGrounded(input)}`;
47
+ case 'fill_control':
48
+ return `Fill ${describeGrounded(input)} with ${quote(String(input.value ?? ''))}`;
49
+ case 'select_control':
50
+ return `Select ${quote(String(input.value ?? ''))} in ${describeGrounded(input)}`;
51
+ case 'check_control':
52
+ return `${input.checked === false ? 'Uncheck' : 'Check'} ${describeGrounded(input)}`;
53
+ case 'assert_visible':
54
+ return `Expect ${describeGrounded(input)} to be visible`;
43
55
  // Diagnostic / read-only — same skip list as writeSpec.translateStep.
44
56
  case 'browser_wait_for':
45
57
  case 'browser_tabs':
@@ -91,6 +103,23 @@ function describe(raw) {
91
103
  const s = String(raw ?? '').trim();
92
104
  return s.length > 0 ? s : 'the target element';
93
105
  }
106
+ /** Human phrase for a GROUNDED target ({ role, name, testId, text }) — the shape
107
+ * the *_control tools carry, vs the old browser_* tools' `element` string. */
108
+ function describeGrounded(input) {
109
+ const role = typeof input.role === 'string' ? input.role : '';
110
+ const name = typeof input.name === 'string' ? input.name : '';
111
+ const testId = typeof input.testId === 'string' ? input.testId : '';
112
+ const text = typeof input.text === 'string' ? input.text : '';
113
+ if (role && name)
114
+ return `${role} "${name}"`;
115
+ if (name)
116
+ return `"${name}"`;
117
+ if (testId)
118
+ return `testId "${testId}"`;
119
+ if (text)
120
+ return `"${text}"`;
121
+ return 'the target element';
122
+ }
94
123
  /** Wrap in double-quotes for prose; escape internal quotes. A redacted
95
124
  * credential (stored as a `process.env.X …` expression) shows as the masked
96
125
  * `$X` instead — the prose, like the code, never reveals the secret. */
@@ -1 +1 @@
1
- {"version":3,"file":"writeSpec.d.ts","sourceRoot":"","sources":["../../src/specs/writeSpec.ts"],"names":[],"mappings":"AAqBA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAatD,MAAM,MAAM,QAAQ,GAAG,SAAS,CAAC;AAEjC;;;;;;;GAOG;AACH,eAAO,MAAM,kBAAkB,yBAAyB,CAAC;AAEzD;;0DAE0D;AAC1D,wBAAgB,uBAAuB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAE9D;AA2CD,MAAM,WAAW,aAAa;IAC5B,oEAAoE;IACpE,IAAI,EAAE,MAAM,CAAC;IACb,qDAAqD;IACrD,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,qBAAa,eAAgB,SAAQ,KAAK;aACZ,IAAI,EAAE,MAAM;aAAkB,IAAI,EAAE,MAAM;gBAA1C,IAAI,EAAE,MAAM,EAAkB,IAAI,EAAE,MAAM;CAIvE;AAED;;;;;;;GAOG;AACH,MAAM,WAAW,SAAS;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,QAAQ,EAAE,CAAC;IAClB,UAAU,CAAC,EAAE,aAAa,EAAE,CAAC;IAC7B,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,0EAA0E;IAC1E,UAAU,CAAC,EAAE,SAAS,EAAE,CAAC;IACzB;;;;yEAIqE;IACrE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;sFAIkF;IAClF,WAAW,CAAC,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;QAAC,IAAI,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IACtE;;;;qEAIiE;IACjE,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAiDD,MAAM,WAAW,eAAe;IAC9B,8EAA8E;IAC9E,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,+EAA+E;IAC/E,KAAK,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IACtD;;;;;2DAKuD;IACvD,gBAAgB,CAAC,EAAE;QAAE,UAAU,EAAE,MAAM,CAAC;QAAC,cAAc,EAAE,MAAM,CAAA;KAAE,CAAC;CACnE;AAQD,wBAAsB,SAAS,CAAC,IAAI,EAAE,gBAAgB,GAAG,OAAO,CAAC,eAAe,CAAC,CAchF;AA+mBD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAgB,eAAe,CAAC,YAAY,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,CAM9E;AAED;;oCAEoC;AACpC,wBAAgB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,CAEpD;AAED;;;;GAIG;AACH,wBAAgB,uBAAuB,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,SAAS,GAAG,MAAM,CAqB9E;AA0DD;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,SAAS,GAAG,MAAM,CAMxE;AAED;;;;GAIG;AACH,wBAAgB,oBAAoB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,EAAE,OAAO,SAAS,GAAG,MAAM,CAQ1F"}
1
+ {"version":3,"file":"writeSpec.d.ts","sourceRoot":"","sources":["../../src/specs/writeSpec.ts"],"names":[],"mappings":"AAqBA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAatD,MAAM,MAAM,QAAQ,GAAG,SAAS,CAAC;AAEjC;;;;;;;GAOG;AACH,eAAO,MAAM,kBAAkB,yBAAyB,CAAC;AAEzD;;0DAE0D;AAC1D,wBAAgB,uBAAuB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAE9D;AA2CD,MAAM,WAAW,aAAa;IAC5B,oEAAoE;IACpE,IAAI,EAAE,MAAM,CAAC;IACb,qDAAqD;IACrD,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,qBAAa,eAAgB,SAAQ,KAAK;aACZ,IAAI,EAAE,MAAM;aAAkB,IAAI,EAAE,MAAM;gBAA1C,IAAI,EAAE,MAAM,EAAkB,IAAI,EAAE,MAAM;CAIvE;AAED;;;;;;;GAOG;AACH,MAAM,WAAW,SAAS;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,QAAQ,EAAE,CAAC;IAClB,UAAU,CAAC,EAAE,aAAa,EAAE,CAAC;IAC7B,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,0EAA0E;IAC1E,UAAU,CAAC,EAAE,SAAS,EAAE,CAAC;IACzB;;;;yEAIqE;IACrE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;sFAIkF;IAClF,WAAW,CAAC,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;QAAC,IAAI,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IACtE;;;;qEAIiE;IACjE,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAiDD,MAAM,WAAW,eAAe;IAC9B,8EAA8E;IAC9E,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,+EAA+E;IAC/E,KAAK,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IACtD;;;;;2DAKuD;IACvD,gBAAgB,CAAC,EAAE;QAAE,UAAU,EAAE,MAAM,CAAC;QAAC,cAAc,EAAE,MAAM,CAAA;KAAE,CAAC;CACnE;AAQD,wBAAsB,SAAS,CAAC,IAAI,EAAE,gBAAgB,GAAG,OAAO,CAAC,eAAe,CAAC,CAchF;AAonBD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAgB,eAAe,CAAC,YAAY,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,CAM9E;AAED;;oCAEoC;AACpC,wBAAgB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,CAEpD;AAED;;;;GAIG;AACH,wBAAgB,uBAAuB,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,SAAS,GAAG,MAAM,CAqB9E;AA0DD;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,SAAS,GAAG,MAAM,CAMxE;AAED;;;;GAIG;AACH,wBAAgB,oBAAoB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,EAAE,OAAO,SAAS,GAAG,MAAM,CAQ1F"}
@@ -201,13 +201,17 @@ async function writeOneSpec(opts, slug, displayName, rawSteps) {
201
201
  const envVars = (opts.redactions ?? []).map(r => r.envVar);
202
202
  const detectedPrefix = authPrefixLength(cleanActions, envVars);
203
203
  const userConfigName = PLAYWRIGHT_CONFIG_NAMES.find(n => existsSync(join(opts.devRoot, n)));
204
+ // A config Hover scaffolded earlier this run (e.g. a non-login spec wrote a
205
+ // plain one first) is OURS to upgrade — treat it like "no user config", not a
206
+ // hands-off user file. ensurePlaywrightConfig adds the setup project to it.
207
+ const ownScaffold = !!userConfigName && readFileSync(join(opts.devRoot, userConfigName), 'utf-8').includes(SCAFFOLD_MARKER);
204
208
  // Already opted in: auth.setup.ts exists from a prior approval (and the config
205
209
  // already registers it), so engage AUTOMATICALLY — don't re-ask or re-edit.
206
210
  const authSetupExists = existsSync(join(dir, 'auth.setup.ts'));
207
211
  // Engage the fixture when a login is detected AND we can register the setup
208
- // project: we scaffold the config (no user config), the caller approved editing
209
- // it (opts.authFixture, Stage 4), or the fixture was already set up earlier.
210
- const engage = detectedPrefix > 0 && (!userConfigName || opts.authFixture === true || authSetupExists);
212
+ // project: we scaffold/own the config, the caller approved editing a user
213
+ // config (opts.authFixture, Stage 4), or the fixture was already set up earlier.
214
+ const engage = detectedPrefix > 0 && (!userConfigName || ownScaffold || opts.authFixture === true || authSetupExists);
211
215
  const authPrefix = engage ? detectedPrefix : 0;
212
216
  const authFile = engage ? AUTH_STATE_FILE : undefined;
213
217
  let authFixtureOffer;
@@ -1005,28 +1009,24 @@ async function ensureResetStateHelper(devRoot, keys) {
1005
1009
  ].join('\n');
1006
1010
  await writeFile(join(dir, 'resetState.ts'), source, 'utf-8');
1007
1011
  }
1008
- async function ensurePlaywrightConfig(devRoot, steps, startUrl, authFile) {
1009
- if (PLAYWRIGHT_CONFIG_NAMES.some(n => existsSync(join(devRoot, n))))
1010
- return;
1011
- const origin = firstNavigateOrigin(steps) ?? originOf(startUrl);
1012
- if (!origin)
1013
- return;
1014
- // Auth-as-fixture: register a `setup` project (matches auth.setup.ts) that the
1015
- // main project depends on, so login runs ONCE before the specs. Only emitted
1016
- // when scaffolding our own config (we never touch a user's existing one).
1012
+ const SCAFFOLD_MARKER = 'Scaffolded by Hover';
1013
+ /** The scaffolded config source. When `authFile` is set, a `setup` project runs
1014
+ * auth.setup.ts ONCE and the main `chromium` project reuses the saved
1015
+ * storageState so EVERY spec starts authenticated, not just the login flow. */
1016
+ function renderScaffoldConfig(origin, authFile) {
1017
1017
  const projects = authFile
1018
1018
  ? [
1019
1019
  ` projects: [`,
1020
1020
  ` { name: 'setup', testMatch: /.*\\.setup\\.ts$/ },`,
1021
- ` { name: 'chromium', dependencies: ['setup'] },`,
1021
+ ` { name: 'chromium', dependencies: ['setup'], use: { storageState: ${JSON.stringify(authFile)} } },`,
1022
1022
  ` ],`,
1023
1023
  ]
1024
1024
  : [];
1025
- const source = [
1025
+ return [
1026
1026
  `import { defineConfig } from '@playwright/test';`,
1027
1027
  ``,
1028
1028
  `/**`,
1029
- ` * Scaffolded by Hover so crystallized specs (which use relative URLs like`,
1029
+ ` * ${SCAFFOLD_MARKER} so crystallized specs (which use relative URLs like`,
1030
1030
  ` * page.goto("/")) resolve against a base. Override HOVER_BASE_URL in CI to`,
1031
1031
  ` * point the same specs at staging/prod.`,
1032
1032
  ` */`,
@@ -1039,7 +1039,50 @@ async function ensurePlaywrightConfig(devRoot, steps, startUrl, authFile) {
1039
1039
  `});`,
1040
1040
  ``,
1041
1041
  ].join('\n');
1042
- await writeFile(join(devRoot, 'playwright.config.ts'), source, 'utf-8');
1042
+ }
1043
+ async function ensurePlaywrightConfig(devRoot, steps, startUrl, authFile) {
1044
+ const origin = firstNavigateOrigin(steps) ?? originOf(startUrl);
1045
+ if (!origin)
1046
+ return;
1047
+ const existingName = PLAYWRIGHT_CONFIG_NAMES.find(n => existsSync(join(devRoot, n)));
1048
+ if (existingName) {
1049
+ // A config already exists. Only ever UPGRADE our OWN scaffold — and only to
1050
+ // add the auth `setup` project when a login was just lifted (authFile) and
1051
+ // it isn't there yet. This makes auth order-independent: whichever spec in
1052
+ // the run triggers auth-fixture upgrades the config, even if a non-login
1053
+ // spec scaffolded a plain config first. A user's own config is never touched
1054
+ // (that's the Stage-4 approval flow via authFixtureOffer).
1055
+ if (!authFile)
1056
+ return;
1057
+ try {
1058
+ const cur = readFileSync(join(devRoot, existingName), 'utf-8');
1059
+ if (!cur.includes(SCAFFOLD_MARKER) || cur.includes(`name: 'setup'`))
1060
+ return;
1061
+ await writeFile(join(devRoot, existingName), renderScaffoldConfig(origin, authFile), 'utf-8');
1062
+ }
1063
+ catch { /* upgrade is best-effort */ }
1064
+ return;
1065
+ }
1066
+ await writeFile(join(devRoot, 'playwright.config.ts'), renderScaffoldConfig(origin, authFile), 'utf-8');
1067
+ await ensurePlaywrightDep(devRoot);
1068
+ }
1069
+ /** When Hover scaffolds the config it also ensures `@playwright/test` is a
1070
+ * devDependency — otherwise `npx playwright test` can't run the specs locally.
1071
+ * Best-effort + idempotent: skips if already present (either dep list) or if
1072
+ * there's no package.json. Reformats to 2-space JSON (the npm norm). */
1073
+ const PLAYWRIGHT_TEST_RANGE = '^1.50.0';
1074
+ async function ensurePlaywrightDep(devRoot) {
1075
+ const pkgPath = join(devRoot, 'package.json');
1076
+ if (!existsSync(pkgPath))
1077
+ return;
1078
+ try {
1079
+ const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8'));
1080
+ if (pkg.dependencies?.['@playwright/test'] || pkg.devDependencies?.['@playwright/test'])
1081
+ return;
1082
+ pkg.devDependencies = { ...(pkg.devDependencies ?? {}), '@playwright/test': PLAYWRIGHT_TEST_RANGE };
1083
+ await writeFile(pkgPath, JSON.stringify(pkg, null, 2) + '\n', 'utf-8');
1084
+ }
1085
+ catch { /* best-effort — never break Save */ }
1043
1086
  }
1044
1087
  function stripBaseUrl(url) {
1045
1088
  // http://localhost:5173/checkout → /checkout, http://localhost:5173/ → /
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hover-dev/core",
3
- "version": "0.19.0",
3
+ "version": "0.21.0",
4
4
  "description": "Hover's local Node service: agent invocation, Playwright CDP preflight, WebSocket bridge.",
5
5
  "license": "Apache-2.0",
6
6
  "author": "Hyperyond",
@@ -1,55 +0,0 @@
1
- /**
2
- * Agent system-prompt directives — the static prose fragments appended to the
3
- * coding agent's system prompt to shape how it drives the browser and writes its
4
- * report. Extracted from service.ts (which assembles them per run) so the prompt
5
- * engineering lives in one readable place, separate from the run orchestration.
6
- *
7
- * Which directives apply when:
8
- * - ZH_OUTPUT — only when the user's prompt contains CJK.
9
- * - REPORTING / NARRATION / ASK_FORMAT / EXPLORATION_CHECKPOINT — all modes.
10
- * - GROUNDED_ACTUATION (directive + DENY list) — modes whose ModeBehavior has
11
- * groundedActuation = true (Flow, QA). See ./modes.ts.
12
- */
13
- /** CJK-presence test — mirrors voice.js's detectLanguage. Any Han character
14
- * in the prompt flips the agent's prose output to Chinese. */
15
- export declare const CJK_RE: RegExp;
16
- /** Appended to the agent's system prompt when the user's prompt contains CJK,
17
- * so the human-facing prose (verification summary / ## Findings / step
18
- * narration) comes back in Chinese — matching how Voice mode picks a Chinese
19
- * TTS voice for the same prompt. Deliberately scoped to PROSE only: the agent
20
- * must still use the page's real (often English) accessible names, labels,
21
- * and selectors when driving the browser. */
22
- export declare const ZH_OUTPUT_DIRECTIVE: string;
23
- /**
24
- * Grounded-actuation deny list. The Playwright MCP interaction tools take a
25
- * free-form `element` description that doesn't round-trip to a replayable
26
- * selector (it gets crystallized as a confabulated getByText). So in
27
- * grounded-actuation modes we DENY them and route every interaction through the
28
- * Hover control MCP, whose role+name/testId/text args come straight from the
29
- * snapshot and crystallize 1:1. (Plugin modes keep the Playwright tools — they
30
- * explore to capture traffic, not to crystallize browser steps.)
31
- */
32
- export declare const GROUNDED_ACTUATION_DENY: string[];
33
- export declare const REPORTING_DIRECTIVE: string;
34
- export declare const NARRATION_DIRECTIVE: string;
35
- export declare const ASK_FORMAT_DIRECTIVE: string;
36
- export declare const EXPLORATION_CHECKPOINT_DIRECTIVE: string;
37
- /** State-reset recon (debt-2 reproducible-state-isolation). Appended for grounded
38
- * modes ONLY when the extension explicitly requests it (run payload `reconReset`)
39
- * — recon clears client state, which would wipe a logged-in session, so it never
40
- * runs unsolicited and never on a plain Flow recording. The agent discovers +
41
- * validates the reset recipe ONCE, then reports it via record_reset_recipe for
42
- * the engine to forward to the environment store. */
43
- export declare const RECON_DIRECTIVE: string;
44
- /** QA Testing mode — appended on top of the grounded-actuation directive. Turns
45
- * a directed run into autonomous exploratory testing. (Behavioral effect needs
46
- * live verification; the wiring just appends this when mode === 'qa'.) */
47
- export declare const QA_EXPLORATION_DIRECTIVE: string;
48
- /** Appended to the FIRST (functional verify) pass when a penetration-testing
49
- * pass is queued to run right after it. Keeps the two passes from both doing
50
- * security work: the verify pass stays functional-only, all security/vuln work
51
- * is deferred to the dedicated pentest pass. (Only added when QA has pentest on
52
- * AND this is the pre-pentest verify phase — see service.ts `splitting`.) */
53
- export declare const QA_VERIFY_DEFER_SECURITY_DIRECTIVE: string;
54
- export declare const GROUNDED_ACTUATION_DIRECTIVE: string;
55
- //# sourceMappingURL=agentDirectives.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"agentDirectives.d.ts","sourceRoot":"","sources":["../src/agentDirectives.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH;+DAC+D;AAC/D,eAAO,MAAM,MAAM,QAAU,CAAC;AAE9B;;;;;8CAK8C;AAC9C,eAAO,MAAM,mBAAmB,QAOM,CAAC;AAEvC;;;;;;;;GAQG;AACH,eAAO,MAAM,uBAAuB,UAgBnC,CAAC;AAEF,eAAO,MAAM,mBAAmB,QA0BI,CAAC;AAErC,eAAO,MAAM,mBAAmB,QAK2B,CAAC;AAE5D,eAAO,MAAM,oBAAoB,QAc6B,CAAC;AAE/D,eAAO,MAAM,gCAAgC,QAqB8B,CAAC;AAE5E;;;;;sDAKsD;AACtD,eAAO,MAAM,eAAe,QAcL,CAAC;AAExB;;2EAE2E;AAC3E,eAAO,MAAM,wBAAwB,QAoDmB,CAAC;AAEzD;;;;8EAI8E;AAC9E,eAAO,MAAM,kCAAkC,QASD,CAAC;AAE/C,eAAO,MAAM,4BAA4B,QAmE2C,CAAC"}
@@ -1,276 +0,0 @@
1
- /**
2
- * Agent system-prompt directives — the static prose fragments appended to the
3
- * coding agent's system prompt to shape how it drives the browser and writes its
4
- * report. Extracted from service.ts (which assembles them per run) so the prompt
5
- * engineering lives in one readable place, separate from the run orchestration.
6
- *
7
- * Which directives apply when:
8
- * - ZH_OUTPUT — only when the user's prompt contains CJK.
9
- * - REPORTING / NARRATION / ASK_FORMAT / EXPLORATION_CHECKPOINT — all modes.
10
- * - GROUNDED_ACTUATION (directive + DENY list) — modes whose ModeBehavior has
11
- * groundedActuation = true (Flow, QA). See ./modes.ts.
12
- */
13
/**
 * Han-character detector: matches any code point in U+4E00–U+9FFF. Mirrors
 * voice.js's detectLanguage — a single Han character anywhere in the prompt
 * switches the agent's prose output to Chinese.
 */
export const CJK_RE = new RegExp('[\u4e00-\u9fff]');
16
/**
 * Chinese-output directive. Added to the agent's system prompt when the user's
 * prompt contains CJK so that every piece of human-facing prose (verification
 * summary, `## Findings`, step narration) is written in Chinese — the same way
 * Voice mode picks a Chinese TTS voice for such a prompt. It is intentionally
 * limited to PROSE: the agent still drives the browser with the page's real
 * (often English) accessible names, labels, and selectors.
 */
export const ZH_OUTPUT_DIRECTIVE = [
    '用户使用中文下达指令。请用简体中文撰写【所有】面向用户的文字:最终报告的概述、',
    '`## Findings` 里的每一条,以及过程中每一步的简短说明。',
    '这一点【贯穿整个过程,不分顺利与否】:当你在排查、卡住、改变思路、或自言自语式地推理时',
    '(例如“让我先找一下…”“这个按钮点不到,换个方式”),也必须用中文,绝不要中途切回英文。',
    '注意:这只影响写给用户看的叙述文字。操作浏览器时仍要使用页面真实的(通常是英文的)',
    '角色名、标签、可访问名称和选择器——不要翻译成中文;严重级别标记',
    '(high / medium / low / info)也保持英文。',
].join('');
29
/**
 * Playwright MCP tools that are denied in grounded-actuation modes.
 *
 * The Playwright interaction tools accept a free-form `element` description that
 * does not round-trip to a replayable selector (it ends up crystallized as a
 * confabulated getByText). Grounded-actuation modes therefore deny them and send
 * every interaction through the Hover control MCP, whose role+name/testId/text
 * arguments come straight from the snapshot and crystallize 1:1. (Plugin modes
 * keep the Playwright tools — they explore to capture traffic, not to
 * crystallize browser steps.)
 */
export const GROUNDED_ACTUATION_DENY = [
    'browser_click',
    'browser_type',
    'browser_fill_form',
    'browser_select_option',
    // Uploads use mcp__hovercontrol__upload_file instead, which crystallizes to a
    // real filechooser + setFiles; Playwright's browser_file_upload would leave
    // only an untranslatable optimizable marker.
    'browser_file_upload',
    // Screenshots use mcp__hovercontrol__take_screenshot (viewport only).
    // Playwright's browser_take_screenshot captures fullPage by RESIZING the live
    // window; the resulting window 'resize' can make the app drop transient UI
    // state (e.g. a flipped card snapping back), hiding the result of the agent's
    // own action. The viewport tool has no such side effect.
    'browser_take_screenshot',
].map((tool) => `mcp__playwright__${tool}`);
55
/**
 * Reporting directive (applies in all modes). Tells the agent to write its
 * report about the app under test rather than about the Hover/Playwright
 * tooling, and fixes the report's shape: plain Markdown, one outcome sentence,
 * `- ` bullets, and a `## Findings` section only when real defects exist.
 */
export const REPORTING_DIRECTIVE = [
    'YOUR REPORT IS ABOUT THE APP, NOT THE TOOLING. The final summary and any ',
    '## Findings are for the developer of the app under test — write them in plain ',
    'product terms about what the APP did: which user flows worked, and real ',
    'defects only (wrong validation, broken navigation, lost data, a genuinely ',
    'confusing UX). NEVER mention how you drove the page or any Hover/Playwright ',
    'mechanics: no tool names (click_control, check_control, getByRole, ',
    'browser_snapshot, upload_file, …), no selectors, no "strict mode", "grounded", ',
    '"display:none", "filechooser", "tab index", and no internal file names. ',
    'Trouble OPERATING a control (a hidden input, a label repeated across groups, a ',
    'lingering dialog, any tool quirk) is YOUR technique to work out — do it ',
    'silently; it is NOT an app bug and must never appear as a finding. NEVER ',
    'propose changes to Hover or its tools, and do not narrate your own environment, ',
    'capabilities, or memory. Report only what a user of the app would care about.\n\n',
    'WRITE YOUR FINAL REPORT AS PLAIN MARKDOWN — NOT JSON, and NOT wrapped in any ',
    'fenced code block. Structure it exactly so:\n',
    '• ONE short outcome sentence on the first line.\n',
    '• Then a blank line, then concise `- ` bullets for the key things you checked ',
    '(one per step / area / flow). Never cram it all into one paragraph.\n',
    '• ONLY if you found real defects, add a line `## Findings` followed by one ',
    '`- ` bullet per defect, each written as `- **severity** — what happened and why ',
    'it matters` (severity = high / medium / low / info; name the endpoint + method ',
    'inline when the defect is about a specific API call). No real defects → omit ',
    'the Findings section entirely.\n',
    'Use real line breaks (a literal newline, NEVER the characters backslash-n). ',
    'Do not output JSON, a "summary"/"findings" object, or any ```fenced``` wrapper — ',
    'just the Markdown report itself.',
].join('');
81
/**
 * Narration directive (applies in all modes). Limits each interim status to one
 * short present-tense line of intent and reserves the full wrap-up for the
 * final report.
 */
export const NARRATION_DIRECTIVE = [
    'NARRATION — As you work, keep each interim status to ONE short present-tense ',
    'line stating your immediate intent before you act ("Filling the address ',
    'fields", "Now testing an underage date of birth"). Do not write paragraphs ',
    'between actions and do not restate what just happened — the steps are already ',
    'shown. Save the full wrap-up for the final report only.',
].join('');
86
/**
 * Ask-format directive (applies in all modes). When the request is not a
 * concrete instruction, the agent must look at the page and propose 2-4 concrete
 * options instead of asking an open question, listing the options in a
 * `hover-ask` fenced block; per the prompt text, each line becomes a clickable
 * button.
 */
export const ASK_FORMAT_DIRECTIVE = [
    'OFFERING CHOICES — if the user\'s request is NOT a concrete instruction you ',
    'can act on (a concrete instruction looks like "test the login flow", "log ',
    'in", "register an account", "complete checkout", "run the X flow", "fill the ',
    'form") — i.e. it is vague, conversational, or just asks you to ask (e.g. ',
    '"ask me a question", "what can you do", "test this") — do NOT reply with an ',
    'open-ended question like "what would you like me to test?". Instead, LOOK at ',
    'the current page first, then PROPOSE 2-4 concrete things you could test on ',
    'THIS page. Whenever you offer the user a choice, write the question as a ',
    'normal sentence, then put ONLY the options in a fenced block tagged ',
    'hover-ask, one per line with a leading "- ":\n',
    '```hover-ask\n- first concrete option\n- second concrete option\n```\n',
    'Each line becomes a clickable button, so keep options short, specific to this ',
    'page, and directly actionable. ALWAYS give concrete options this way — never ',
    'a bare open question, a numbered list, or inline "A or B".',
].join('');
100
/**
 * Exploration-checkpoint directive (applies in all modes). For vague or unscoped
 * requests the agent must check in through mcp__hovercontrol__ask_user before
 * stopping while material scope is still untested. It also states that limits
 * changeable inside the app are not blockers for an explicit task.
 */
export const EXPLORATION_CHECKPOINT_DIRECTIVE = [
    'OPEN-ENDED TASKS — CHECK IN BEFORE YOU STOP. When the request is vague or ',
    'unscoped (e.g. just "test", "test this", "check the app") YOU chose what to ',
    'cover, so you do not actually know when the user considers it done. If you ',
    'reach a natural stopping point with MATERIAL scope still untested — whole ',
    'sections / flows / steps you noticed but did not exercise — do NOT end the ',
    'run on your own. First call mcp__hovercontrol__ask_user: briefly say what ',
    'you have covered and what remains, and offer concrete options such as ',
    'continuing with a specific untested part, continuing through everything ',
    'left, or stopping here. Then act on the answer. Ask at a genuine checkpoint ',
    '(a finished chunk), not after every step, and ask once per checkpoint — do ',
    'not loop. Skip this entirely when the task was explicit and bounded (you ',
    'finished exactly what was asked) or when the user already said to stop / ',
    'that it is enough — then just finish and report. ',
    'IN-APP LIMITS ARE NOT BLOCKERS. When an EXPLICIT task is stopped by something ',
    'you can change inside the app — a daily quota reached, a feature behind a ',
    'setting/toggle, a smaller default that a control can raise — do the in-app ',
    'workaround yourself (open settings, raise the limit, flip the toggle) and ',
    'COMPLETE the task. Do NOT stop to offer a menu of choices. Only a truly ',
    'EXTERNAL blocker (missing credentials, a file you cannot obtain) justifies ',
    'asking; and if the explicit task is already satisfied, just conclude and ',
    'report it — never end an explicit task with a "what next?" option list.',
].join('');
121
/**
 * State-reset recon directive (debt-2 reproducible-state-isolation).
 *
 * Appended for grounded modes ONLY when the extension explicitly asks for it
 * (run payload `reconReset`): recon clears client state, which would wipe a
 * logged-in session, so it never runs unsolicited and never on a plain Flow
 * recording. The agent discovers and validates the reset recipe ONCE, then
 * reports it via record_reset_recipe so the engine can forward it to the
 * environment store.
 */
export const RECON_DIRECTIVE = [
    'STATE-RESET RECON — do this ONCE, before you start testing. For saved tests to ',
    'be reproducible, Hover needs to know how to reset this app to a clean start. ',
    '(1) Note which controls/screens reflect the app\'s stored state. (2) Call ',
    'mcp__hovercontrol__clear_client_state, then look at the page after it reloads. ',
    '(3) Decide: did the app return to its INITIAL state (its state is client-side ',
    '— Tier 1) or did your prior progress come BACK (it is re-hydrated from a ',
    'backend / your logged-in account — Tier 2)? Prefer a FULL clear (clear ',
    'everything) — if that logged you out and the app needs auth, log back in ',
    'using the test account credentials provided for this run, then continue. Only ',
    'fall back to naming specific storageKeys if a full clear breaks something you ',
    'cannot re-establish. (4) Report it with mcp__hovercontrol__record_reset_recipe: ',
    'tier 1 (clear-all, the default; or with storageKeys only if you had to scope), ',
    'or tier 2 (not client-resettable). Do this recon only once, at the start; ',
    'then test normally.',
].join('');
141
/**
 * QA Testing mode directive, layered on top of the grounded-actuation
 * directive. It turns a directed run into autonomous exploratory testing.
 * (The behavioral effect needs live verification; the wiring simply appends
 * this when mode === 'qa'.)
 */
export const QA_EXPLORATION_DIRECTIVE = [
    'QA TESTING MODE — explore, don\'t just follow. YOU ARE A TESTER: your only job ',
    'is to TEST this app. Never merely read out, describe, summarize, or narrate the ',
    'page — always EXERCISE the controls (click, fill, submit, toggle, navigate), ',
    'try negative / boundary inputs, and verify behavior to find defects; describing ',
    'the page is never an acceptable result on its own. A vague or unscoped request ',
    '("test the app", "test this") MEANS "explore the whole app" — do NOT open with ',
    'an ask_user or a list of choices, just START testing what you can see (even on ',
    'a login/landing page: empty submit, bad password, invalid input first). Ask the ',
    'user (ask_user) ONLY when EXTERNALLY blocked (credentials / a file you cannot ',
    'get) or for a decisive business judgment you cannot resolve — never just to ',
    'pick scope. An IN-APP limit you can change yourself (a daily quota, a ',
    'setting/toggle, a raisable default) is NOT "blocked": adjust it in the app and ',
    'finish the task — do not stop to ask. Go BEYOND any single instruction: ',
    'systematically exercise every reachable control and state of the app to find ',
    'real defects. Maintain a mental frontier of untried controls; try each; do NOT ',
    'repeat a state you have already explored. Do NEGATIVE testing too — empty / ',
    'invalid / boundary / special-character inputs on forms — to surface validation ',
    'gaps, not just happy paths. Flag what you find as Findings (severity high / ',
    'medium / low / info) in your report; DO NOT crystallize a spec unless the user ',
    'asks. Treat clearly DESTRUCTIVE / irreversible actions (delete account, submit ',
    'payment, send email, bulk delete) carefully: confirm with the user once per ',
    'action-type before doing them, otherwise flag-and-skip. Stay on the app under ',
    'test (never navigate to external origins). Stop when the frontier is exhausted ',
    'or you hit the run budget; then WRITE THE FINDINGS REPORT AND END THE RUN. ',
    'Your turn ENDS with that report — do NOT close by asking whether to test more ',
    'or offering a menu of further areas (no "shall I also test X?" question, no ',
    'ask_user, no closing option list). Anything you did not cover belongs in the ',
    'report\'s `## Coverage` → `Not covered:` list, never in a closing question.\n',
    'REPORT COVERAGE: end your report with a `## Coverage` section — first a short ',
    '`Tested:` list of the main areas / flows / controls you DID exercise, then a ',
    '`Not covered:` list of anything you saw but did NOT test (and a few words on ',
    'why: out of scope, blocked, ran out of budget, destructive-and-skipped). This ',
    'tells the developer exactly what is verified vs still open — be honest about ',
    'gaps, do not claim coverage you did not do.\n',
    'CAPTURE CLEAN FLOWS: as you exercise the app, whenever you complete a coherent ',
    'end-to-end flow worth keeping as a regression test (e.g. "Log in", "Add item ',
    'to cart", "Submit the registration form"), call record_candidate with just a ',
    'short imperative name — IN ENGLISH (it becomes the spec\'s filename + test ',
    'name, even though your report prose is in another language). You do NOT pass ',
    'steps: Hover automatically captures the successful click / fill / select / ',
    'check / upload actions you did since your last record_candidate, so call it ',
    'the MOMENT you finish each distinct flow — before starting the next one or ',
    'doing unrelated exploration — so its captured steps are exactly that flow. ',
    '(record_candidate only OFFERS the user a one-click "Crystallize" later — it ',
    'does not write a spec; you never write one yourself.)\n',
    'REMEMBER WHAT YOU LEARN: when you confirm a durable business rule about this ',
    'app — an expected behavior, a validation rule, an access policy, or the answer ',
    'to a "is this a bug or by-design?" you asked the user — call record_fact to ',
    'persist it, so neither you nor a future run re-asks it. State it as a clean ',
    'self-contained rule. RULES ONLY — never record secrets, passwords, tokens, or ',
    'personal data. (Anything in KNOWN BUSINESS KNOWLEDGE above is already ',
    'remembered — treat it as settled, do not re-ask it.)',
].join('');
196
/**
 * Security-deferral directive for the FIRST (functional verify) pass, added when
 * a penetration-testing pass is queued right behind it. It stops both passes
 * from doing security work: the verify pass stays functional-only and every
 * security/vulnerability concern is left to the dedicated pentest pass. (Only
 * added when QA has pentest on AND this is the pre-pentest verify phase — see
 * service.ts `splitting`.)
 */
export const QA_VERIFY_DEFER_SECURITY_DIRECTIVE = [
    'SECURITY IS A SEPARATE LATER PASS — NOT THIS ONE. A dedicated penetration-',
    'testing pass runs right after this one and owns ALL security / vulnerability ',
    'work (auth / access control, IDOR, injection, secrets, endpoint abuse, ',
    'attacking the backend). In THIS pass do FUNCTIONAL testing ONLY: verify the ',
    'app WORKS — flows, forms, navigation, validation, state — and report only ',
    'functional defects. Even if the request mentions security, do NOT audit ',
    'security, do NOT read source looking for vulnerabilities, and do NOT report ',
    'security findings here. Leave every security concern to the pentest pass so ',
    'the two passes never duplicate each other.',
].join('');
210
/**
 * Grounded-actuation directive, used together with the grounded-actuation deny
 * list in modes whose behavior has grounded actuation enabled. It restricts
 * page interaction to the Hover control tools and then gives four blocks of
 * guidance, separated by blank lines in the prompt: narrowing targets that are
 * not uniquely addressable, asking the user through ask_user when truly
 * blocked, flagging volatile app data with dynamic:true, and asserting the
 * invariant a flow proves.
 */
export const GROUNDED_ACTUATION_DIRECTIVE = [
    'INTERACTING WITH THE PAGE — IMPORTANT: You interact with the page ONLY through ',
    'the Hover control tools: mcp__hovercontrol__click_control, fill_control, ',
    'select_control, check_control. You ALREADY HAVE FULL PERMISSION to use them — ',
    'NEVER ask the user to grant permissions, never stop to request access, never ',
    'narrate a permission request. Just call the tools and keep going until the task ',
    'is done. Each takes the element\'s accessible role + name exactly as shown in ',
    'the latest browser_snapshot (fall back to its testId, then its real visible ',
    'text, only when there is no clean role+name). Workflow: browser_snapshot to read ',
    'the real roles + names, then call the matching *_control tool for each field / ',
    'option / button, snapshotting again after navigation. (The Playwright ',
    'interaction tools are disabled — the control tools replace them, so saved ',
    'selectors stay grounded.) browser_navigate / browser_snapshot / ',
    'browser_wait_for / browser_tabs / browser_press_key remain available.\n\n',
    'WHEN A TARGET ISN\'T UNIQUELY ADDRESSABLE — narrow it, don\'t give up. Two ',
    'common reasons and the one principle that solves both: (a) its accessible ',
    'name/label repeats elsewhere on the page, or (b) its real input is hidden so ',
    'it isn\'t in the snapshot as a control and getByRole/check_control would time ',
    'out — in which case act on the element you CAN see (its visible label text). ',
    'Principle: scope to the smallest container in the snapshot that uniquely ',
    'holds your target by passing `within` = that container\'s role + accessible ',
    'name, then identify the target inside it (by text when its own name isn\'t ',
    'unique). To choose the right container and approach, read the snapshot tree, ',
    'take a mcp__hovercontrol__take_screenshot to SEE the real visual layout (the ',
    'accessibility tree omits display:none inputs, canvas, and can\'t convey ',
    'spatial grouping — the screenshot shows what the user actually sees), and ',
    'read the component source if you\'re unsure how it\'s built. take_screenshot ',
    'captures the CURRENT VIEWPORT only and never resizes the page — use it, NOT ',
    'Playwright\'s browser_take_screenshot (disabled here: its fullPage capture ',
    'resizes the live window, which can reset transient app state so you\'d never ',
    'see the result of your own action). To see below the fold, scroll first, then ',
    'take_screenshot. For FINDING elements, rely on browser_snapshot — its tree ',
    'covers the whole page (off-screen included), so a viewport shot never makes ',
    'you miss a control. Perceive with the screenshot; ACT through the grounded ',
    '*_control tools. This is routine; work it ',
    'out and keep going rather than reporting it as a limitation.\n\n',
    'WHEN YOU ARE TRULY BLOCKED — ASK VIA THE CARD, DON\'T DEAD-END: only after ',
    'you\'ve tried to work it out yourself (re-read the snapshot, scope with ',
    '`within`, read the component source), if something genuinely needs the user — ',
    'credentials you don\'t have, a file only they can provide, a decision only ',
    'they can make — call mcp__hovercontrol__ask_user. Never surface that as a ',
    'plain chat question and end your turn: the user can only answer an ask_user ',
    'card, so a bare question dead-ends the run. (WHEN to ask vs. keep going on ',
    'your own — and how to start and stop — is governed by the mode directive ',
    'below; this paragraph only fixes HOW to ask.) Engine helper: ',
    'mcp__hovercontrol__upload_file (path or placeholder) sets a file on an upload ',
    'control, since you have no filesystem access yourself.\n\n',
    'VOLATILE CONTENT — FLAG IT, DON\'T FREEZE IT. Two kinds of text live on a ',
    'page: FIXED UI labels the app ships (button / field / menu text like ',
    '"Submit", "Email", "Add to cart") and APP DATA the page draws from its ',
    'content or state (a word on a card, a product or item title, a person\'s ',
    'name, a generated id, an order number, a date, a price, a count). Whenever the ',
    'name / text you ground on is APP DATA — NOT a fixed label — you MUST pass ',
    'dynamic:true AND anchor on something stable (a testId, the `within` container, ',
    'or just the role), never the changing text itself. Quick test before every ',
    'click/assert: "would this EXACT text be on the page on a fresh run with ',
    'different data?" If no → it is dynamic. (Example: a flashcard heading showing ',
    'the current word is APP DATA — click_control({ role: "heading", dynamic: true ',
    '}), NOT { name: "bathroom" }.) A frozen data value makes the saved test pass ',
    'once and fail every run after.\n\n',
    'CAPTURE THE INVARIANT — assert what the flow PROVES, not this run\'s value: ',
    'when a flow reaches a state worth verifying, call ',
    'mcp__hovercontrol__assert_visible, and capture at least the key one before ',
    'record_candidate. Assert the CONTRACT (a result appears, a confirmation shows, ',
    'the expected number of items render). When the proof is that some APP DATA ',
    'showed up (a word, a row, a result), assert THAT element with dynamic:true + ',
    'matcher \'non-empty\' or \'text-contains\' — NOT a fixed button sitting next to ',
    'it, and NOT the literal value. Use \'text-exact\' only for genuinely fixed text.',
].join('');
@@ -1,32 +0,0 @@
1
- /**
2
- * QA candidate-flow finalization.
3
- *
4
- * During a QA run the agent calls `record_candidate(name)` right after it
5
- * completes a coherent flow; the hover-control MCP captures the actual grounded
6
- * actuation steps since the previous marker and sends them along — so a
7
- * candidate already carries its real, replayable SkillSteps (no fragile
8
- * step-number citing). This module just validates + de-dupes them before they
9
- * become one-click "Crystallize" cards.
10
- *
11
- * Pure + side-effect-free so it can be unit-tested without a live run.
12
- */
13
- import type { SkillStep } from '../specs/specStep.js';
14
/** A flow as the agent recorded it: the flow's name plus the real steps Hover captured for it. */
export interface RecordedCandidate {
    name: string;
    description?: string;
    steps: Array<SkillStep>;
}
/** A recorded candidate that is ready to crystallize; additionally carries its step count. */
export interface ResolvedCandidate extends RecordedCandidate {
    stepCount: number;
}
/**
 * Validates and de-duplicates recorded candidates. Entries without a name or
 * without steps are dropped, identical repeats (same name + same step count)
 * are collapsed into one, and every survivor is stamped with `stepCount`.
 */
export declare function finalizeCandidates(candidates: ReadonlyArray<RecordedCandidate>): Array<ResolvedCandidate>;
32
- //# sourceMappingURL=candidates.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"candidates.d.ts","sourceRoot":"","sources":["../../src/qa/candidates.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AACH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAEtD,mFAAmF;AACnF,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,SAAS,EAAE,CAAC;CACpB;AAED,wCAAwC;AACxC,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,SAAS,EAAE,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,UAAU,EAAE,SAAS,iBAAiB,EAAE,GAAG,iBAAiB,EAAE,CAahG"}
@@ -1,20 +0,0 @@
1
/**
 * Validate + de-dupe recorded candidates: drop ones with no name or no steps,
 * collapse identical repeats (same name + same step count), and stamp stepCount.
 *
 * Malformed input is skipped instead of throwing: a missing list yields an empty
 * result, non-object entries are ignored, a non-string `name` counts as missing,
 * and a non-string `description` is treated as absent.
 *
 * @param candidates Recorded candidates, in the order they were recorded.
 * @returns The surviving candidates in their original order. Each has a trimmed
 *   `name`, a trimmed `description` (or `undefined` when empty), only the steps
 *   whose `kind` is `'step'`, and `stepCount` equal to the number of those steps.
 */
export function finalizeCandidates(candidates) {
    const resolved = [];
    // Keys of the form "<name>|<step count>" for candidates already accepted.
    const seenKeys = new Set();
    for (const candidate of candidates ?? []) {
        // A null / primitive entry has no fields to read; skip it rather than throw.
        if (!candidate || typeof candidate !== 'object') {
            continue;
        }
        const name = typeof candidate.name === 'string' ? candidate.name.trim() : '';
        // Keep only real actuation steps; anything else in the list is ignored.
        const steps = Array.isArray(candidate.steps)
            ? candidate.steps.filter((s) => s && s.kind === 'step')
            : [];
        if (!name || steps.length === 0) {
            continue;
        }
        const key = `${name}|${steps.length}`;
        if (seenKeys.has(key)) {
            continue;
        }
        seenKeys.add(key);
        const description = typeof candidate.description === 'string'
            ? candidate.description.trim() || undefined
            : undefined;
        resolved.push({ name, description, steps, stepCount: steps.length });
    }
    return resolved;
}
@@ -1,19 +0,0 @@
1
- import type { SessionFinding } from '../sessions/sessions.js';
2
/** Everything needed to render one QA report. */
export interface QaReportInput {
    /** The run's prompt; trimmed and shown in the report's `# QA report — …` heading. */
    prompt: string;
    /** Free-form summary; included as its own paragraph only when non-blank. */
    summary: string;
    /** Findings rendered as bullets under `## Findings` (a "no issues" line when empty). */
    findings: SessionFinding[];
    /** End-of-run timestamp string, shown on the metadata line when non-empty. */
    endedAt: string;
    /** URL of the app under test; left off the metadata line when absent. */
    targetUrl?: string;
}
/** Render the report Markdown (pure — exported for testing). */
export declare function renderQaReport(input: QaReportInput): string;
/** Write the QA report into the run's folder as `report.md`. Each run (incl.
 * each phase of a two-pass run) has its own folder, so there's no name
 * collision. NEVER throws; returns the path or an error string. */
export declare function writeQaReport(runDirPath: string, input: QaReportInput): Promise<{
    path: string;
} | {
    error: string;
}>;
19
- //# sourceMappingURL=qaReport.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"qaReport.d.ts","sourceRoot":"","sources":["../../src/qa/qaReport.ts"],"names":[],"mappings":"AAcA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AAE9D,MAAM,WAAW,aAAa;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,cAAc,EAAE,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,gEAAgE;AAChE,wBAAgB,cAAc,CAAC,KAAK,EAAE,aAAa,GAAG,MAAM,CAkB3D;AAED;;oEAEoE;AACpE,wBAAsB,aAAa,CACjC,UAAU,EAAE,MAAM,EAClB,KAAK,EAAE,aAAa,GACnB,OAAO,CAAC;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,GAAG;IAAE,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,CAS/C"}
@@ -1,50 +0,0 @@
1
- /**
2
- * QA report artifact — the durable, human-readable output of a QA Testing run.
3
- *
4
- * QA is report-first: a run produces findings (rendered live in the chat's
5
- * Findings card via the normal parseFindings pipeline) AND this persistent
6
- * Markdown report, written as `report.md` inside the run's own folder and
7
- * mirroring pentest's report file. Every run (incl. each phase of a two-pass run)
8
- * has its own folder, so reports never collide (the session ledger keeps the full history; this is the readable artifact).
9
- *
10
- * Best-effort by contract: a report-write failure must NEVER break a run or the
11
- * ledger (same rule as the session ledger + business memory).
12
- */
13
- import { mkdir, writeFile } from 'node:fs/promises';
14
- import { join } from 'node:path';
15
/**
 * Render the report Markdown (pure — exported for testing).
 *
 * Layout: a `# QA report — <prompt>` heading, an italic metadata line
 * (`endedAt · targetUrl · N finding(s)`, empty parts omitted), the trimmed
 * summary when non-blank, then a `## Findings` section with one bullet per
 * finding or `_No issues found._` when there are none.
 *
 * @param input Prompt, summary, findings, end timestamp, and optional target URL.
 * @returns The Markdown document, terminated by a single trailing newline.
 */
export function renderQaReport(input) {
    const { prompt, summary, findings, endedAt, targetUrl } = input;
    // An ATX heading cannot span lines, so a multi-line prompt is folded onto one
    // line; otherwise everything after the first newline would leak into the body.
    const title = prompt.trim().replace(/\s+/g, ' ');
    const countLabel = `${findings.length} finding${findings.length === 1 ? '' : 's'}`;
    const meta = [endedAt, targetUrl, countLabel].filter(Boolean).join(' · ');
    const lines = [`# QA report — ${title}`, '', `_${meta}_`];
    const trimmedSummary = summary.trim();
    if (trimmedSummary) {
        lines.push('', trimmedSummary);
    }
    lines.push('', '## Findings');
    if (findings.length === 0) {
        lines.push('_No issues found._');
    }
    else {
        for (const finding of findings) {
            // Trim BEFORE falling back, so a whitespace-only severity still gets the
            // default label instead of rendering as an empty `****`.
            const severity = (finding.severity ?? '').trim() || 'note';
            // Skip the title when it merely repeats the finding text.
            const lead = finding.title && finding.title !== finding.text ? `${finding.title} — ` : '';
            lines.push(`- **${severity}** — ${lead}${finding.text.trim()}`);
        }
    }
    return lines.join('\n') + '\n';
}
37
/**
 * Persist the QA report as `report.md` inside the run's folder, creating the
 * folder first if needed. Every run (including each phase of a two-pass run)
 * has its own folder, so file names never collide.
 *
 * Never throws: any failure while creating the folder, rendering, or writing
 * is caught and returned as `{ error }`; on success the result is `{ path }`.
 */
export async function writeQaReport(runDirPath, input) {
    let outcome;
    try {
        await mkdir(runDirPath, { recursive: true });
        const reportPath = join(runDirPath, 'report.md');
        const markdown = renderQaReport(input);
        await writeFile(reportPath, markdown, 'utf-8');
        outcome = { path: reportPath };
    }
    catch (failure) {
        // Best-effort contract: surface the reason as data instead of rethrowing.
        if (failure instanceof Error) {
            outcome = { error: failure.message };
        }
        else {
            outcome = { error: String(failure) };
        }
    }
    return outcome;
}