npm - @hover-dev/core - Versions diffs - 0.15.0 → 0.17.0 - Mend

@hover-dev/core 0.15.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (169)

package/README.md +26 -55
package/dist/agentDirectives.d.ts +55 -0
package/dist/agentDirectives.d.ts.map +1 -0
package/dist/agentDirectives.js +276 -0
package/dist/agents/claude.d.ts.map +1 -1
package/dist/agents/claude.js +28 -3
package/dist/agents/codex.d.ts.map +1 -1
package/dist/agents/codex.js +38 -18
package/dist/agents/gemini.d.ts.map +1 -1
package/dist/agents/gemini.js +3 -14
package/dist/agents/invoke.d.ts.map +1 -1
package/dist/agents/invoke.js +3 -6
package/dist/agents/qwen.d.ts.map +1 -1
package/dist/agents/qwen.js +3 -14
package/dist/agents/registry.d.ts.map +1 -1
package/dist/agents/registry.js +0 -4
package/dist/agents/shared.d.ts +28 -0
package/dist/agents/shared.d.ts.map +1 -0
package/dist/agents/shared.js +35 -0
package/dist/agents/types.d.ts +19 -11
package/dist/agents/types.d.ts.map +1 -1
package/dist/engine.d.ts +53 -0
package/dist/engine.d.ts.map +1 -0
package/dist/engine.js +78 -0
package/dist/mcp/actuateServer.d.ts +3 -0
package/dist/mcp/actuateServer.d.ts.map +1 -0
package/dist/mcp/actuateServer.js +594 -0
package/dist/mcp/sourceFence.d.ts +23 -0
package/dist/mcp/sourceFence.d.ts.map +1 -0
package/dist/mcp/sourceFence.js +79 -0
package/dist/mcp/sourceServer.d.ts +3 -0
package/dist/mcp/sourceServer.d.ts.map +1 -0
package/dist/mcp/sourceServer.js +191 -0
package/dist/memory/businessMemory.d.ts +29 -0
package/dist/memory/businessMemory.d.ts.map +1 -0
package/dist/memory/businessMemory.js +125 -0
package/dist/modes.d.ts +39 -0
package/dist/modes.d.ts.map +1 -0
package/dist/modes.js +34 -0
package/dist/playwright/cdpStatus.d.ts +0 -15
package/dist/playwright/cdpStatus.d.ts.map +1 -1
package/dist/playwright/cdpStatus.js +0 -67
package/dist/playwright/launchChrome.d.ts +18 -0
package/dist/playwright/launchChrome.d.ts.map +1 -1
package/dist/playwright/launchChrome.js +46 -3
package/dist/playwright/preflight.d.ts.map +1 -1
package/dist/playwright/preflight.js +6 -1
package/dist/playwright/resolveMcpConfig.d.ts +12 -0
package/dist/playwright/resolveMcpConfig.d.ts.map +1 -1
package/dist/playwright/resolveMcpConfig.js +36 -5
package/dist/plugin-api.d.ts +35 -26
package/dist/plugin-api.d.ts.map +1 -1
package/dist/plugin-api.js +2 -2
package/dist/qa/candidates.d.ts +32 -0
package/dist/qa/candidates.d.ts.map +1 -0
package/dist/qa/candidates.js +20 -0
package/dist/qa/classify.d.ts +38 -0
package/dist/qa/classify.d.ts.map +1 -0
package/dist/qa/classify.js +138 -0
package/dist/qa/intensity.d.ts +33 -0
package/dist/qa/intensity.d.ts.map +1 -0
package/dist/qa/intensity.js +25 -0
package/dist/qa/qaReport.d.ts +19 -0
package/dist/qa/qaReport.d.ts.map +1 -0
package/dist/qa/qaReport.js +50 -0
package/dist/runSession.d.ts +14 -3
package/dist/runSession.d.ts.map +1 -1
package/dist/runSession.js +31 -11
package/dist/service/cdpHandlers.d.ts +3 -27
package/dist/service/cdpHandlers.d.ts.map +1 -1
package/dist/service/cdpHandlers.js +6 -53
package/dist/service/cdpHint.d.ts +21 -28
package/dist/service/cdpHint.d.ts.map +1 -1
package/dist/service/cdpHint.js +106 -164
package/dist/service/relayHandlers.d.ts +28 -0
package/dist/service/relayHandlers.d.ts.map +1 -0
package/dist/service/relayHandlers.js +105 -0
package/dist/service/saveHandlers.d.ts +1 -3
package/dist/service/saveHandlers.d.ts.map +1 -1
package/dist/service/saveHandlers.js +17 -15
package/dist/service/types.d.ts +108 -8
package/dist/service/types.d.ts.map +1 -1
package/dist/service.d.ts +13 -3
package/dist/service.d.ts.map +1 -1
package/dist/service.js +1022 -236
package/dist/sessions/sessions.d.ts +125 -0
package/dist/sessions/sessions.d.ts.map +1 -0
package/dist/sessions/sessions.js +175 -0
package/dist/specs/authFixture.d.ts +30 -0
package/dist/specs/authFixture.d.ts.map +1 -0
package/dist/specs/authFixture.js +145 -0
package/dist/specs/businessMap.d.ts +29 -0
package/dist/specs/businessMap.d.ts.map +1 -0
package/dist/specs/businessMap.js +95 -0
package/dist/specs/detectSharedFlows.d.ts +1 -1
package/dist/specs/detectSharedFlows.d.ts.map +1 -1
package/dist/specs/detectSharedFlows.js +20 -21
package/dist/specs/generatePageObject.d.ts +1 -1
package/dist/specs/generatePageObject.d.ts.map +1 -1
package/dist/specs/healPrompt.d.ts +19 -0
package/dist/specs/healPrompt.d.ts.map +1 -0
package/dist/specs/healPrompt.js +48 -0
package/dist/specs/humanSteps.d.ts +4 -8
package/dist/specs/humanSteps.d.ts.map +1 -1
package/dist/specs/humanSteps.js +6 -1
package/dist/specs/optimizeSpec.d.ts +15 -8
package/dist/specs/optimizeSpec.d.ts.map +1 -1
package/dist/specs/optimizeSpec.js +98 -46
package/dist/specs/optimizeSpecWithAgent.d.ts +0 -2
package/dist/specs/optimizeSpecWithAgent.d.ts.map +1 -1
package/dist/specs/optimizeSpecWithAgent.js +0 -1
package/dist/specs/pageObjectManifest.d.ts +3 -1
package/dist/specs/pageObjectManifest.d.ts.map +1 -1
package/dist/specs/pageObjectManifest.js +13 -9
package/dist/specs/replayGrounded.d.ts +45 -0
package/dist/specs/replayGrounded.d.ts.map +1 -0
package/dist/specs/replayGrounded.js +155 -0
package/dist/specs/runFailures.d.ts +34 -0
package/dist/specs/runFailures.d.ts.map +1 -0
package/dist/specs/runFailures.js +93 -0
package/dist/specs/seeds.d.ts +16 -15
package/dist/specs/seeds.d.ts.map +1 -1
package/dist/specs/seeds.js +86 -54
package/dist/specs/sidecar.d.ts +34 -6
package/dist/specs/sidecar.d.ts.map +1 -1
package/dist/specs/sidecar.js +79 -9
package/dist/specs/softBatch.d.ts +14 -0
package/dist/specs/softBatch.d.ts.map +1 -0
package/dist/specs/softBatch.js +177 -0
package/dist/specs/specStep.d.ts +21 -0
package/dist/specs/specStep.d.ts.map +1 -0
package/dist/specs/specStep.js +1 -0
package/dist/specs/text.d.ts +19 -0
package/dist/specs/text.d.ts.map +1 -0
package/dist/specs/text.js +27 -0
package/dist/specs/writeSpec.d.ts +62 -1
package/dist/specs/writeSpec.d.ts.map +1 -1
package/dist/specs/writeSpec.js +598 -30
package/package.json +10 -10
package/dist/agents/aider.d.ts +0 -16
package/dist/agents/aider.d.ts.map +0 -1
package/dist/agents/aider.js +0 -169
package/dist/agents/cursor.d.ts +0 -18
package/dist/agents/cursor.d.ts.map +0 -1
package/dist/agents/cursor.js +0 -229
package/dist/playwright/raiseWindow.d.ts +0 -10
package/dist/playwright/raiseWindow.d.ts.map +0 -1
package/dist/playwright/raiseWindow.js +0 -139
package/dist/scripts/bench-multi-tab.d.ts +0 -2
package/dist/scripts/bench-multi-tab.d.ts.map +0 -1
package/dist/scripts/bench-multi-tab.js +0 -192
package/dist/scripts/bench-ttfb.d.ts +0 -2
package/dist/scripts/bench-ttfb.d.ts.map +0 -1
package/dist/scripts/bench-ttfb.js +0 -127
package/dist/scripts/start-chrome.d.ts +0 -3
package/dist/scripts/start-chrome.d.ts.map +0 -1
package/dist/scripts/start-chrome.js +0 -23
package/dist/skills/writeSkill.d.ts +0 -27
package/dist/skills/writeSkill.d.ts.map +0 -1
package/dist/skills/writeSkill.js +0 -13
package/dist/specs/listSpecs.d.ts +0 -52
package/dist/specs/listSpecs.d.ts.map +0 -1
package/dist/specs/listSpecs.js +0 -139
package/dist/specs/optimizationSuggestion.d.ts +0 -26
package/dist/specs/optimizationSuggestion.d.ts.map +0 -1
package/dist/specs/optimizationSuggestion.js +0 -28
package/dist/specs/writeCaseCsv.d.ts +0 -28
package/dist/specs/writeCaseCsv.d.ts.map +0 -1
package/dist/specs/writeCaseCsv.js +0 -140

package/dist/qa/intensity.js ADDED Viewed

@@ -0,0 +1,25 @@
+export const QA_INTENSITY = {
+    quick: { label: 'Quick', maxSteps: 45, blurb: 'a fast pass over the main flows — breadth over depth (~20–45 steps)' },
+    standard: { label: 'Standard', maxSteps: 150, blurb: 'the main flows plus key negative tests (~45–150 steps)' },
+    deep: { label: 'Deep', maxSteps: 500, blurb: 'exhaustive — every reachable control and state (~150–500 steps)' },
+};
+export const DEFAULT_QA_INTENSITY = 'standard';
+/** Coerce arbitrary input (from the run payload) to a valid intensity. */
+export function asQaIntensity(v) {
+    return v === 'quick' || v === 'deep' || v === 'standard' ? v : DEFAULT_QA_INTENSITY;
+}
+/**
+ * Prompt directive: tell the agent its STEP budget so it paces and ALWAYS wraps
+ * up with a report before the ceiling (wrap-up starts at ~90% of the budget).
+ * `--max-turns` is the hard backstop; this prose is what leaves room for a report.
+ */
+export function qaBudgetDirective(intensity) {
+    const spec = QA_INTENSITY[intensity];
+    const wrapAt = Math.max(5, spec.maxSteps - Math.ceil(spec.maxSteps * 0.1));
+    return (`RUN BUDGET — ${spec.label}: ${spec.blurb}. You have about ${spec.maxSteps} steps ` +
+        `(tool actions) this run, enforced. Pace yourself to fit: cover the most ` +
+        `important flows FIRST. By roughly step ${wrapAt}, STOP exploring and ` +
+        `immediately WRITE YOUR FINDINGS REPORT (and record any clean candidate flows) ` +
+        `while you still can — never end a run without a report. On Quick, be decisive ` +
+        `and favour breadth; on Deep, be exhaustive.`);
+}

package/dist/qa/qaReport.d.ts ADDED Viewed

@@ -0,0 +1,19 @@
+import type { SessionFinding } from '../sessions/sessions.js';
+export interface QaReportInput {
+    prompt: string;
+    summary: string;
+    findings: SessionFinding[];
+    endedAt: string;
+    targetUrl?: string;
+}
+/** Render the report Markdown (pure — exported for testing). */
+export declare function renderQaReport(input: QaReportInput): string;
+/** Write the QA report into the run's folder as `report.md`. Each run (incl.
+ *  each phase of a two-pass run) has its own folder, so there's no name
+ *  collision. NEVER throws; returns the path or an error string. */
+export declare function writeQaReport(runDirPath: string, input: QaReportInput): Promise<{
+    path: string;
+} | {
+    error: string;
+}>;
+//# sourceMappingURL=qaReport.d.ts.map

package/dist/qa/qaReport.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"qaReport.d.ts","sourceRoot":"","sources":["../../src/qa/qaReport.ts"],"names":[],"mappings":"AAcA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AAE9D,MAAM,WAAW,aAAa;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,cAAc,EAAE,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,gEAAgE;AAChE,wBAAgB,cAAc,CAAC,KAAK,EAAE,aAAa,GAAG,MAAM,CAkB3D;AAED;;oEAEoE;AACpE,wBAAsB,aAAa,CACjC,UAAU,EAAE,MAAM,EAClB,KAAK,EAAE,aAAa,GACnB,OAAO,CAAC;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,GAAG;IAAE,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,CAS/C"}

package/dist/qa/qaReport.js ADDED Viewed

@@ -0,0 +1,50 @@
+/**
+ * QA report artifact — the durable, human-readable output of a QA Testing run.
+ *
+ * QA is report-first: a run produces findings (rendered live in the chat's
+ * Findings card via the normal parseFindings pipeline) AND this persistent
+ * Markdown report, written as `report.md` inside the run's own folder and
+ * mirroring pentest's report file. One report per run folder, so runs never
+ * overwrite each other (the session ledger keeps the full history; this is the readable artifact).
+ *
+ * Best-effort by contract: a report-write failure must NEVER break a run or the
+ * ledger (same rule as the session ledger + business memory).
+ */
+import { mkdir, writeFile } from 'node:fs/promises';
+import { join } from 'node:path';
+/** Render the report Markdown (pure — exported for testing). */
+export function renderQaReport(input) {
+    const { prompt, summary, findings, endedAt, targetUrl } = input;
+    const meta = [endedAt, targetUrl, `${findings.length} finding${findings.length === 1 ? '' : 's'}`]
+        .filter(Boolean)
+        .join(' · ');
+    const body = [`# QA report — ${prompt.trim()}`, '', `_${meta}_`];
+    if (summary.trim())
+        body.push('', summary.trim());
+    body.push('', '## Findings');
+    if (findings.length) {
+        for (const f of findings) {
+            const sev = (f.severity || 'note').trim();
+            const head = f.title && f.title !== f.text ? `${f.title} — ` : '';
+            body.push(`- **${sev}** — ${head}${f.text.trim()}`);
+        }
+    }
+    else {
+        body.push('_No issues found._');
+    }
+    return body.join('\n') + '\n';
+}
+/** Write the QA report into the run's folder as `report.md`. Each run (incl.
+ *  each phase of a two-pass run) has its own folder, so there's no name
+ *  collision. NEVER throws; returns the path or an error string. */
+export async function writeQaReport(runDirPath, input) {
+    try {
+        await mkdir(runDirPath, { recursive: true });
+        const path = join(runDirPath, 'report.md');
+        await writeFile(path, renderQaReport(input), 'utf-8');
+        return { path };
+    }
+    catch (err) {
+        return { error: err instanceof Error ? err.message : String(err) };
+    }
+}

package/dist/runSession.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import type { InvokeEvent } from './agents/types.js';
-import type { SkillStep } from './skills/writeSkill.js';
+import type { SkillStep } from './specs/specStep.js';
 export interface RunSessionOptions {
     prompt: string;
     agentId: string;
@@ -7,9 +7,14 @@ export interface RunSessionOptions {
      *  is supplied (the service passes a pre-built config; the CLI passes this). */
     cdpUrl?: string;
     model?: string;
+    /** Reasoning-effort level forwarded to the agent (claude --effort / codex
+     *  -c model_reasoning_effort). Undefined = agent/model default. */
+    effort?: string;
+    /** Extra env for the spawned CLI (Local LLM: OPENAI_BASE_URL / _API_KEY). */
+    env?: Record<string, string>;
     maxBudgetUsd?: number;
-    /** Optional model API key, injected into the spawned CLI's env. */
-    apiKey?: string;
+    /** Hard ceiling on agent turns (~steps) — QA intensity step budget. */
+    maxTurns?: number;
     /** Agent cwd (project root) — where Claude Code reads CLAUDE.md and where a
      *  `--save` / re-record writes the spec. Defaults to the process cwd. */
     cwd?: string;
@@ -23,6 +28,12 @@ export interface RunSessionOptions {
     /** Extra hard-sandbox allow-list prefixes — e.g. active-mode plugin MCP
      *  server ids the service contributes. Appended to ['mcp__playwright']. */
     allowedToolsExtra?: string[];
+    /** Extra hard-sandbox deny entries — specific tools to forbid even though
+     *  their server is allowed. Normal mode passes the Playwright interaction
+     *  tools (browser_click / _type / _fill_form / _select_option) here so the
+     *  agent must use the grounded mcp__hover-control__* actuation tools, whose
+     *  role+name selectors crystallize 1:1 instead of confabulating getByText. */
+    disallowedToolsExtra?: string[];
     /** Appended to the agent's system prompt (the service folds in cdpHint +
      *  conventions + plugin additions + a language directive; the CLI omits it). */
     appendSystemPrompt?: string;

package/dist/runSession.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"runSession.d.ts","sourceRoot":"","sources":["../src/runSession.ts"],"names":[],"mappings":"AAoBA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,~~wBAAwB~~,CAAC;~~AAGxD~~,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB;oFACgF;IAChF,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,~~mEAAmE~~;~~IACnE~~,~~MAAM~~,CAAC,EAAE,MAAM,CAAC;~~IAChB~~;6EACyE;IACzE,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,kEAAkE;IAClE,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB;;yCAEqC;IACrC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;+EAC2E;IAC3E,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC7B;oFACgF;IAChF,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,2DAA2D;IAC3D,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,gBAAgB;IAC/B;mCAC+B;IAC/B,KAAK,EAAE,SAAS,EAAE,CAAC;IACnB,yCAAyC;IACzC,OAAO,EAAE,MAAM,CAAC;IAChB,qDAAqD;IACrD,OAAO,EAAE,OAAO,CAAC;CAClB;AAED,wBAAsB,UAAU,CAC9B,IAAI,EAAE,iBAAiB,EACvB,OAAO,EAAE,CAAC,EAAE,EAAE,WAAW,KAAK,IAAI,GACjC,OAAO,CAAC,gBAAgB,CAAC,~~CAuD3B~~"}
1	+ {"version":3,"file":"runSession.d.ts","sourceRoot":"","sources":["../src/runSession.ts"],"names":[],"mappings":"AAoBA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAGrD,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB;oFACgF;IAChF,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;uEACmE;IACnE,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,6EAA6E;IAC7E,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC7B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,uEAAuE;IACvE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;6EACyE;IACzE,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,kEAAkE;IAClE,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB;;yCAEqC;IACrC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;+EAC2E;IAC3E,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC7B;;;;kFAI8E;IAC9E,oBAAoB,CAAC,EAAE,MAAM,EAAE,CAAC;IAChC;oFACgF;IAChF,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,2DAA2D;IAC3D,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,gBAAgB;IAC/B;mCAC+B;IAC/B,KAAK,EAAE,SAAS,EAAE,CAAC;IACnB,yCAAyC;IACzC,OAAO,EAAE,MAAM,CAAC;IAChB,qDAAqD;IACrD,OAAO,EAAE,OAAO,CAAC;CAClB;AAED,wBAAsB,UAAU,CAC9B,IAAI,EAAE,iBAAiB,EACvB,OAAO,EAAE,CAAC,EAAE,EAAE,WAAW,KAAK,IAAI,GACjC,OAAO,CAAC,gBAAgB,CAAC,CAyE3B"}

package/dist/runSession.js CHANGED Viewed

@@ -27,6 +27,11 @@ export async function runSession(opts, onEvent) {
     const steps = [{ kind: 'user', text: opts.prompt }];
     let summary = '';
     let isError = false;
+    // Index of the most recently captured tool step, so the tool_result that
+    // follows can mark whether that action errored. Without this, every captured
+    // step looks successful and the agent's failed exploration attempts get
+    // crystallized into the spec as if they were real flow.
+    let lastStepIdx = -1;
     const mcpConfig = opts.mcpConfig ??
         resolveMcpConfig({
             cdpUrl: opts.cdpUrl ?? 'http://localhost:9222',
@@ -43,25 +48,35 @@ export async function runSession(opts, onEvent) {
         mcpConfig,
         cwd: opts.cwd,
         appendSystemPrompt: opts.appendSystemPrompt,
-        // Hard sandbox: only Playwright MCP (+ any active-mode plugin servers) is
-        // callable, every built-in tool denied — a hijacked prompt can't reach the
-        // shell or filesystem. Soft agents (codex, …) enforce their own sandbox via
-        // buildArgs, so the lists stay undefined for them — exactly what the
-        // service does.
-        allowedTools: isHardSandbox
-            ? ['mcp__playwright', ...(opts.allowedToolsExtra ?? [])]
-            : undefined,
+        // The allowed-tool set (Playwright MCP + the active mode's plugin servers:
+        // hover-control, api-test flows, source reader, …) is the SAME for every
+        // agent — hard-sandbox agents enforce it via --allowedTools; soft agents
+        // (codex) surface it in their developer_instructions so they don't
+        // self-restrict to Playwright and refuse the plugin tools (e.g. api_request).
+        // The DISallow list is hard-sandbox only (soft agents can't enforce it).
+        allowedTools: ['mcp__playwright', ...(opts.allowedToolsExtra ?? [])],
         disallowedTools: isHardSandbox
-            ? (descriptor?.defaultDisallowedTools ? [...descriptor.defaultDisallowedTools] : undefined)
+            ? [...(descriptor?.defaultDisallowedTools ?? []), ...(opts.disallowedToolsExtra ?? [])]
             : undefined,
         maxBudgetUsd: opts.maxBudgetUsd,
+        maxTurns: opts.maxTurns,
         model: opts.model,
-        apiKey: opts.apiKey,
+        effort: opts.effort,
+        env: opts.env,
         signal: opts.signal,
     })) {
         onEvent(ev);
         if (ev.kind === 'tool_use') {
-            steps.push({ kind: 'step', tool: ev.tool, input: ev.input });
+            lastStepIdx = steps.push({ kind: 'step', tool: ev.tool, input: ev.input }) - 1;
+        }
+        else if (ev.kind === 'tool_result') {
+            // Mark the step this result belongs to (the normalized stream emits
+            // tool_result right after its tool_use). A failed action stays in the
+            // sidecar as part of the full-fidelity record, but writeSpec drops it from
+            // the runnable spec so the artifact reflects the working flow, not the agent's
+            // trial-and-error.
+            if (lastStepIdx >= 0 && ev.isError)
+                steps[lastStepIdx].isError = true;
         }
         else if (ev.kind === 'session_end') {
             if (ev.summary)
@@ -70,6 +85,11 @@ export async function runSession(opts, onEvent) {
                 isError = true;
         }
     }
+    // On abort (opts.signal), invokeAgent SIGTERMs the child and no session_end
+    // arrives, so the error flag above never gets set. Honour the doc contract
+    // ("True if the run ended in error or was aborted") by flipping it here.
+    if (opts.signal?.aborted)
+        isError = true;
     if (summary)
         steps.push({ kind: 'done', summary });
     return { steps, summary, isError };

package/dist/service/cdpHandlers.d.ts CHANGED Viewed

@@ -1,11 +1,8 @@
 /**
  * CDP-related WebSocket message handlers.
  *
- *   check-cdp     → checkCdpStatus → emit cdp-status
  *   launch-chrome → emit "launching" placeholder → launchDebugChrome →
  *                   re-check status → emit cdp-status
- *   focus-debug   → focusDebugTab → no message on success (the widget the
- *                   user is about to focus runs its own check-cdp anyway)
  *
  * Extracted from service.ts during the v0.2.x refactor pass so the main
  * file can be a thin orchestrator.
@@ -14,22 +11,9 @@ import type { WebSocket } from 'ws';
 import { type LaunchOptions } from '../playwright/launchChrome.js';
 import { type ClientMessage } from './types.js';
 /** Extra launch options surfaced from the active mode (security plugin
- *  needs proxy + spki + separate profile + non-default CDP port). When
- *  none are set, behaviour is identical to pre-v0.7 normal-mode launch. */
-export type LaunchExtras = Pick<LaunchOptions, 'userDataDir' | 'proxy'> & {
-    /** Override CDP port (mode-specific, e.g. 9333 for security). When set,
-     *  this also wins over the `port` parsed from cdpUrl. */
-    cdpPort?: number;
-};
-/**
- * "Is this widget running inside the debug Chrome?" The widget asks this on
- * connect (and after every status-changing event) so it can render itself as
- * either:
- *   - same-window  → normal, drives the page
- *   - wrong-window → disabled, with a "use the other window" notice
- *   - no-cdp       → enabled but click triggers launch-chrome instead
- */
-export declare function handleCheckCdp(ws: WebSocket, msg: ClientMessage, cdpUrl: string, extras?: LaunchExtras): Promise<void>;
+ *  needs a resident proxy + spki). When none are set, behaviour is identical
+ *  to pre-v0.7 normal-mode launch. */
+export type LaunchExtras = Pick<LaunchOptions, 'proxy' | 'userDataDir'>;
 /**
  * Launch a debug Chrome navigated to `pageUrl`, then re-check status. The
  * re-check usually returns 'wrong-window' (because the widget asking is in
@@ -37,12 +21,4 @@ export declare function handleCheckCdp(ws: WebSocket, msg: ClientMessage, cdpUrl
  * displays the "use the other window" state.
  */
 export declare function handleLaunchChrome(ws: WebSocket, msg: ClientMessage, cdpUrl: string, extras?: LaunchExtras): Promise<void>;
-/**
- * bringToFront the debug-Chrome tab matching `pageUrl`'s origin (or open one
- * if none exists). Used by the wrong-window UI's "switch to debug Chrome"
- * button. Doesn't return cdp-status — bringToFront doesn't change anything
- * the widget cares about, and the widget the user is about to focus is a
- * different page (and will run its own check-cdp on its own ws connection).
- */
-export declare function handleFocusDebug(ws: WebSocket, msg: ClientMessage, cdpUrl: string, extras?: LaunchExtras): Promise<void>;
 //# sourceMappingURL=cdpHandlers.d.ts.map

package/dist/service/cdpHandlers.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"cdpHandlers.d.ts","sourceRoot":"","sources":["../../src/service/cdpHandlers.ts"],"names":[],"mappings":"AAAA~~;;;;;;;;;;;GAWG~~;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AAEpC,OAAO,EAAqB,KAAK,aAAa,EAAE,MAAM,+BAA+B,CAAC;AACtF,OAAO,EAAQ,KAAK,aAAa,EAAE,MAAM,YAAY,CAAC;AAEtD;;~~2EAE2E~~;~~AAC3E~~,MAAM,MAAM,YAAY,GAAG,IAAI,CAAC,aAAa,EAAE,~~aAAa,GAAG,~~OAAO,~~CAAC,~~GAAG~~;IACxE;6DACyD;IACzD~~,~~OAAO,CAAC,EAAE,MAAM,CAAC;CAClB,CAAC;AAEF;;;;;;;GAOG;AACH,wBAAsB,cAAc,CAClC,EAAE,EAAE,SAAS,EACb,GAAG,EAAE,~~aAAa,~~EAClB,MAAM,EAAE,MAAM,EACd,MAAM,~~CAAC,~~EAAE,YAAY,GACpB,OAAO,~~CAAC~~,IAAI,CAAC,CAWf~~;~~AAED~~;;;;;GAKG;AACH,wBAAsB,kBAAkB,CACtC,EAAE,EAAE,SAAS,EACb,GAAG,EAAE,aAAa,EAClB,MAAM,EAAE,MAAM,EACd,MAAM,CAAC,EAAE,YAAY,GACpB,OAAO,CAAC,IAAI,CAAC,~~CAkCf;AAED;;;;;;GAMG;AACH,wBAAsB,gBAAgB,CACpC,EAAE,EAAE,SAAS,EACb,GAAG,EAAE,aAAa,EAClB,MAAM,EAAE,MAAM,EACd,MAAM,CAAC,EAAE,YAAY,GACpB,OAAO,CAAC,IAAI,CAAC,CAaf~~"}
1	+ {"version":3,"file":"cdpHandlers.d.ts","sourceRoot":"","sources":["../../src/service/cdpHandlers.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AAEpC,OAAO,EAAqB,KAAK,aAAa,EAAE,MAAM,+BAA+B,CAAC;AACtF,OAAO,EAAQ,KAAK,aAAa,EAAE,MAAM,YAAY,CAAC;AAEtD;;sCAEsC;AACtC,MAAM,MAAM,YAAY,GAAG,IAAI,CAAC,aAAa,EAAE,OAAO,GAAG,aAAa,CAAC,CAAC;AAExE;;;;;GAKG;AACH,wBAAsB,kBAAkB,CACtC,EAAE,EAAE,SAAS,EACb,GAAG,EAAE,aAAa,EAClB,MAAM,EAAE,MAAM,EACd,MAAM,CAAC,EAAE,YAAY,GACpB,OAAO,CAAC,IAAI,CAAC,CA+Bf"}

package/dist/service/cdpHandlers.js CHANGED Viewed

@@ -1,38 +1,15 @@
 /**
  * CDP-related WebSocket message handlers.
  *
- *   check-cdp     → checkCdpStatus → emit cdp-status
  *   launch-chrome → emit "launching" placeholder → launchDebugChrome →
  *                   re-check status → emit cdp-status
- *   focus-debug   → focusDebugTab → no message on success (the widget the
- *                   user is about to focus runs its own check-cdp anyway)
  *
  * Extracted from service.ts during the v0.2.x refactor pass so the main
  * file can be a thin orchestrator.
  */
-import { checkCdpStatus, focusDebugTab } from '../playwright/cdpStatus.js';
+import { checkCdpStatus } from '../playwright/cdpStatus.js';
 import { launchDebugChrome } from '../playwright/launchChrome.js';
 import { send } from './types.js';
-/**
- * "Is this widget running inside the debug Chrome?" The widget asks this on
- * connect (and after every status-changing event) so it can render itself as
- * either:
- *   - same-window  → normal, drives the page
- *   - wrong-window → disabled, with a "use the other window" notice
- *   - no-cdp       → enabled but click triggers launch-chrome instead
- */
-export async function handleCheckCdp(ws, msg, cdpUrl, extras) {
-    const pageUrl = msg.payload?.pageUrl;
-    if (typeof pageUrl !== 'string' || !pageUrl) {
-        send(ws, { type: 'error', payload: { message: 'check-cdp: pageUrl is required' } });
-        return;
-    }
-    const effectiveCdpUrl = extras?.cdpPort
-        ? `http://localhost:${extras.cdpPort}`
-        : cdpUrl;
-    const status = await checkCdpStatus(effectiveCdpUrl, pageUrl);
-    send(ws, { type: 'cdp-status', payload: status });
-}
 /**
  * Launch a debug Chrome navigated to `pageUrl`, then re-check status. The
  * re-check usually returns 'wrong-window' (because the widget asking is in
@@ -48,7 +25,7 @@ export async function handleLaunchChrome(ws, msg, cdpUrl, extras) {
     // Tell the widget we're launching so it can render a spinner immediately —
     // findChromeBinary + spawn + ready-poll can take a few seconds.
     send(ws, { type: 'cdp-status', payload: { state: 'no-cdp', launching: true } });
-    const port = extras?.cdpPort ?? (() => {
+    const port = (() => {
         try {
             return Number(new URL(cdpUrl).port) || 9222;
         }
@@ -59,39 +36,15 @@ export async function handleLaunchChrome(ws, msg, cdpUrl, extras) {
     const result = await launchDebugChrome({
         url: pageUrl,
         port,
-        userDataDir: extras?.userDataDir,
         proxy: extras?.proxy,
+        userDataDir: extras?.userDataDir,
+        headless: msg.payload?.headless === true,
+        force: msg.payload?.force === true,
     });
     if (!result.ok) {
         send(ws, { type: 'cdp-status', payload: { state: 'no-cdp', reason: result.reason } });
         return;
     }
-    // Re-check status against the port we actually launched on, so a mode-
-    // specific port (9333 for security) doesn't get probed at 9222.
-    const effectiveCdpUrl = extras?.cdpPort
-        ? `http://localhost:${extras.cdpPort}`
-        : cdpUrl;
-    const status = await checkCdpStatus(effectiveCdpUrl, pageUrl);
+    const status = await checkCdpStatus(cdpUrl, pageUrl);
     send(ws, { type: 'cdp-status', payload: status });
 }
-/**
- * bringToFront the debug-Chrome tab matching `pageUrl`'s origin (or open one
- * if none exists). Used by the wrong-window UI's "switch to debug Chrome"
- * button. Doesn't return cdp-status — bringToFront doesn't change anything
- * the widget cares about, and the widget the user is about to focus is a
- * different page (and will run its own check-cdp on its own ws connection).
- */
-export async function handleFocusDebug(ws, msg, cdpUrl, extras) {
-    const pageUrl = msg.payload?.pageUrl;
-    if (typeof pageUrl !== 'string' || !pageUrl) {
-        send(ws, { type: 'error', payload: { message: 'focus-debug: pageUrl is required' } });
-        return;
-    }
-    const effectiveCdpUrl = extras?.cdpPort
-        ? `http://localhost:${extras.cdpPort}`
-        : cdpUrl;
-    const result = await focusDebugTab(effectiveCdpUrl, pageUrl);
-    if (!result.ok) {
-        send(ws, { type: 'error', payload: { message: `focus-debug: ${result.reason}` } });
-    }
-}

package/dist/service/cdpHint.d.ts CHANGED Viewed

@@ -1,27 +1,24 @@
 /**
  * System-prompt addendum sent to the agent on every command.
  *
- * Two roles:
- *   1. Navigation rules — the most failure-prone agent behaviours are
- *      `browser_navigate` to same-origin paths (kills the widget) and
- *      reading the JS bundle for credentials. We tell the agent both
- *      mistakes by name, including the actual origin to forbid.
- *   2. Narration format — how the widget renders the run depends on the
- *      agent emitting short imperative one-liners before each logical
- *      step. The good/bad examples are present-tense and 3–8 words.
+ * Principle-first and deliberately short (v0.16 prompt-trim pass). With
+ * Opus 4.x, emphatic "do NOT / CRITICAL" rule-stacking over-triggers and the
+ * middle of a long prompt gets ignored, so behaviour is steered with a few
+ * stated principles — each negative carrying its reason — rather than an
+ * enumerated rule list. Ordering follows attention, not chronology: the
+ * highest-value instructions (verify, trust boundary, scope) sit at the top,
+ * the volatile tab snapshot at the very bottom.
  *
  * Lives in its own file because this string is the most-tuned text in the
- * repo and the easiest to break with a typo. Tests can import directly.
+ * repo and the easiest to break with a typo. Tests import it directly.
  *
- * Two-tier split (since v0.4.x perf pass):
- *   - `buildCdpHint(tabs)` returns the full rules + narration block.
- *     Used on the *first* turn of a session (no `--resume`).
- *   - `buildCdpHintResume(tabs)` returns ONLY the volatile tab list +
- *     active-origin guard. Used on subsequent turns once `--resume`
- *     re-anchors the agent to the prior turn's full system prompt —
- *     the stable rules are already in context, so re-sending them
- *     fragments Anthropic's prompt cache and bills ~500 extra input
- *     tokens per turn for zero behavioural change.
+ * Two-tier split (prompt-cache aware):
+ *   - `buildCdpHint(tabs)`: the full block. First turn of a session (no
+ *     `--resume`).
+ *   - `buildCdpHintResume(tabs)`: ONLY the volatile tab list — the rules
+ *     persist in the agent's context from turn 1. Re-sending the stable rules
+ *     each turn would fragment Anthropic's prompt cache and bill ~500 extra
+ *     input tokens per turn for zero behavioural change.
  */
 interface Tab {
     url: string;
@@ -32,16 +29,12 @@ export declare function buildCdpHint(tabs: Tab[]): string;
  * Volatile-only hint for `--resume` turns: just the tab list snapshot.
  * Empty string when the tab list is empty (nothing to refresh).
  *
- * The rules and narration format from `buildCdpHint` are already
- * established in the prior turn's context; re-sending them here would
- * fragment Anthropic's prompt-cache fingerprint (cache hits require the
- * system prompt to match byte-for-byte across turns) and bill ~500
- * extra input tokens per follow-up turn for no behaviour change.
- *
- * We DO re-send the tab list because it can drift between turns (user
- * opens a second tab, switches focus). The active-origin nav-guard is
- * not repeated — the agent has it from turn 1 and the tab-list update
- * keeps it grounded in the current URL.
+ * The rules and narration format from `buildCdpHint` are already established
+ * in the prior turn's context; re-sending them here would fragment Anthropic's
+ * prompt-cache fingerprint (cache hits require the system prompt to match
+ * byte-for-byte across turns) and bill ~500 extra input tokens per follow-up
+ * turn for no behaviour change. We DO re-send the tab list because it drifts
+ * between turns (user opens a second tab, switches focus).
  */
 export declare function buildCdpHintResume(tabs: Tab[]): string;
 export {};

package/dist/service/cdpHint.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"cdpHint.d.ts","sourceRoot":"","sources":["../../src/service/cdpHint.ts"],"names":[],"mappings":"AAAA~~;;;;;;;;;;;;;;;;;;;;;;;;GAwBG~~;AAEH,UAAU,GAAG;IAAG,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE;AAa7C,wBAAgB,YAAY,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,MAAM,~~CAmJhD~~;AAED~~;;;;;;;;;;;;;;GAcG~~;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,MAAM,CAYtD"}
1	+ {"version":3,"file":"cdpHint.d.ts","sourceRoot":"","sources":["../../src/service/cdpHint.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAEH,UAAU,GAAG;IAAG,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE;AAa7C,wBAAgB,YAAY,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,MAAM,CAgGhD;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,MAAM,CAYtD"}