pi-agent-browser-native 0.2.34 → 0.2.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/CHANGELOG.md +27 -0
  2. package/README.md +14 -14
  3. package/docs/ARCHITECTURE.md +19 -13
  4. package/docs/COMMAND_REFERENCE.md +257 -42
  5. package/docs/ELECTRON.md +3 -3
  6. package/docs/RELEASE.md +11 -11
  7. package/docs/REQUIREMENTS.md +5 -5
  8. package/docs/SUPPORT_MATRIX.md +23 -21
  9. package/docs/TOOL_CONTRACT.md +38 -27
  10. package/extensions/agent-browser/index.ts +518 -2402
  11. package/extensions/agent-browser/lib/argv-descriptor.ts +90 -0
  12. package/extensions/agent-browser/lib/argv-grammar.ts +128 -0
  13. package/extensions/agent-browser/lib/command-policy.ts +71 -0
  14. package/extensions/agent-browser/lib/command-taxonomy.ts +336 -0
  15. package/extensions/agent-browser/lib/electron/cleanup.ts +1 -0
  16. package/extensions/agent-browser/lib/executable-path.ts +19 -0
  17. package/extensions/agent-browser/lib/input-modes/params.ts +6 -6
  18. package/extensions/agent-browser/lib/orchestration/batch-stdin.ts +65 -0
  19. package/extensions/agent-browser/lib/orchestration/browser-run/browser-action-model.ts +154 -0
  20. package/extensions/agent-browser/lib/orchestration/browser-run/click-dispatch.ts +149 -0
  21. package/extensions/agent-browser/lib/orchestration/browser-run/diagnostics.ts +10 -28
  22. package/extensions/agent-browser/lib/orchestration/browser-run/final-result.ts +6 -2
  23. package/extensions/agent-browser/lib/orchestration/browser-run/index.ts +33 -27
  24. package/extensions/agent-browser/lib/orchestration/browser-run/prepare.ts +48 -22
  25. package/extensions/agent-browser/lib/orchestration/browser-run/process-output.ts +33 -10
  26. package/extensions/agent-browser/lib/orchestration/browser-run/prompt-guards.ts +93 -0
  27. package/extensions/agent-browser/lib/orchestration/browser-run/session-state.ts +19 -123
  28. package/extensions/agent-browser/lib/orchestration/browser-run/types.ts +26 -1
  29. package/extensions/agent-browser/lib/orchestration/electron-host/index.ts +860 -0
  30. package/extensions/agent-browser/lib/playbook.ts +9 -9
  31. package/extensions/agent-browser/lib/prompt-policy.ts +122 -0
  32. package/extensions/agent-browser/lib/results/action-recommendations.ts +3 -23
  33. package/extensions/agent-browser/lib/results/presentation/navigation.ts +2 -34
  34. package/extensions/agent-browser/lib/runtime.ts +93 -227
  35. package/extensions/agent-browser/lib/session-page-state.ts +31 -14
  36. package/extensions/agent-browser/lib/temp.ts +148 -23
  37. package/package.json +4 -4
  38. package/scripts/agent-browser-capability-baseline.mjs +198 -1
@@ -45,7 +45,7 @@ export const AGENT_BROWSER_PARAMS = Type.Object({
45
45
  role: Type.Optional(Type.String({ description: "Role locator value for locator=role. May be used instead of value; when both are set they must match." })),
46
46
  name: Type.Optional(Type.String({ description: "Accessible name filter for locator=role; compiles to --name <name>." })),
47
47
  session: Type.Optional(Type.String({ description: "Optional upstream session name; prepends --session <name> before the compiled command." })),
48
- }),
48
+ }, { additionalProperties: false }),
49
49
  ),
50
50
  qa: Type.Optional(
51
51
  Type.Union([
@@ -79,7 +79,7 @@ export const AGENT_BROWSER_PARAMS = Type.Object({
79
79
  componentName: Type.Optional(Type.String({ description: "Component name to correlate with react tree output and bounded local workspace search." })),
80
80
  includeDomHints: Type.Optional(Type.Boolean({ description: "Whether selector lookups should inspect DOM HTML attributes for source-like metadata. Defaults to true." })),
81
81
  maxWorkspaceFiles: Type.Optional(Type.Number({ description: "Maximum local source files to scan when componentName is provided. Defaults to 2000 and cannot exceed 5000.", minimum: 1, maximum: SOURCE_LOOKUP_MAX_WORKSPACE_FILES })),
82
- }, { description: "EXPERIMENTAL: local UI-to-source candidates only (confidence/evidence, not guaranteed mappings). Compiles to batch; mutually exclusive with other input modes." }),
82
+ }, { additionalProperties: false, description: "EXPERIMENTAL: local UI-to-source candidates only (confidence/evidence, not guaranteed mappings). Compiles to batch; mutually exclusive with other input modes." }),
83
83
  ),
84
84
  networkSourceLookup: Type.Optional(
85
85
  Type.Object({
@@ -88,7 +88,7 @@ export const AGENT_BROWSER_PARAMS = Type.Object({
88
88
  session: Type.Optional(Type.String({ description: "Optional upstream session name; prepends --session <name> before the generated batch." })),
89
89
  url: Type.Optional(Type.String({ description: "Optional failed request URL or URL fragment to correlate with local source." })),
90
90
  maxWorkspaceFiles: Type.Optional(Type.Number({ description: "Maximum local source files to scan for URL literals. Defaults to 2000 and cannot exceed 5000.", minimum: 1, maximum: SOURCE_LOOKUP_MAX_WORKSPACE_FILES })),
91
- }, { description: "EXPERIMENTAL: failed-request-to-source candidates only (initiator metadata and bounded workspace URL literals; not definitive blame). Compiles to batch; mutually exclusive with other input modes." }),
91
+ }, { additionalProperties: false, description: "EXPERIMENTAL: failed-request-to-source candidates only (initiator metadata and bounded workspace URL literals; not definitive blame). Compiles to batch; mutually exclusive with other input modes." }),
92
92
  ),
93
93
  electron: Type.Optional(
94
94
  Type.Union([
@@ -172,10 +172,10 @@ export const AGENT_BROWSER_PARAMS = Type.Object({
172
172
  values: Type.Optional(Type.Array(Type.String({ description: "Option value for select steps." }), { description: "One or more option values for select steps.", minItems: 1 })),
173
173
  path: Type.Optional(Type.String({ description: "Artifact/download path for waitForDownload or screenshot steps." })),
174
174
  milliseconds: Type.Optional(Type.Number({ description: "Milliseconds for wait steps." })),
175
- }),
175
+ }, { additionalProperties: false }),
176
176
  { minItems: 1 },
177
177
  ),
178
- }),
178
+ }, { additionalProperties: false }),
179
179
  ),
180
180
  stdin: Type.Optional(Type.String({ description: "Optional raw stdin content; only supported for batch, eval --stdin, auth save --password-stdin, and is generated internally by job, qa, sourceLookup, or networkSourceLookup mode. Do not use with electron mode." })),
181
181
  sessionMode: Type.Optional(
@@ -185,4 +185,4 @@ export const AGENT_BROWSER_PARAMS = Type.Object({
185
185
  default: DEFAULT_SESSION_MODE,
186
186
  }),
187
187
  ),
188
- });
188
+ }, { additionalProperties: false });
@@ -0,0 +1,65 @@
1
+ export type BatchCommandStep = [string, ...string[]];
2
+
3
+ function validateUserBatchStep(step: unknown, index: number): { error: string; ok: false } | { ok: true; step: BatchCommandStep } {
4
+ if (!Array.isArray(step)) {
5
+ return {
6
+ error: `agent_browser batch stdin step ${index} must be a non-empty array of string command tokens.`,
7
+ ok: false,
8
+ };
9
+ }
10
+ if (step.length === 0) {
11
+ return {
12
+ error: `agent_browser batch stdin step ${index} must not be empty.`,
13
+ ok: false,
14
+ };
15
+ }
16
+ const invalidTokenIndex = step.findIndex((token) => typeof token !== "string");
17
+ if (invalidTokenIndex !== -1) {
18
+ return {
19
+ error: `agent_browser batch stdin step ${index} token ${invalidTokenIndex} must be a string.`,
20
+ ok: false,
21
+ };
22
+ }
23
+ return { ok: true, step: step as BatchCommandStep };
24
+ }
25
+
26
+ export function parseBatchStdinJsonArray(stdin: string | undefined): { error?: string; steps?: unknown[] } {
27
+ if (stdin === undefined) {
28
+ return { steps: [] };
29
+ }
30
+ try {
31
+ const parsed = JSON.parse(stdin) as unknown;
32
+ if (!Array.isArray(parsed)) {
33
+ return { error: "agent_browser batch stdin must be a JSON array of command steps." };
34
+ }
35
+ return { steps: parsed };
36
+ } catch (error) {
37
+ const message = error instanceof Error ? error.message : String(error);
38
+ return { error: `agent_browser batch stdin could not be parsed as JSON: ${message}` };
39
+ }
40
+ }
41
+
42
+ export function parseUserBatchStdin(stdin: string | undefined): { error?: string; steps?: BatchCommandStep[] } {
43
+ const parsed = parseBatchStdinJsonArray(stdin);
44
+ if (parsed.error || parsed.steps === undefined) {
45
+ return parsed.error ? { error: parsed.error } : { steps: [] };
46
+ }
47
+ const steps: BatchCommandStep[] = [];
48
+ for (const [index, rawStep] of parsed.steps.entries()) {
49
+ const validated = validateUserBatchStep(rawStep, index);
50
+ if (!validated.ok) {
51
+ return { error: validated.error };
52
+ }
53
+ steps.push(validated.step);
54
+ }
55
+ return { steps };
56
+ }
57
+
58
+ export function parseValidBatchStepEntries(stdin: string | undefined): Array<{ index: number; step: BatchCommandStep }> {
59
+ const parsed = parseBatchStdinJsonArray(stdin);
60
+ if (parsed.error || parsed.steps === undefined) return [];
61
+ return parsed.steps.flatMap((step, index) => {
62
+ const validated = validateUserBatchStep(step, index);
63
+ return validated.ok ? [{ index, step: validated.step }] : [];
64
+ });
65
+ }
@@ -0,0 +1,154 @@
1
+ /**
2
+ * Purpose: Normalize planned browser argv into a small action model for prompt-derived guards.
3
+ * Responsibilities: Map command tokens and batch stdin steps to click-like and keyboard-submit actions with target labels.
4
+ * Scope: Best-effort finalizing-action detection only; does not model eval, generic fill/type, or non-Enter keyboard flows.
5
+ */
6
+
7
+ import type { SessionRefSnapshot } from "../../session-page-state.js";
8
+ import { parseValidBatchStepEntries } from "../batch-stdin.js";
9
+
10
+ const FINAL_ACTION_PATTERN = /\b(?:finish|place\s+(?:the\s+)?order|submit\s+(?:the\s+)?order|complete\s+(?:the\s+)?order|confirm\s+(?:the\s+)?order|purchase|buy\s+now|pay\s+now|finali[sz]e|submit\s+payment|checkout\s+complete)\b/i;
11
+
12
+ const CLICK_LIKE_COMMANDS = new Set(["click", "dblclick", "tap"]);
13
+ const FIND_CLICK_ACTIONS = new Set(["click", "dblclick", "tap"]);
14
+ const KEYBOARD_SUBMIT_KEYS = new Set(["enter", "return"]);
15
+
16
+ export type BrowserFinalizingActionKind = "click-like" | "keyboard-submit";
17
+
18
+ export interface BrowserFinalizingAction {
19
+ command: string[];
20
+ kind: BrowserFinalizingActionKind;
21
+ stepIndex?: number;
22
+ targetLabel?: string;
23
+ }
24
+
25
+ export const STOP_BOUNDARY_GUARD_SCOPE = {
26
+ covered: [
27
+ "standalone click, dblclick, and tap",
28
+ "find … click|dblclick|tap",
29
+ "batch steps with the click-like shapes above",
30
+ "press <key> and key <key> when key is Enter or Return",
31
+ ],
32
+ excluded: [
33
+ "eval --stdin and other scripted activation",
34
+ "fill, type, select, drag, and upload without an explicit click-like command",
35
+ "keyboard type/inserttext and keyboard shortcuts other than Enter/Return",
36
+ "semanticAction and job/qa compiled plans unless their batch stdin contains a covered step",
37
+ ],
38
+ } as const;
39
+
40
+ function normalizeTargetText(value: string): string {
41
+ return value
42
+ .replace(/[_-]+/g, " ")
43
+ .replace(/[\[\]{}()#.'\"=:/]+/g, " ")
44
+ .replace(/\s+/g, " ")
45
+ .trim();
46
+ }
47
+
48
+ export function matchesFinalActionLabel(value: string | undefined): boolean {
49
+ return value !== undefined && FINAL_ACTION_PATTERN.test(normalizeTargetText(value));
50
+ }
51
+
52
+ function parseRefId(value: string | undefined): string | undefined {
53
+ if (!value) return undefined;
54
+ const trimmed = value.trim();
55
+ const candidate = trimmed.startsWith("@") ? trimmed.slice(1) : trimmed.startsWith("ref=") ? trimmed.slice(4) : trimmed;
56
+ return /^e\d+$/.test(candidate) ? candidate : undefined;
57
+ }
58
+
59
+ function getRefTargetLabel(refSnapshot: SessionRefSnapshot | undefined, refId: string | undefined): string | undefined {
60
+ if (!refId) return undefined;
61
+ const ref = refSnapshot?.refs?.[refId];
62
+ return ref ? [ref.role, ref.name].filter(Boolean).join(" ") : undefined;
63
+ }
64
+
65
+ function getFlagValue(tokens: string[], flag: string): string | undefined {
66
+ for (const [index, token] of tokens.entries()) {
67
+ if (token === flag) return tokens[index + 1];
68
+ if (token.startsWith(`${flag}=`)) return token.slice(flag.length + 1);
69
+ }
70
+ return undefined;
71
+ }
72
+
73
+ function getClickLikeTargetLabel(command: string[], refSnapshot: SessionRefSnapshot | undefined): string | undefined {
74
+ const target = command[1];
75
+ return getRefTargetLabel(refSnapshot, parseRefId(target)) ?? target;
76
+ }
77
+
78
+ function getFindClickTargetLabel(command: string[]): string | undefined {
79
+ if (command[0] !== "find") return undefined;
80
+ const actionIndex = command.findIndex((token, index) => index >= 3 && FIND_CLICK_ACTIONS.has(token));
81
+ if (actionIndex === -1) return undefined;
82
+ return getFlagValue(command, "--name") ?? command[2];
83
+ }
84
+
85
+ function getKeyboardSubmitKey(command: string[]): string | undefined {
86
+ const commandName = command[0];
87
+ if (commandName === "press" || commandName === "key") return command[1];
88
+ return undefined;
89
+ }
90
+
91
+ function collectActionsFromCommand(command: string[], refSnapshot: SessionRefSnapshot | undefined, stepIndex?: number): BrowserFinalizingAction[] {
92
+ const actions: BrowserFinalizingAction[] = [];
93
+ if (CLICK_LIKE_COMMANDS.has(command[0] ?? "")) {
94
+ actions.push({
95
+ command,
96
+ kind: "click-like",
97
+ stepIndex,
98
+ targetLabel: getClickLikeTargetLabel(command, refSnapshot),
99
+ });
100
+ return actions;
101
+ }
102
+ if (command[0] === "find") {
103
+ const actionIndex = command.findIndex((token, index) => index >= 3 && FIND_CLICK_ACTIONS.has(token));
104
+ if (actionIndex !== -1) {
105
+ actions.push({
106
+ command,
107
+ kind: "click-like",
108
+ stepIndex,
109
+ targetLabel: getFindClickTargetLabel(command),
110
+ });
111
+ }
112
+ return actions;
113
+ }
114
+ const submitKey = getKeyboardSubmitKey(command)?.trim().toLowerCase();
115
+ if (submitKey && KEYBOARD_SUBMIT_KEYS.has(submitKey)) {
116
+ actions.push({
117
+ command,
118
+ kind: "keyboard-submit",
119
+ stepIndex,
120
+ targetLabel: submitKey,
121
+ });
122
+ }
123
+ return actions;
124
+ }
125
+
126
+ export function collectBrowserFinalizingActions(options: {
127
+ commandTokens: string[];
128
+ refSnapshot?: SessionRefSnapshot;
129
+ stdin?: string;
130
+ }): BrowserFinalizingAction[] {
131
+ const actions = collectActionsFromCommand(options.commandTokens, options.refSnapshot);
132
+ if (options.commandTokens[0] !== "batch") return actions;
133
+ for (const { index, step } of parseValidBatchStepEntries(options.stdin)) {
134
+ actions.push(...collectActionsFromCommand(step, options.refSnapshot, index));
135
+ }
136
+ return actions;
137
+ }
138
+
139
+ export function shouldBlockFinalizingAction(action: BrowserFinalizingAction): boolean {
140
+ if (action.kind === "keyboard-submit") return true;
141
+ return matchesFinalActionLabel(action.targetLabel);
142
+ }
143
+
144
+ export function findBlockedFinalizingAction(options: {
145
+ commandTokens: string[];
146
+ refSnapshot?: SessionRefSnapshot;
147
+ stdin?: string;
148
+ }): BrowserFinalizingAction | undefined {
149
+ for (const action of collectBrowserFinalizingActions(options)) {
150
+ if (!shouldBlockFinalizingAction(action)) continue;
151
+ return action;
152
+ }
153
+ return undefined;
154
+ }
@@ -0,0 +1,149 @@
1
+ import { isRecord } from "../../parsing.js";
2
+ import { redactSensitiveText } from "../../runtime.js";
3
+ import { withOptionalSessionArgs, type AgentBrowserNextAction } from "../../results/next-actions.js";
4
+ import { runSessionCommandData } from "./session-state.js";
5
+ import type { ClickDispatchDiagnostic, ClickDispatchProbe, ClickDispatchProbeTarget } from "./types.js";
6
+
7
+ const CLICK_DISPATCH_MARKER_PREFIX = "__piAgentBrowserClickDispatchProbe_";
8
+ const CLICK_DISPATCH_CLEANUP_TIMEOUT_MS = 2_000;
9
+
10
+ function parseClickRefId(selector: string): string | undefined {
11
+ const trimmed = selector.trim();
12
+ const candidate = trimmed.startsWith("@") ? trimmed.slice(1) : trimmed.startsWith("ref=") ? trimmed.slice(4) : trimmed;
13
+ return /^e\d+$/.test(candidate) ? candidate : undefined;
14
+ }
15
+
16
+ function getClickDispatchSelectorTarget(commandTokens: string[]): ClickDispatchProbeTarget | undefined {
17
+ if (commandTokens[0] !== "click" || commandTokens.includes("--new-tab")) return undefined;
18
+ const selector = commandTokens[1];
19
+ if (!selector || selector.startsWith("-")) return undefined;
20
+ if (parseClickRefId(selector)) return undefined;
21
+ if (selector.startsWith("xpath=")) return { kind: "xpath", selector: selector.slice("xpath=".length) };
22
+ return { kind: "selector", selector };
23
+ }
24
+
25
+ function getEvalResultRecord(data: unknown): Record<string, unknown> | undefined {
26
+ return isRecord(data) && isRecord(data.result) ? data.result : undefined;
27
+ }
28
+
29
+ function buildClickDispatchProbeInstallScript(probe: ClickDispatchProbe): string {
30
+ const target = probe.target;
31
+ const resolveTarget = target.kind === "selector"
32
+ ? `(() => { try { return document.querySelector(${JSON.stringify(target.selector)}); } catch { return null; } })()`
33
+ : `(() => { try { return document.evaluate(${JSON.stringify(target.selector)}, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue; } catch { return null; } })()`;
34
+ return `(() => {
35
+ const marker = ${JSON.stringify(probe.marker)};
36
+ const element = ${resolveTarget};
37
+ if (!element) return { status: "target-not-found", marker };
38
+ const state = { events: [], target: { tagName: element.tagName.toLowerCase() } };
39
+ const eventTypes = ["pointerdown", "mousedown", "pointerup", "mouseup", "click"];
40
+ const listeners = eventTypes.map((type) => {
41
+ const listener = (event) => {
42
+ const path = typeof event.composedPath === "function" ? event.composedPath() : [];
43
+ const eventTarget = event.target;
44
+ const targetMatched = path.includes(element) || eventTarget === element || (eventTarget instanceof Node && element.contains(eventTarget));
45
+ state.events.push({ type: event.type, isTrusted: event.isTrusted === true, targetMatched });
46
+ };
47
+ document.addEventListener(type, listener, true);
48
+ return [type, listener];
49
+ });
50
+ state.cleanup = () => listeners.forEach(([type, listener]) => document.removeEventListener(type, listener, true));
51
+ window[marker] = state;
52
+ return { status: "installed", marker, target: state.target };
53
+ })()`;
54
+ }
55
+
56
+ function buildClickDispatchProbeCheckScript(probe: ClickDispatchProbe): string {
57
+ return `(() => {
58
+ const marker = ${JSON.stringify(probe.marker)};
59
+ const state = window[marker];
60
+ const finish = (payload) => {
61
+ if (state && typeof state.cleanup === "function") state.cleanup();
62
+ try { delete window[marker]; } catch {}
63
+ return payload;
64
+ };
65
+ if (!state || !Array.isArray(state.events)) return finish({ status: "probe-missing", nativeEventCount: 0 });
66
+ const nativeEventCount = state.events.filter((event) => event && event.isTrusted === true && event.targetMatched === true).length;
67
+ if (nativeEventCount > 0) return finish({ status: "native-event-observed", nativeEventCount, target: state.target });
68
+ return finish({ status: "no-native-event-observed", nativeEventCount, target: state.target });
69
+ })()`;
70
+ }
71
+
72
+ function buildClickDispatchProbeCleanupScript(probe: ClickDispatchProbe): string {
73
+ return `(() => {
74
+ const marker = ${JSON.stringify(probe.marker)};
75
+ const state = window[marker];
76
+ if (state && typeof state.cleanup === "function") state.cleanup();
77
+ try { delete window[marker]; } catch {}
78
+ return { status: "cleaned-up" };
79
+ })()`;
80
+ }
81
+
82
+ function redactClickDispatchTarget(target: ClickDispatchProbeTarget): ClickDispatchProbeTarget {
83
+ return target.kind === "selector" || target.kind === "xpath"
84
+ ? { ...target, selector: redactSensitiveText(target.selector) }
85
+ : target;
86
+ }
87
+
88
+ export function formatClickDispatchDiagnosticText(diagnostic: ClickDispatchDiagnostic): string {
89
+ return `Click dispatch diagnostic: ${diagnostic.summary}`;
90
+ }
91
+
92
+ export function buildClickDispatchNextActions(options: { commandTokens: string[]; sessionName?: string }): AgentBrowserNextAction[] {
93
+ const retryArgs = options.commandTokens[0] === "click" ? options.commandTokens : ["click", ...options.commandTokens];
94
+ return [
95
+ {
96
+ id: "inspect-click-dispatch-miss",
97
+ params: { args: withOptionalSessionArgs(options.sessionName, ["snapshot", "-i"]) },
98
+ reason: "Refresh interactive refs and verify the intended click target before retrying upstream click.",
99
+ safety: "Read-only snapshot; the wrapper does not replay clicks in-page when upstream reports success without DOM events.",
100
+ tool: "agent_browser",
101
+ },
102
+ {
103
+ id: "retry-click-after-dispatch-miss",
104
+ params: { args: withOptionalSessionArgs(options.sessionName, retryArgs) },
105
+ reason: "Retry the same upstream click after confirming the target is visible; do not assume the prior success mutated the page.",
106
+ safety: "Only retry when the target is still intended; use page-change evidence or a fresh snapshot before continuing the workflow.",
107
+ tool: "agent_browser",
108
+ },
109
+ ];
110
+ }
111
+
112
+ export async function prepareClickDispatchProbe(options: { commandTokens: string[]; cwd: string; sessionName?: string; signal?: AbortSignal }): Promise<ClickDispatchProbe | undefined> {
113
+ if (!options.sessionName || options.commandTokens[0] !== "click" || options.commandTokens.includes("--new-tab")) return undefined;
114
+ const target = getClickDispatchSelectorTarget(options.commandTokens);
115
+ if (!target) return undefined;
116
+ const probe: ClickDispatchProbe = { marker: `${CLICK_DISPATCH_MARKER_PREFIX}${Date.now().toString(36)}_${Math.random().toString(36).slice(2)}`, target };
117
+ const installData = await runSessionCommandData({ args: ["eval", "--stdin"], cwd: options.cwd, sessionName: options.sessionName, signal: options.signal, stdin: buildClickDispatchProbeInstallScript(probe) });
118
+ const installResult = getEvalResultRecord(installData);
119
+ return installResult?.status === "installed" ? probe : undefined;
120
+ }
121
+
122
+ export async function collectClickDispatchDiagnostic(options: { cwd: string; probe?: ClickDispatchProbe; sessionName?: string; signal?: AbortSignal }): Promise<ClickDispatchDiagnostic | undefined> {
123
+ if (!options.probe || !options.sessionName) return undefined;
124
+ const data = await runSessionCommandData({ args: ["eval", "--stdin"], cwd: options.cwd, sessionName: options.sessionName, signal: options.signal, stdin: buildClickDispatchProbeCheckScript(options.probe) });
125
+ const result = getEvalResultRecord(data);
126
+ if (!result) return undefined;
127
+ const status = typeof result.status === "string" ? result.status : undefined;
128
+ if (status !== "no-native-event-observed") return undefined;
129
+ const nativeEventCount = typeof result.nativeEventCount === "number" ? result.nativeEventCount : 0;
130
+ const summary = "Upstream click reported success but no trusted DOM event reached the selected element. Gather evidence with snapshot or page-change checks, then retry upstream click or report the workflow issue; the wrapper does not replay clicks in-page.";
131
+ return {
132
+ nativeEventCount,
133
+ reason: "native-click-produced-no-target-dom-event",
134
+ status,
135
+ summary,
136
+ target: redactClickDispatchTarget(options.probe.target),
137
+ };
138
+ }
139
+
140
+ export async function cleanupClickDispatchProbe(options: { cwd: string; probe?: ClickDispatchProbe; sessionName?: string }): Promise<void> {
141
+ if (!options.probe || !options.sessionName) return;
142
+ await runSessionCommandData({
143
+ args: ["eval", "--stdin"],
144
+ cwd: options.cwd,
145
+ sessionName: options.sessionName,
146
+ stdin: buildClickDispatchProbeCleanupScript(options.probe),
147
+ timeoutMs: CLICK_DISPATCH_CLEANUP_TIMEOUT_MS,
148
+ }).catch(() => undefined);
149
+ }
@@ -1,8 +1,9 @@
1
- import { constants as fsConstants } from "node:fs";
2
- import { access, stat } from "node:fs/promises";
3
- import { delimiter, isAbsolute, join, resolve } from "node:path";
1
+ import { stat } from "node:fs/promises";
2
+ import { isAbsolute, resolve } from "node:path";
4
3
 
4
+ import { isCloseCommand, isOpenNavigationCommand } from "../../command-taxonomy.js";
5
5
  import type { ElectronLaunchRecord } from "../../electron/launch.js";
6
+ import { executableExistsOnPath } from "../../executable-path.js";
6
7
  import type { AgentBrowserSourceLookupAnalysis, CompiledAgentBrowserJob, CompiledAgentBrowserSemanticAction } from "../../input-modes.js";
7
8
  import { isHttpOrHttpsUrl } from "../../input-modes/job.js";
8
9
  import type { AgentBrowserNextAction } from "../../results.js";
@@ -20,12 +21,14 @@ import {
20
21
  getGuardedRefUsage,
21
22
  runSessionCommandData,
22
23
  } from "./session-state.js";
24
+ import { parseValidBatchStepEntries } from "../batch-stdin.js";
23
25
  import { getScreenshotPathTokenIndex } from "./prepare.js";
24
26
  import type {
25
27
  ArtifactCleanupGuidance,
26
28
  ComboboxFocusDiagnostic,
27
29
  ElectronBroadGetTextScopeDiagnostic,
28
30
  ElectronHandoffSummary,
31
+ ElectronManagedSessionTarget,
29
32
  FillVerificationDiagnostic,
30
33
  NavigationSummary,
31
34
  OverlayBlockerCandidate,
@@ -238,23 +241,6 @@ function getRecordStartLikeCommand(command: string | undefined, commandTokens: s
238
241
  return undefined;
239
242
  }
240
243
 
241
- async function executableExistsOnPath(command: string): Promise<boolean> {
242
- const pathValue = process.env.PATH ?? "";
243
- const extensions = process.platform === "win32" ? (process.env.PATHEXT ?? ".EXE;.CMD;.BAT;.COM").split(";").filter(Boolean) : [""];
244
- for (const directory of pathValue.split(delimiter).filter(Boolean)) {
245
- for (const extension of extensions) {
246
- try {
247
- const candidate = join(directory, `${command}${extension}`);
248
- await access(candidate, fsConstants.X_OK);
249
- if ((await stat(candidate)).isFile()) return true;
250
- } catch {
251
- // Try the next candidate.
252
- }
253
- }
254
- }
255
- return false;
256
- }
257
-
258
244
  export async function collectRecordingDependencyWarning(options: { command: string | undefined; commandTokens: string[]; succeeded: boolean }): Promise<RecordingDependencyWarning | undefined> {
259
245
  if (!options.succeeded) return undefined;
260
246
  const recordCommand = getRecordStartLikeCommand(options.command, options.commandTokens);
@@ -466,7 +452,7 @@ export function formatEvalStdinHintText(hint: ReturnType<typeof getEvalStdinHint
466
452
  }
467
453
 
468
454
  export async function getArtifactCleanupGuidance(options: { command?: string; cwd: string; manifest?: SessionArtifactManifest; succeeded: boolean }): Promise<ArtifactCleanupGuidance | undefined> {
469
- if (!options.succeeded || options.command !== "close" || !options.manifest || options.manifest.entries.length === 0) return undefined;
455
+ if (!options.succeeded || !isCloseCommand(options.command) || !options.manifest || options.manifest.entries.length === 0) return undefined;
470
456
  const explicitEntries = options.manifest.entries.filter((entry) => entry.storageScope === "explicit-path");
471
457
  const explicitArtifactPaths: string[] = [];
472
458
  const seenPaths = new Set<string>();
@@ -505,7 +491,7 @@ async function collectElectronManagedSessionUrl(options: { cwd: string; sessionN
505
491
  return urlResult.error ? { error: urlResult.error } : { url };
506
492
  }
507
493
 
508
- async function collectElectronManagedSessionTarget(options: { cwd: string; sessionName?: string; signal?: AbortSignal; timeoutMs?: number }): Promise<QaAttachedTarget | undefined> {
494
+ export async function collectElectronManagedSessionTarget(options: { cwd: string; sessionName?: string; signal?: AbortSignal; timeoutMs?: number }): Promise<ElectronManagedSessionTarget | undefined> {
509
495
  if (!options.sessionName) return undefined;
510
496
  const [titleResult, urlResult] = await Promise.all([
511
497
  collectManagedSessionCommandData({ args: ["get", "title"], cwd: options.cwd, sessionName: options.sessionName, signal: options.signal, timeoutMs: options.timeoutMs }),
@@ -647,11 +633,7 @@ export async function collectElectronHandoff(options: { cwd: string; handoff: "c
647
633
  function getTimeoutProgressSteps(compiledJob: CompiledAgentBrowserJob | undefined, command: string | undefined, stdin: string | undefined): Array<{ args: string[]; index: number }> {
648
634
  if (compiledJob) return compiledJob.steps.map((step, index) => ({ args: step.args, index: index + 1 }));
649
635
  if (command !== "batch" || !stdin) return [];
650
- try {
651
- const parsed = JSON.parse(stdin) as unknown;
652
- if (!Array.isArray(parsed)) return [];
653
- return parsed.flatMap((step, index) => Array.isArray(step) && step.every((token) => typeof token === "string") ? [{ args: step as string[], index: index + 1 }] : []);
654
- } catch { return []; }
636
+ return parseValidBatchStepEntries(stdin).map(({ index, step }) => ({ args: step, index: index + 1 }));
655
637
  }
656
638
 
657
639
  function getLastPositionalToken(args: string[], startIndex = 1): string | undefined {
@@ -709,7 +691,7 @@ async function collectTimeoutArtifactEvidence(cwd: string, steps: Array<{ args:
709
691
  function getPlannedCurrentPageUrl(steps: Array<{ args: string[]; index: number }>): string | undefined {
710
692
  for (let index = steps.length - 1; index >= 0; index -= 1) {
711
693
  const args = steps[index]?.args ?? [];
712
- if (args[0] === "open" || args[0] === "navigate" || args[0] === "pushstate") return getLastPositionalToken(args);
694
+ if (isOpenNavigationCommand(args[0]) || args[0] === "pushstate") return getLastPositionalToken(args);
713
695
  }
714
696
  return undefined;
715
697
  }
@@ -43,6 +43,7 @@ import {
43
43
  } from "../../session-page-state.js";
44
44
  import { extractExplicitSessionName, redactInvocationArgs, redactSensitiveText, redactSensitiveValue, type OpenResultTabCorrection } from "../../runtime.js";
45
45
  import { isRecord } from "../../parsing.js";
46
+ import { buildClickDispatchNextActions, formatClickDispatchDiagnosticText } from "./click-dispatch.js";
46
47
  import {
47
48
  buildComboboxFocusNextActions,
48
49
  buildElectronBroadGetTextScopeNextActions,
@@ -212,7 +213,7 @@ export function buildElectronHostFailureResult(options: {
212
213
  return { content: [{ type: "text", text: redactSensitiveText(text) }], details: redactToolDetails(details, []), isError: true };
213
214
  }
214
215
 
215
- function formatElectronTargetLines(targets: ElectronCdpTarget[], limit = 8): string[] {
216
+ export function formatElectronTargetLines(targets: ElectronCdpTarget[], limit = 8): string[] {
216
217
  const shownTargets = targets.slice(0, limit);
217
218
  const lines = shownTargets.map((target) => {
218
219
  const label = [target.type, target.title].filter(Boolean).join(" ") || target.id || "target";
@@ -319,6 +320,7 @@ function buildResultNextActions(options: FinalResultInput): AgentBrowserNextActi
319
320
  if (options.selectorTextVisibilityDiagnostics.length > 0) nextActionCollector.append(buildSelectorTextVisibilityNextActions({ diagnostics: options.selectorTextVisibilityDiagnostics, sessionName: options.executionPlan.sessionName }));
320
321
  if (options.electronBroadGetTextScopeDiagnostics.length > 0) nextActionCollector.append(buildElectronBroadGetTextScopeNextActions({ diagnostics: options.electronBroadGetTextScopeDiagnostics, sessionName: options.executionPlan.sessionName }));
321
322
  if (options.sourceLookup?.electronContext) nextActionCollector.appendUnique(buildSourceLookupElectronNextActions(options.sourceLookup));
323
+ if (options.clickDispatchDiagnostic) nextActionCollector.append(buildClickDispatchNextActions({ commandTokens: options.commandTokens, sessionName: options.executionPlan.sessionName }));
322
324
  if (options.scrollNoopDiagnostic) nextActionCollector.append(buildScrollNoopNextActions(options.executionPlan.sessionName));
323
325
  if (options.comboboxFocusDiagnostic) nextActionCollector.append(buildComboboxFocusNextActions(options.executionPlan.sessionName));
324
326
  if (options.categoryDetails.failureCategory === "stale-ref" && options.redactedCompiledSemanticAction && isCompiledSemanticActionFindCommand(options.compiledSemanticAction)) nextActionCollector.append([{ id: "retry-semantic-action-after-stale-ref", params: { args: options.redactedCompiledSemanticAction.args }, reason: "Retry the same semantic target via its compiled find command after the upstream stale-ref failure proves the prior action did not execute.", safety: "Use only for the same intended target; direct stale @refs still require a fresh snapshot or stable locator before retrying.", tool: "agent_browser" as const }]);
@@ -369,6 +371,7 @@ function buildAgentBrowserResultDetails(options: FinalResultInput, nextActions:
369
371
  imagePaths: options.presentation.imagePaths,
370
372
  nextActions,
371
373
  pageChangeSummary,
374
+ clickDispatch: options.clickDispatchDiagnostic,
372
375
  overlayBlockers: options.overlayBlockerDiagnostic,
373
376
  fillVerification: options.fillVerificationDiagnostic,
374
377
  visibleRefFallback: publicVisibleRefFallbackDiagnostic,
@@ -411,6 +414,7 @@ export function buildFinalAgentBrowserToolResult(options: FinalResultInput): Age
411
414
  const visibleRefFallbackText = formatVisibleRefFallbackText(options.visibleRefFallbackDiagnostic);
412
415
  const richInputRecoveryText = formatRichInputRecoveryText(options.richInputRecoveryDiagnostic);
413
416
  const semanticActionCandidateText = nextActions ? formatSemanticActionCandidateText(nextActions) : undefined;
417
+ const clickDispatchText = options.clickDispatchDiagnostic ? formatClickDispatchDiagnosticText(options.clickDispatchDiagnostic) : undefined;
414
418
  const overlayBlockerText = options.overlayBlockerDiagnostic ? formatOverlayBlockerText(options.overlayBlockerDiagnostic) : undefined;
415
419
  const fillVerificationText = formatFillVerificationText(options.fillVerificationDiagnostic);
416
420
  const electronRefFreshnessText = formatElectronRefFreshnessText(options.electronRefFreshnessDiagnostic);
@@ -423,7 +427,7 @@ export function buildFinalAgentBrowserToolResult(options: FinalResultInput): Age
423
427
  const artifactCleanupText = formatArtifactCleanupGuidanceText(options.artifactCleanup);
424
428
  const timeoutPartialProgressText = options.timeoutPartialProgress ? formatTimeoutPartialProgressText(options.timeoutPartialProgress) : undefined;
425
429
  const managedSessionOutcomeText = formatManagedSessionOutcomeText(options.managedSessionOutcome);
426
- const rawAppendedDiagnosticText = [visibleRefFallbackText, richInputRecoveryText, semanticActionCandidateText, overlayBlockerText, fillVerificationText, electronRefFreshnessText, selectorTextVisibilityText, electronBroadGetTextScopeText, scrollNoopDiagnosticText, comboboxFocusDiagnosticText, recordingDependencyWarningText, evalStdinHintText, artifactCleanupText, timeoutPartialProgressText, managedSessionOutcomeText].filter((item): item is string => item !== undefined).join("\n\n");
430
+ const rawAppendedDiagnosticText = [visibleRefFallbackText, richInputRecoveryText, semanticActionCandidateText, clickDispatchText, overlayBlockerText, fillVerificationText, electronRefFreshnessText, selectorTextVisibilityText, electronBroadGetTextScopeText, scrollNoopDiagnosticText, comboboxFocusDiagnosticText, recordingDependencyWarningText, evalStdinHintText, artifactCleanupText, timeoutPartialProgressText, managedSessionOutcomeText].filter((item): item is string => item !== undefined).join("\n\n");
427
431
  const appendedDiagnosticText = redactSensitiveText(redactExactSensitiveText(rawAppendedDiagnosticText, options.exactSensitiveValues));
428
432
  const shouldAppendDiagnosticText = appendedDiagnosticText.length > 0 && (!options.userRequestedJson || options.plainTextInspection);
429
433
  let content = shouldAppendDiagnosticText && options.redactedContent[0]?.type === "text" ? [{ ...options.redactedContent[0], text: `${options.redactedContent[0].text}\n\n${appendedDiagnosticText}` }, ...options.redactedContent.slice(1)] : options.redactedContent;
@@ -1,11 +1,13 @@
1
1
  import { runAgentBrowserProcess } from "../../process.js";
2
+ import { cleanupClickDispatchProbe } from "./click-dispatch.js";
2
3
  import { applyBrowserRunStatePatch } from "./session-state.js";
3
4
  import { buildMissingBinaryFailureResult } from "./final-result.js";
4
5
  import { prepareBrowserRun } from "./prepare.js";
5
6
  import { processBrowserOutput } from "./process-output.js";
6
7
  import type { AgentBrowserToolResult, BrowserRunOptions } from "./types.js";
7
8
 
8
- export type { BrowserRunOptions, BrowserRunState } from "./types.js";
9
+ export { closeManagedSession } from "./session-state.js";
10
+ export type { AgentBrowserToolResult, BrowserRunOptions, BrowserRunState, TraceOwner } from "./types.js";
9
11
 
10
12
  export async function runAgentBrowserTool(options: BrowserRunOptions): Promise<AgentBrowserToolResult> {
11
13
  const preparedResult = await prepareBrowserRun(options);
@@ -15,32 +17,36 @@ export async function runAgentBrowserTool(options: BrowserRunOptions): Promise<A
15
17
  }
16
18
 
17
19
  const { prepared } = preparedResult;
18
- const processResult = await runAgentBrowserProcess({
19
- args: prepared.processArgs,
20
- cwd: options.cwd,
21
- env: prepared.executionPlan.managedSessionName ? { AGENT_BROWSER_IDLE_TIMEOUT_MS: options.implicitSessionIdleTimeoutMs } : undefined,
22
- signal: options.signal,
23
- stdin: prepared.processStdin,
24
- });
20
+ try {
21
+ const processResult = await runAgentBrowserProcess({
22
+ args: prepared.processArgs,
23
+ cwd: options.cwd,
24
+ env: prepared.executionPlan.managedSessionName ? { AGENT_BROWSER_IDLE_TIMEOUT_MS: options.implicitSessionIdleTimeoutMs } : undefined,
25
+ signal: options.signal,
26
+ stdin: prepared.processStdin,
27
+ });
25
28
 
26
- const missingBinaryResult = await buildMissingBinaryFailureResult({
27
- compatibilityWorkaround: prepared.compatibilityWorkaround,
28
- electronLaunch: prepared.electronLaunch,
29
- executionPlan: prepared.executionPlan,
30
- implicitSessionCloseTimeoutMs: options.implicitSessionCloseTimeoutMs,
31
- managedSessionActive: options.state.managedSessionActive,
32
- managedSessionName: options.state.managedSessionName,
33
- processResult,
34
- redactedArgs: prepared.redactedArgs,
35
- redactedProcessArgs: prepared.redactedProcessArgs,
36
- sessionMode: prepared.sessionMode,
37
- sessionTabCorrection: prepared.sessionTabCorrection,
38
- });
39
- if (missingBinaryResult) {
40
- return missingBinaryResult;
41
- }
29
+ const missingBinaryResult = await buildMissingBinaryFailureResult({
30
+ compatibilityWorkaround: prepared.compatibilityWorkaround,
31
+ electronLaunch: prepared.electronLaunch,
32
+ executionPlan: prepared.executionPlan,
33
+ implicitSessionCloseTimeoutMs: options.implicitSessionCloseTimeoutMs,
34
+ managedSessionActive: options.state.managedSessionActive,
35
+ managedSessionName: options.state.managedSessionName,
36
+ processResult,
37
+ redactedArgs: prepared.redactedArgs,
38
+ redactedProcessArgs: prepared.redactedProcessArgs,
39
+ sessionMode: prepared.sessionMode,
40
+ sessionTabCorrection: prepared.sessionTabCorrection,
41
+ });
42
+ if (missingBinaryResult) {
43
+ return missingBinaryResult;
44
+ }
42
45
 
43
- const output = await processBrowserOutput({ ...options, prepared, processResult });
44
- applyBrowserRunStatePatch(options.state, output.statePatch);
45
- return output.result;
46
+ const output = await processBrowserOutput({ ...options, prepared, processResult });
47
+ applyBrowserRunStatePatch(options.state, output.statePatch);
48
+ return output.result;
49
+ } finally {
50
+ await cleanupClickDispatchProbe({ cwd: options.cwd, probe: prepared.clickDispatchProbe, sessionName: prepared.executionPlan.sessionName });
51
+ }
46
52
  }