pi-agent-browser-native 0.2.44 → 0.2.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64):
  1. package/CHANGELOG.md +26 -0
  2. package/README.md +20 -15
  3. package/docs/ARCHITECTURE.md +12 -10
  4. package/docs/COMMAND_REFERENCE.md +49 -27
  5. package/docs/ELECTRON.md +1 -1
  6. package/docs/RELEASE.md +6 -5
  7. package/docs/REQUIREMENTS.md +6 -3
  8. package/docs/SUPPORT_MATRIX.md +17 -13
  9. package/docs/TOOL_CONTRACT.md +87 -46
  10. package/docs/platform-smoke.md +4 -3
  11. package/extensions/agent-browser/index.ts +29 -445
  12. package/extensions/agent-browser/lib/bash-guard.ts +205 -0
  13. package/extensions/agent-browser/lib/electron/cdp.ts +69 -0
  14. package/extensions/agent-browser/lib/electron/cleanup.ts +5 -58
  15. package/extensions/agent-browser/lib/electron/discovery.ts +2 -9
  16. package/extensions/agent-browser/lib/electron/launch.ts +11 -65
  17. package/extensions/agent-browser/lib/electron/text.ts +13 -0
  18. package/extensions/agent-browser/lib/fs-utils.ts +18 -0
  19. package/extensions/agent-browser/lib/input-modes/job.ts +207 -21
  20. package/extensions/agent-browser/lib/input-modes/params.ts +17 -7
  21. package/extensions/agent-browser/lib/input-modes/semantic-action.ts +22 -2
  22. package/extensions/agent-browser/lib/input-modes/types.ts +5 -1
  23. package/extensions/agent-browser/lib/input-modes.ts +1 -0
  24. package/extensions/agent-browser/lib/orchestration/browser-run/click-dispatch.ts +82 -11
  25. package/extensions/agent-browser/lib/orchestration/browser-run/diagnostics.ts +153 -30
  26. package/extensions/agent-browser/lib/orchestration/browser-run/final-result.ts +53 -2
  27. package/extensions/agent-browser/lib/orchestration/browser-run/index.ts +1 -0
  28. package/extensions/agent-browser/lib/orchestration/browser-run/prepare.ts +751 -32
  29. package/extensions/agent-browser/lib/orchestration/browser-run/process-output.ts +38 -7
  30. package/extensions/agent-browser/lib/orchestration/browser-run/prompt-guards.ts +0 -46
  31. package/extensions/agent-browser/lib/orchestration/browser-run/session-state.ts +10 -1
  32. package/extensions/agent-browser/lib/orchestration/browser-run/types.ts +28 -1
  33. package/extensions/agent-browser/lib/orchestration/electron-host/index.ts +1 -6
  34. package/extensions/agent-browser/lib/orchestration/input-plan.ts +15 -3
  35. package/extensions/agent-browser/lib/orchestration/output-file.ts +86 -0
  36. package/extensions/agent-browser/lib/pi-tool-rendering.ts +231 -0
  37. package/extensions/agent-browser/lib/playbook.ts +26 -26
  38. package/extensions/agent-browser/lib/process.ts +1 -1
  39. package/extensions/agent-browser/lib/prompt-policy.ts +1 -18
  40. package/extensions/agent-browser/lib/results/artifact-manifest.ts +1 -4
  41. package/extensions/agent-browser/lib/results/artifact-state.ts +7 -3
  42. package/extensions/agent-browser/lib/results/contracts.ts +6 -2
  43. package/extensions/agent-browser/lib/results/envelope.ts +11 -2
  44. package/extensions/agent-browser/lib/results/network-routes.ts +7 -4
  45. package/extensions/agent-browser/lib/results/network.ts +7 -1
  46. package/extensions/agent-browser/lib/results/presentation/artifacts.ts +88 -20
  47. package/extensions/agent-browser/lib/results/presentation/batch.ts +84 -12
  48. package/extensions/agent-browser/lib/results/presentation/diagnostics.ts +81 -26
  49. package/extensions/agent-browser/lib/results/presentation/errors.ts +13 -0
  50. package/extensions/agent-browser/lib/results/presentation/registry.ts +60 -0
  51. package/extensions/agent-browser/lib/results/presentation.ts +10 -1
  52. package/extensions/agent-browser/lib/results/snapshot-high-value-controls.ts +16 -5
  53. package/extensions/agent-browser/lib/results/snapshot.ts +2 -0
  54. package/extensions/agent-browser/lib/runtime.ts +10 -1
  55. package/extensions/agent-browser/lib/session-page-state.ts +15 -6
  56. package/extensions/agent-browser/lib/web-search.ts +1 -1
  57. package/package.json +2 -2
  58. package/platform-smoke.config.mjs +5 -2
  59. package/scripts/platform-smoke/build-ubuntu-image.mjs +25 -0
  60. package/scripts/platform-smoke/crabbox-runner.mjs +5 -1
  61. package/scripts/platform-smoke/doctor.mjs +6 -2
  62. package/scripts/platform-smoke/linux-image/Dockerfile +3 -5
  63. package/scripts/platform-smoke/targets.mjs +2 -1
  64. package/extensions/agent-browser/lib/orchestration/browser-run/browser-action-model.ts +0 -154
@@ -8,8 +8,10 @@ import type { ArtifactVerificationSummary } from "../results/contracts.js";
8
8
  import { isRecord } from "../parsing.js";
9
9
  import { summarizeNetworkFailures } from "../results/network.js";
10
10
  import { getBatchResultItems, getCommandNameFromBatchItem, getSelectValues } from "./shared.js";
11
+ import { compileAgentBrowserSemanticAction } from "./semantic-action.js";
11
12
  import {
12
13
  AGENT_BROWSER_JOB_STEP_ACTIONS,
14
+ AGENT_BROWSER_JOB_TYPE_DELAYED_TEXT_MAX_CHARACTERS,
13
15
  AGENT_BROWSER_QA_LOAD_STATES,
14
16
  type AgentBrowserJobStepAction,
15
17
  type AgentBrowserQaLoadState,
@@ -27,10 +29,88 @@ function getRequiredJobString(step: Record<string, unknown>, field: "path" | "se
27
29
  return { value };
28
30
  }
29
31
 
32
+ function compileJobClickOrFillStep(step: Record<string, unknown>, action: "click" | "fill"): { args?: string[]; error?: string } {
33
+ const hasSelector = typeof step.selector === "string" && step.selector.trim().length > 0;
34
+ const hasLocator = step.locator !== undefined || step.role !== undefined || step.name !== undefined || step.value !== undefined;
35
+ if (hasSelector && hasLocator) {
36
+ return { error: `job step ${action} must use either selector or semantic locator fields, not both.` };
37
+ }
38
+ if (hasSelector) {
39
+ if (action === "click") return { args: ["click", step.selector as string] };
40
+ const text = getRequiredJobString(step, "text", action);
41
+ if (text.error) return { error: text.error };
42
+ return { args: ["fill", step.selector as string, text.value as string] };
43
+ }
44
+ if (!hasLocator) {
45
+ return { error: `job step ${action} requires either a non-empty selector string or semantic locator fields.` };
46
+ }
47
+ const compiled = compileAgentBrowserSemanticAction({
48
+ action,
49
+ locator: step.locator,
50
+ name: step.name,
51
+ role: step.role,
52
+ text: step.text,
53
+ value: step.value,
54
+ });
55
+ if (compiled.error) return { error: compiled.error.replaceAll("semanticAction", `job step ${action}`) };
56
+ return { args: compiled.compiled?.args };
57
+ }
58
+
59
+ function getUnsupportedJobStepField(step: Record<string, unknown>, allowedFields: ReadonlySet<string>): string | undefined {
60
+ return Object.keys(step).find((field) => !allowedFields.has(field));
61
+ }
62
+
63
+ const JOB_TYPE_ALLOWED_FIELDS = new Set(["action", "delayMs", "press", "selector", "text"]);
64
+
65
+ function compileJobTypeSteps(step: Record<string, unknown>): { error?: string; steps?: CompiledAgentBrowserJobStep[] } {
66
+ const unsupportedField = getUnsupportedJobStepField(step, JOB_TYPE_ALLOWED_FIELDS);
67
+ if (unsupportedField) return { error: `job step type does not support ${unsupportedField}; supported fields are selector, text, delayMs, and press.` };
68
+ const text = getRequiredJobString(step, "text", "type");
69
+ if (text.error) return { error: text.error };
70
+ const selector = step.selector;
71
+ if (selector !== undefined && (typeof selector !== "string" || selector.trim().length === 0)) {
72
+ return { error: "job step type selector must be a non-empty string when provided." };
73
+ }
74
+ if (step.locator !== undefined || step.role !== undefined || step.name !== undefined || step.value !== undefined || step.values !== undefined) {
75
+ return { error: "job step type supports selector, text, delayMs, and press only; focus the target first or use click/fill semantic locator fields in a separate step." };
76
+ }
77
+ const delayMs = step.delayMs;
78
+ if (delayMs !== undefined && (typeof delayMs !== "number" || !Number.isInteger(delayMs) || delayMs <= 0)) {
79
+ return { error: "job step type delayMs must be a positive integer when provided." };
80
+ }
81
+ const press = step.press;
82
+ if (press !== undefined && (typeof press !== "string" || press.trim().length === 0)) {
83
+ return { error: "job step type press must be a non-empty key string when provided." };
84
+ }
85
+ const typedText = text.value as string;
86
+ const typedChars = Array.from(typedText);
87
+ if (typedChars.length === 0) return { error: "job step type requires non-empty text." };
88
+ if (delayMs !== undefined && typedChars.length > AGENT_BROWSER_JOB_TYPE_DELAYED_TEXT_MAX_CHARACTERS) {
89
+ return { error: `job step type delayMs supports at most ${AGENT_BROWSER_JOB_TYPE_DELAYED_TEXT_MAX_CHARACTERS} characters; split longer text into shorter calls or omit delayMs.` };
90
+ }
91
+ const compiledSteps: CompiledAgentBrowserJobStep[] = [];
92
+ if (delayMs === undefined) {
93
+ compiledSteps.push({ action: "type", args: typeof selector === "string" ? ["type", selector, typedText] : ["keyboard", "type", typedText] });
94
+ } else {
95
+ if (typeof selector === "string") compiledSteps.push({ action: "type", args: ["focus", selector], generatedFrom: "type.selector" });
96
+ for (const [index, char] of typedChars.entries()) {
97
+ compiledSteps.push({ action: "type", args: ["keyboard", "type", char], generatedFrom: "type.delayMs" });
98
+ if (index < typedChars.length - 1) compiledSteps.push({ action: "wait", args: ["wait", String(delayMs)], generatedFrom: "type.delayMs" });
99
+ }
100
+ }
101
+ if (typeof press === "string") compiledSteps.push({ action: "type", args: ["press", press], generatedFrom: "type.press" });
102
+ return { steps: compiledSteps };
103
+ }
104
+
30
105
  export function compileAgentBrowserJob(input: unknown): { compiled?: CompiledAgentBrowserJob; error?: string } {
31
106
  if (!isRecord(input)) {
32
107
  return { error: "job must be an object." };
33
108
  }
109
+ const rawFailFast = input.failFast;
110
+ if (rawFailFast !== undefined && typeof rawFailFast !== "boolean") {
111
+ return { error: "job.failFast must be a boolean when provided." };
112
+ }
113
+ const failFast = rawFailFast !== false;
34
114
  const rawSteps = input.steps;
35
115
  if (!Array.isArray(rawSteps) || rawSteps.length === 0) {
36
116
  return { error: "job.steps must be a non-empty array." };
@@ -46,20 +126,29 @@ export function compileAgentBrowserJob(input: unknown): { compiled?: CompiledAge
46
126
  }
47
127
  const jobAction = action as AgentBrowserJobStepAction;
48
128
  let args: string[];
129
+ let generatedFrom: string | undefined;
130
+ let extraSteps: CompiledAgentBrowserJobStep[] = [];
49
131
  if (jobAction === "open") {
50
132
  const result = getRequiredJobString(rawStep, "url", jobAction);
51
133
  if (result.error) return { error: `job.steps[${index}]: ${result.error}` };
52
134
  args = ["open", result.value as string];
53
- } else if (jobAction === "click") {
54
- const result = getRequiredJobString(rawStep, "selector", jobAction);
135
+ if (rawStep.loadState !== undefined) {
136
+ if (typeof rawStep.loadState !== "string" || !AGENT_BROWSER_QA_LOAD_STATES.includes(rawStep.loadState as AgentBrowserQaLoadState)) {
137
+ return { error: `job.steps[${index}].loadState must be one of: ${AGENT_BROWSER_QA_LOAD_STATES.join(", ")}.` };
138
+ }
139
+ extraSteps = [{ action: "wait", args: ["wait", "--load", rawStep.loadState], generatedFrom: "open.loadState" }];
140
+ }
141
+ } else if (jobAction === "click" || jobAction === "fill") {
142
+ const result = compileJobClickOrFillStep(rawStep, jobAction);
55
143
  if (result.error) return { error: `job.steps[${index}]: ${result.error}` };
56
- args = ["click", result.value as string];
57
- } else if (jobAction === "fill") {
58
- const selector = getRequiredJobString(rawStep, "selector", jobAction);
59
- if (selector.error) return { error: `job.steps[${index}]: ${selector.error}` };
60
- const text = getRequiredJobString(rawStep, "text", jobAction);
61
- if (text.error) return { error: `job.steps[${index}]: ${text.error}` };
62
- args = ["fill", selector.value as string, text.value as string];
144
+ args = result.args as string[];
145
+ } else if (jobAction === "type") {
146
+ const result = compileJobTypeSteps(rawStep);
147
+ if (result.error) return { error: `job.steps[${index}]: ${result.error}` };
148
+ const [firstStep, ...restSteps] = result.steps as CompiledAgentBrowserJobStep[];
149
+ args = firstStep.args;
150
+ generatedFrom = firstStep.generatedFrom;
151
+ extraSteps = restSteps;
63
152
  } else if (jobAction === "select") {
64
153
  const selector = getRequiredJobString(rawStep, "selector", jobAction);
65
154
  if (selector.error) return { error: `job.steps[${index}]: ${selector.error}` };
@@ -84,14 +173,16 @@ export function compileAgentBrowserJob(input: unknown): { compiled?: CompiledAge
84
173
  const result = getRequiredJobString(rawStep, "path", jobAction);
85
174
  if (result.error) return { error: `job.steps[${index}]: ${result.error}` };
86
175
  args = ["wait", "--download", result.value as string];
176
+ } else if (jobAction === "snapshot") {
177
+ args = ["snapshot", "-i"];
87
178
  } else {
88
179
  const result = getRequiredJobString(rawStep, "path", jobAction);
89
180
  if (result.error) return { error: `job.steps[${index}]: ${result.error}` };
90
181
  args = ["screenshot", result.value as string];
91
182
  }
92
- steps.push({ action: jobAction, args });
183
+ steps.push({ action: jobAction, args, generatedFrom }, ...extraSteps);
93
184
  }
94
- return { compiled: { args: ["batch"], stdin: JSON.stringify(steps.map((step) => step.args)), steps } };
185
+ return { compiled: { args: failFast ? ["batch", "--bail"] : ["batch"], failFast, stdin: JSON.stringify(steps.map((step) => step.args)), steps } };
95
186
  }
96
187
 
97
188
  export function isHttpOrHttpsUrl(url: string): boolean {
@@ -110,6 +201,8 @@ function describeQaChecksRun(checks: CompiledAgentBrowserQaPreset["checks"]): st
110
201
  if (checks.checkNetwork) parts.push("network");
111
202
  if (checks.checkConsole) parts.push("console");
112
203
  if (checks.checkErrors) parts.push("errors");
204
+ if (checks.diagnosticsResetAtStart) parts.push("diagnostics-reset");
205
+ else if (checks.checkNetwork || checks.checkConsole || checks.checkErrors) parts.push("attached-diagnostics-preserved");
113
206
  if (checks.screenshotPath) parts.push("screenshot");
114
207
  return parts.join(", ");
115
208
  }
@@ -145,6 +238,9 @@ export function buildQaCompactPassText(options: {
145
238
  const pageParts = [options.page?.title, options.page?.url].filter((part): part is string => typeof part === "string" && part.length > 0);
146
239
  if (pageParts.length > 0) lines.push(`Page: ${pageParts.join(" — ")}`);
147
240
  lines.push(`Checks run: ${describeQaChecksRun(options.checks)} (${options.batchStepCount} batch step${options.batchStepCount === 1 ? "" : "s"})`);
241
+ if (options.checks.attached && !options.checks.diagnosticsResetAtStart && (options.checks.checkNetwork || options.checks.checkConsole || options.checks.checkErrors)) {
242
+ lines.push("Attached diagnostics: existing upstream session console/network/error buffers were preserved; rows may include events from before qa.attached started.");
243
+ }
148
244
  if (options.checks.screenshotPath) {
149
245
  const verification = options.artifactVerification;
150
246
  lines.push(verification
@@ -155,7 +251,83 @@ export function buildQaCompactPassText(options: {
155
251
  return lines.join("\n");
156
252
  }
157
253
 
158
- export function analyzeQaPresetResults(data: unknown): AgentBrowserQaPresetAnalysis | undefined {
254
+ const QA_VISIBLE_TEXT_TIMEOUT_MS = 5_000;
255
+
256
+ function formatQaExpectedTextPreview(text: string): string {
257
+ return JSON.stringify(text.length > 80 ? `${text.slice(0, 77)}...` : text);
258
+ }
259
+
260
+ function buildQaVisibleTextPredicate(text: string): string {
261
+ return `(() => {
262
+ const expected = ${JSON.stringify(text)}.replace(/\\s+/g, " ").trim();
263
+ if (!expected) return false;
264
+ const root = document.body || document.documentElement;
265
+ if (!root) return false;
266
+ const skipTags = new Set(["SCRIPT", "STYLE", "NOSCRIPT", "SVG"]);
267
+ const normalize = (value) => String(value ?? "").replace(/\\s+/g, " ").trim();
268
+ const isVisibleElement = (element) => {
269
+ if (!(element instanceof HTMLElement)) return false;
270
+ if (skipTags.has(element.tagName)) return false;
271
+ const style = window.getComputedStyle(element);
272
+ if (style.display === "none" || style.visibility === "hidden" || Number(style.opacity) === 0) return false;
273
+ return element.getClientRects().length > 0;
274
+ };
275
+ const hasVisibleAncestors = (node) => {
276
+ for (let element = node.parentElement; element; element = element.parentElement) {
277
+ if (!isVisibleElement(element)) return false;
278
+ if (element === root) break;
279
+ }
280
+ return true;
281
+ };
282
+ const textWalker = document.createTreeWalker(root, NodeFilter.SHOW_TEXT);
283
+ let visitedText = 0;
284
+ for (let node = textWalker.nextNode(); node && visitedText < 6000; node = textWalker.nextNode(), visitedText += 1) {
285
+ if (!hasVisibleAncestors(node)) continue;
286
+ if (normalize(node.nodeValue).includes(expected)) return true;
287
+ }
288
+ const elementWalker = document.createTreeWalker(root, NodeFilter.SHOW_ELEMENT);
289
+ let visitedElements = 0;
290
+ for (let node = elementWalker.nextNode(); node && visitedElements < 3000; node = elementWalker.nextNode(), visitedElements += 1) {
291
+ const element = node;
292
+ if (!isVisibleElement(element) || !("value" in element)) continue;
293
+ if (normalize(element.value).includes(expected)) return true;
294
+ }
295
+ return false;
296
+ })()`;
297
+ }
298
+
299
+ function qaVisibleTextWaitPassed(item: ReturnType<typeof getBatchResultItems>[number] | undefined, step: CompiledAgentBrowserJobStep): boolean | undefined {
300
+ if (step.args[0] !== "wait" || step.args[1] !== "--fn") return undefined;
301
+ if (!item || item.success === false) return false;
302
+ if (typeof item.result === "boolean") return item.result;
303
+ if (isRecord(item.result) && typeof item.result.result === "boolean") return item.result.result;
304
+ return true;
305
+ }
306
+
307
+ function extractQaTextAssertionResultText(item: ReturnType<typeof getBatchResultItems>[number] | undefined): string | undefined {
308
+ if (!item || item.success === false) return undefined;
309
+ const result = item.result;
310
+ if (typeof result === "string") return result;
311
+ if (!isRecord(result)) return undefined;
312
+ for (const key of ["result", "text", "value"] as const) {
313
+ const value = result[key];
314
+ if (typeof value === "string") return value;
315
+ }
316
+ return undefined;
317
+ }
318
+
319
+ export function analyzeQaPresetTimeout(compiled: CompiledAgentBrowserQaPreset): AgentBrowserQaPresetAnalysis | undefined {
320
+ if (compiled.checks.expectedText.length === 0) return undefined;
321
+ const failedChecks = compiled.checks.expectedText.map((text) => `expected text was not verified before timeout: ${formatQaExpectedTextPreview(text)}`);
322
+ return {
323
+ failedChecks,
324
+ passed: false,
325
+ summary: `QA preset failed: ${failedChecks.join("; ")}.`,
326
+ warnings: ["The wrapper timed out before expected-text evidence could be verified; inspect timeoutPartialProgress and retry with a narrower readiness condition if the page was still loading."],
327
+ };
328
+ }
329
+
330
+ export function analyzeQaPresetResults(data: unknown, compiled?: CompiledAgentBrowserQaPreset): AgentBrowserQaPresetAnalysis | undefined {
159
331
  const items = getBatchResultItems(data);
160
332
  if (items.length === 0) return undefined;
161
333
  const failedChecks: string[] = [];
@@ -179,6 +351,18 @@ export function analyzeQaPresetResults(data: unknown): AgentBrowserQaPresetAnaly
179
351
  if (networkFailures.benignCount > 0) warnings.push(`${networkFailures.benignCount} benign network request failure(s) ignored`);
180
352
  }
181
353
  }
354
+ if (compiled?.checks.expectedText.length) {
355
+ let expectedTextIndex = 0;
356
+ compiled.steps.forEach((step, index) => {
357
+ if (step.action !== "assertText") return;
358
+ const expected = compiled.checks.expectedText[expectedTextIndex++];
359
+ if (!expected) return;
360
+ const visibleTextPassed = qaVisibleTextWaitPassed(items[index], step);
361
+ if (visibleTextPassed === true) return;
362
+ const actual = extractQaTextAssertionResultText(items[index]);
363
+ if (!actual || !actual.includes(expected)) failedChecks.push(`expected text not found: ${formatQaExpectedTextPreview(expected)}`);
364
+ });
365
+ }
182
366
  const uniqueFailures = [...new Set(failedChecks)];
183
367
  const uniqueWarnings = [...new Set(warnings)];
184
368
  return {
@@ -234,18 +418,19 @@ export function compileAgentBrowserQaPreset(input: unknown): { compiled?: Compil
234
418
  if (rawLoadState !== undefined && (typeof rawLoadState !== "string" || !AGENT_BROWSER_QA_LOAD_STATES.includes(rawLoadState as AgentBrowserQaLoadState))) {
235
419
  return { error: `qa.loadState must be one of: ${AGENT_BROWSER_QA_LOAD_STATES.join(", ")}.` };
236
420
  }
237
- const checkConsole = input.checkConsole !== false;
238
- const checkErrors = input.checkErrors !== false;
239
- const checkNetwork = input.checkNetwork !== false;
421
+ const checkConsole = typeof input.checkConsole === "boolean" ? input.checkConsole : !attached;
422
+ const checkErrors = typeof input.checkErrors === "boolean" ? input.checkErrors : !attached;
423
+ const checkNetwork = typeof input.checkNetwork === "boolean" ? input.checkNetwork : !attached;
240
424
  const loadState = (rawLoadState as AgentBrowserQaLoadState | undefined) ?? "domcontentloaded";
425
+ const diagnosticsResetAtStart = !attached;
241
426
  const steps: CompiledAgentBrowserJobStep[] = [];
242
- if (checkNetwork) steps.push({ action: "wait", args: ["network", "requests", "--clear"] });
243
- if (checkConsole) steps.push({ action: "wait", args: ["console", "--clear"] });
244
- if (checkErrors) steps.push({ action: "wait", args: ["errors", "--clear"] });
427
+ if (diagnosticsResetAtStart && checkNetwork) steps.push({ action: "wait", args: ["network", "requests", "--clear"] });
428
+ if (diagnosticsResetAtStart && checkConsole) steps.push({ action: "wait", args: ["console", "--clear"] });
429
+ if (diagnosticsResetAtStart && checkErrors) steps.push({ action: "wait", args: ["errors", "--clear"] });
245
430
  if (!attached && normalizedUrl) steps.push({ action: "open", args: ["open", normalizedUrl] });
246
431
  steps.push({ action: "wait", args: ["wait", "--load", loadState] });
247
432
  for (const text of expectedText) {
248
- steps.push({ action: "assertText", args: ["wait", "--text", text] });
433
+ steps.push({ action: "assertText", args: ["wait", "--fn", buildQaVisibleTextPredicate(text), "--timeout", String(QA_VISIBLE_TEXT_TIMEOUT_MS)] });
249
434
  }
250
435
  if (typeof expectedSelector === "string") {
251
436
  steps.push({ action: "wait", args: ["wait", expectedSelector] });
@@ -256,8 +441,9 @@ export function compileAgentBrowserQaPreset(input: unknown): { compiled?: Compil
256
441
  if (typeof screenshotPath === "string") steps.push({ action: "screenshot", args: ["screenshot", screenshotPath] });
257
442
  return {
258
443
  compiled: {
259
- args: ["batch"],
260
- checks: { attached, checkConsole, checkErrors, checkNetwork, expectedSelector, expectedText, loadState, screenshotPath, url: normalizedUrl },
444
+ args: ["batch", "--bail"],
445
+ checks: { attached, checkConsole, checkErrors, checkNetwork, diagnosticsResetAtStart, expectedSelector, expectedText, loadState, screenshotPath, url: normalizedUrl },
446
+ failFast: true,
261
447
  stdin: JSON.stringify(steps.map((step) => step.args)),
262
448
  steps,
263
449
  },
@@ -15,6 +15,7 @@ import {
15
15
  AGENT_BROWSER_ELECTRON_HANDOFFS,
16
16
  AGENT_BROWSER_ELECTRON_TARGET_TYPES,
17
17
  AGENT_BROWSER_JOB_STEP_ACTIONS,
18
+ AGENT_BROWSER_JOB_TYPE_DELAYED_TEXT_MAX_CHARACTERS,
18
19
  AGENT_BROWSER_QA_LOAD_STATES,
19
20
  AGENT_BROWSER_SEMANTIC_ACTIONS,
20
21
  AGENT_BROWSER_SEMANTIC_LOCATORS,
@@ -33,14 +34,14 @@ export const AGENT_BROWSER_PARAMS = Type.Object({
33
34
  semanticAction: Type.Optional(
34
35
  Type.Object({
35
36
  action: StringEnum(AGENT_BROWSER_SEMANTIC_ACTIONS, {
36
- description: "Intent action to compile to an existing agent-browser find command, or to upstream select when action=select.",
37
+ description: "Intent action to compile to an existing agent-browser find command, direct selector/ref command, or upstream select when action=select.",
37
38
  }),
38
39
  locator: Type.Optional(StringEnum(AGENT_BROWSER_SEMANTIC_LOCATORS, {
39
40
  description: "Upstream find locator family to use for check/click/fill actions.",
40
41
  })),
41
42
  value: Type.Optional(Type.String({ description: "Locator value for find actions, or a single option value for select actions. For locator=role, role may be supplied instead." })),
42
43
  values: Type.Optional(Type.Array(Type.String({ description: "Option value for select actions." }), { description: "One or more option values for select actions.", minItems: 1 })),
43
- selector: Type.Optional(Type.String({ description: "Selector or @ref for select actions; compiled to select <selector> <value...>." })),
44
+ selector: Type.Optional(Type.String({ description: "Selector or @ref for direct click/check/fill actions, or for select actions compiled to select <selector> <value...>." })),
44
45
  text: Type.Optional(Type.String({ description: "Text/value argument for fill actions." })),
45
46
  role: Type.Optional(Type.String({ description: "Role locator value for locator=role. May be used instead of value; when both are set they must match." })),
46
47
  name: Type.Optional(Type.String({ description: "Accessible name filter for locator=role; compiles to --name <name>." })),
@@ -54,9 +55,9 @@ export const AGENT_BROWSER_PARAMS = Type.Object({
54
55
  expectedText: Type.Optional(Type.Union([Type.String(), Type.Array(Type.String())], { description: "Text that must appear on the page." })),
55
56
  expectedSelector: Type.Optional(Type.String({ description: "Selector or @ref that must appear on the page." })),
56
57
  screenshotPath: Type.Optional(Type.String({ description: "Optional evidence screenshot path captured at the end of the QA preset." })),
57
- checkConsole: Type.Optional(Type.Boolean({ description: "Whether to fail on console error messages. Defaults to true." })),
58
- checkErrors: Type.Optional(Type.Boolean({ description: "Whether to fail on page errors. Defaults to true." })),
59
- checkNetwork: Type.Optional(Type.Boolean({ description: "Whether to inspect network requests and fail on actionable request failures; benign icon misses warn. Defaults to true." })),
58
+ checkConsole: Type.Optional(Type.Boolean({ description: "Whether to inspect console messages and fail on console errors. Defaults to false for qa.attached because upstream buffers may predate the check." })),
59
+ checkErrors: Type.Optional(Type.Boolean({ description: "Whether to inspect page errors and fail when errors are present. Defaults to false for qa.attached because upstream buffers may predate the check." })),
60
+ checkNetwork: Type.Optional(Type.Boolean({ description: "Whether to inspect network requests and fail on actionable request failures; benign icon misses warn. Defaults to false for qa.attached because upstream buffers may predate the check." })),
60
61
  loadState: Type.Optional(StringEnum(AGENT_BROWSER_QA_LOAD_STATES, { description: "Page readiness state for the QA preset before assertions and diagnostics. Defaults to domcontentloaded; use networkidle only for pages without long-lived background requests." })),
61
62
  }, { additionalProperties: false }),
62
63
  Type.Object({
@@ -160,17 +161,24 @@ export const AGENT_BROWSER_PARAMS = Type.Object({
160
161
  ),
161
162
  job: Type.Optional(
162
163
  Type.Object({
164
+ failFast: Type.Optional(Type.Boolean({ description: "Stop the compiled batch on the first failed job step. Defaults to true so later mutating steps do not run after setup/assertion failures." })),
163
165
  steps: Type.Array(
164
166
  Type.Object({
165
167
  action: StringEnum(AGENT_BROWSER_JOB_STEP_ACTIONS, {
166
168
  description: "Constrained one-call job step compiled to existing upstream batch commands.",
167
169
  }),
168
170
  url: Type.Optional(Type.String({ description: "URL for open steps, or URL pattern for assertUrl steps." })),
169
- selector: Type.Optional(Type.String({ description: "Selector or @ref for click/fill/select-like steps." })),
171
+ loadState: Type.Optional(StringEnum(AGENT_BROWSER_QA_LOAD_STATES, { description: "Optional readiness wait to insert immediately after an open step; use domcontentloaded/load/networkidle when the next job step needs page hydration evidence before clicking or reading." })),
172
+ selector: Type.Optional(Type.String({ description: "Selector or @ref for click/fill/type/select-like steps; omit when using semantic locator fields on click/fill steps." })),
173
+ locator: Type.Optional(StringEnum(AGENT_BROWSER_SEMANTIC_LOCATORS, { description: "Semantic locator for click/fill steps when selector is omitted." })),
174
+ role: Type.Optional(Type.String({ description: "Role locator value for click/fill steps when locator is role." })),
175
+ name: Type.Optional(Type.String({ description: "Accessible name filter for role locator click/fill steps." })),
170
176
  text: Type.Optional(Type.String({ description: "Text for fill steps or visible text for assertText steps." })),
171
- value: Type.Optional(Type.String({ description: "Single option value for select steps." })),
177
+ value: Type.Optional(Type.String({ description: "Single option value for select steps, or locator value for semantic click/fill steps." })),
172
178
  values: Type.Optional(Type.Array(Type.String({ description: "Option value for select steps." }), { description: "One or more option values for select steps.", minItems: 1 })),
173
179
  path: Type.Optional(Type.String({ description: "Artifact/download path for waitForDownload or screenshot steps." })),
180
+ delayMs: Type.Optional(Type.Integer({ description: `Optional per-character delay for type steps; when set, the job compiles to focus/keyboard type/wait steps instead of instant fill-like typing, capped at ${AGENT_BROWSER_JOB_TYPE_DELAYED_TEXT_MAX_CHARACTERS} characters.`, minimum: 1 })),
181
+ press: Type.Optional(Type.String({ description: "Optional key to press after a type step, for example Enter." })),
174
182
  milliseconds: Type.Optional(Type.Number({ description: "Milliseconds for wait steps." })),
175
183
  }, { additionalProperties: false }),
176
184
  { minItems: 1 },
@@ -178,6 +186,8 @@ export const AGENT_BROWSER_PARAMS = Type.Object({
178
186
  }, { additionalProperties: false }),
179
187
  ),
180
188
  stdin: Type.Optional(Type.String({ description: "Optional raw stdin content; only supported for batch, eval --stdin, auth save --password-stdin, and is generated internally by job, qa, sourceLookup, or networkSourceLookup mode. Do not use with electron mode." })),
189
+ outputPath: Type.Optional(Type.String({ description: "Optional workspace-relative or absolute file path that receives the model-facing command data/result after the browser command completes. Useful for eval/get/snapshot captures that should become durable local artifacts.", minLength: 1 })),
190
+ timeoutMs: Type.Optional(Type.Integer({ description: "Optional per-call wrapper subprocess watchdog in milliseconds for browser CLI args/job/qa/source lookup calls. Use for long opens or large output captures; fixed wait steps still must stay below the upstream IPC wait budget. Electron actions use electron.timeoutMs instead.", minimum: 1 })),
181
191
  sessionMode: Type.Optional(
182
192
  StringEnum(["auto", "fresh"] as const, {
183
193
  description:
@@ -62,8 +62,28 @@ export function compileAgentBrowserSemanticAction(input: unknown): { compiled?:
62
62
  const args = typeof session === "string" ? ["--session", session, "select", selector, ...(selectedValues.values as string[])] : ["select", selector, ...(selectedValues.values as string[])];
63
63
  return { compiled: { action: "select", selector, values: selectedValues.values, args } };
64
64
  }
65
- if (selector !== undefined || values !== undefined) {
66
- return { error: "semanticAction.selector and values are only supported for select actions." };
65
+ if (values !== undefined) {
66
+ return { error: "semanticAction.values is only supported for select actions." };
67
+ }
68
+ if (selector !== undefined) {
69
+ if (typeof selector !== "string" || selector.trim().length === 0) {
70
+ return { error: "semanticAction.selector must be a non-empty string when provided." };
71
+ }
72
+ if (locator !== undefined || value !== undefined || role !== undefined || name !== undefined) {
73
+ return { error: "semanticAction.selector cannot be combined with locator, value, role, or name; use selector for a direct click/check/fill target or locator fields for find-based actions." };
74
+ }
75
+ if (text !== undefined && typeof text !== "string") {
76
+ return { error: "semanticAction.text must be a string when provided." };
77
+ }
78
+ if (action === "fill" && (typeof text !== "string" || text.length === 0)) {
79
+ return { error: `semanticAction.text is required for ${action}.` };
80
+ }
81
+ if (action !== "fill" && text !== undefined) {
82
+ return { error: "semanticAction.text is only supported for fill actions." };
83
+ }
84
+ const directArgs = typeof session === "string" ? ["--session", session, action, selector] : [action, selector];
85
+ if (action === "fill") directArgs.push(text as string);
86
+ return { compiled: { action: action as AgentBrowserSemanticActionName, selector, args: directArgs } };
67
87
  }
68
88
  if (typeof locator !== "string" || !AGENT_BROWSER_SEMANTIC_LOCATORS.includes(locator as AgentBrowserSemanticLocator)) {
69
89
  return { error: `semanticAction.locator must be one of: ${AGENT_BROWSER_SEMANTIC_LOCATORS.join(", ")}.` };
@@ -10,7 +10,8 @@ export const AGENT_BROWSER_SEMANTIC_ACTIONS = ["check", "click", "fill", "select
10
10
 
11
11
  export const AGENT_BROWSER_SEMANTIC_LOCATORS = ["alt", "label", "placeholder", "role", "testid", "text", "title"] as const;
12
12
 
13
- export const AGENT_BROWSER_JOB_STEP_ACTIONS = ["open", "click", "fill", "select", "wait", "assertText", "assertUrl", "waitForDownload", "screenshot"] as const;
13
+ export const AGENT_BROWSER_JOB_TYPE_DELAYED_TEXT_MAX_CHARACTERS = 200;
14
+ export const AGENT_BROWSER_JOB_STEP_ACTIONS = ["open", "click", "fill", "type", "select", "wait", "assertText", "assertUrl", "waitForDownload", "screenshot", "snapshot"] as const;
14
15
 
15
16
  export const AGENT_BROWSER_QA_LOAD_STATES = ["domcontentloaded", "load", "networkidle"] as const;
16
17
 
@@ -102,10 +103,12 @@ export interface CompiledAgentBrowserSemanticAction {
102
103
  /**
   * One compiled step of a job: the step's action name plus the string
   * argument list produced for it.
   */
  export interface CompiledAgentBrowserJobStep {
103
104
  action: AgentBrowserJobStepAction;
104
105
  args: string[];
106
  // NOTE(review): optional label; presumably names the user-facing step this compiled step was expanded from (e.g. a delayed `type` step that compiles to several steps) — confirm against the job compiler.
+ generatedFrom?: string;
105
107
  }
106
108
 
107
109
  /**
   * Result of compiling a job: an argument list, a generated stdin payload,
   * and the ordered compiled steps.
   */
  export interface CompiledAgentBrowserJob {
108
110
  args: string[];
111
  // NOTE(review): presumably controls whether execution stops at the first failing step — confirm against the job compiler and runner.
+ failFast: boolean;
109
112
  stdin: string;
110
113
  steps: CompiledAgentBrowserJobStep[];
111
114
  }
  }
@@ -115,6 +118,7 @@ export interface CompiledAgentBrowserQaPreset extends CompiledAgentBrowserJob {
115
118
  checkConsole: boolean;
116
119
  checkErrors: boolean;
117
120
  checkNetwork: boolean;
121
+ diagnosticsResetAtStart: boolean;
118
122
  loadState: AgentBrowserQaLoadState;
119
123
  expectedText: string[];
120
124
  expectedSelector?: string;
@@ -7,6 +7,7 @@
7
7
  export { AGENT_BROWSER_PARAMS } from "./input-modes/params.js";
8
8
  export {
9
9
  analyzeQaPresetResults,
10
+ analyzeQaPresetTimeout,
10
11
  buildQaCompactPassText,
11
12
  compileAgentBrowserJob,
12
13
  compileAgentBrowserQaPreset,
@@ -71,7 +71,48 @@ function buildClickDispatchProbeInstallScript(probe: ClickDispatchProbe): string
71
71
  const marker = ${JSON.stringify(probe.marker)};
72
72
  const element = ${resolveTarget};
73
73
  if (!element) return { status: "target-not-found", marker };
74
- const state = { events: [], target: { tagName: element.tagName.toLowerCase() } };
74
+ const cssEscape = (value) => {
75
+ if (window.CSS && typeof window.CSS.escape === "function") return window.CSS.escape(value);
76
+ return String(value).replace(/[^a-zA-Z0-9_-]/g, "\\$&");
77
+ };
78
+ const getSelector = (node) => {
79
+ if (!(node instanceof Element)) return undefined;
80
+ if (node.id) return "#" + cssEscape(node.id);
81
+ const testId = node.getAttribute("data-testid") || node.getAttribute("data-test-id");
82
+ if (testId) return '[data-testid="' + cssEscape(testId) + '"]';
83
+ const parts = [];
84
+ let current = node;
85
+ while (current && current !== document.body && parts.length < 4) {
86
+ const tag = current.tagName.toLowerCase();
87
+ const parent = current.parentElement;
88
+ if (!parent) break;
89
+ const siblings = Array.from(parent.children).filter((child) => child.tagName === current.tagName);
90
+ const index = siblings.indexOf(current) + 1;
91
+ parts.unshift(siblings.length > 1 ? tag + ':nth-of-type(' + index + ')' : tag);
92
+ current = parent;
93
+ }
94
+ return parts.length > 0 ? parts.join(" > ") : undefined;
95
+ };
96
+ const rectInfo = (rect) => ({ bottom: rect.bottom, left: rect.left, right: rect.right, top: rect.top });
97
+ const targetRect = element.getBoundingClientRect();
98
+ const targetOutsideViewport = targetRect.bottom < 0 || targetRect.right < 0 || targetRect.top > window.innerHeight || targetRect.left > window.innerWidth;
99
+ let nearestScrollContainer;
100
+ for (let current = element.parentElement; current && current !== document.body; current = current.parentElement) {
101
+ if (current.scrollHeight > current.clientHeight + 1 || current.scrollWidth > current.clientWidth + 1) {
102
+ const containerRect = current.getBoundingClientRect();
103
+ nearestScrollContainer = {
104
+ selector: getSelector(current),
105
+ tagName: current.tagName.toLowerCase(),
106
+ targetOutsideContainer: targetRect.bottom < containerRect.top || targetRect.top > containerRect.bottom || targetRect.right < containerRect.left || targetRect.left > containerRect.right,
107
+ targetOutsideViewport,
108
+ rect: rectInfo(containerRect),
109
+ scrollLeft: current.scrollLeft,
110
+ scrollTop: current.scrollTop,
111
+ };
112
+ break;
113
+ }
114
+ }
115
+ const state = { events: [], target: { tagName: element.tagName.toLowerCase(), nearestScrollContainer, rect: rectInfo(targetRect), targetOutsideViewport } };
75
116
  const eventTypes = ["pointerdown", "mousedown", "pointerup", "mouseup", "click"];
76
117
  const listeners = eventTypes.map((type) => {
77
118
  const listener = (event) => {
@@ -126,9 +167,9 @@ export function formatClickDispatchDiagnosticText(diagnostic: ClickDispatchDiagn
126
167
  return `Click dispatch diagnostic: ${diagnostic.summary}`;
127
168
  }
128
169
 
129
- export function buildClickDispatchNextActions(options: { commandTokens: string[]; sessionName?: string }): AgentBrowserNextAction[] {
170
+ export function buildClickDispatchNextActions(options: { commandTokens: string[]; diagnostic?: ClickDispatchDiagnostic; sessionName?: string }): AgentBrowserNextAction[] {
130
171
  const retryArgs = options.commandTokens[0] === "click" ? options.commandTokens : ["click", ...options.commandTokens];
131
- return [
172
+ const actions: AgentBrowserNextAction[] = [
132
173
  {
133
174
  id: "inspect-click-dispatch-miss",
134
175
  params: { args: withOptionalSessionArgs(options.sessionName, ["snapshot", "-i"]) },
@@ -136,14 +177,26 @@ export function buildClickDispatchNextActions(options: { commandTokens: string[]
136
177
  safety: "Read-only snapshot; the wrapper does not replay clicks in-page when upstream reports success without DOM events.",
137
178
  tool: "agent_browser",
138
179
  },
139
- {
140
- id: "retry-click-after-dispatch-miss",
141
- params: { args: withOptionalSessionArgs(options.sessionName, retryArgs) },
142
- reason: "Retry the same upstream click after confirming the target is visible; do not assume the prior success mutated the page.",
143
- safety: "Only retry when the target is still intended; use page-change evidence or a fresh snapshot before continuing the workflow.",
144
- tool: "agent_browser",
145
- },
146
180
  ];
181
+ if (options.diagnostic?.scrollContainer) {
182
+ actions.push({
183
+ id: "scroll-target-into-view-after-dispatch-miss",
184
+ params: { args: withOptionalSessionArgs(options.sessionName, ["scrollintoview", retryArgs[1]].filter((item): item is string => typeof item === "string")) },
185
+ reason: options.diagnostic.scrollContainer.selector
186
+ ? `The target may be outside nested scroll container ${options.diagnostic.scrollContainer.selector}; scroll the target into view before retrying the click.`
187
+ : "The target may be inside an offscreen nested scroll container; scroll the target into view before retrying the click.",
188
+ safety: "Use only for the same current page and target; run snapshot -i again if the page rerendered.",
189
+ tool: "agent_browser",
190
+ });
191
+ }
192
+ actions.push({
193
+ id: "retry-click-after-dispatch-miss",
194
+ params: { args: withOptionalSessionArgs(options.sessionName, retryArgs) },
195
+ reason: "Retry the same upstream click after confirming the target is visible; do not assume the prior success mutated the page.",
196
+ safety: "Only retry when the target is still intended; use page-change evidence or a fresh snapshot before continuing the workflow.",
197
+ tool: "agent_browser",
198
+ });
199
+ return actions;
147
200
  }
148
201
 
149
202
  export async function prepareClickDispatchProbe(options: { commandTokens: string[]; cwd: string; refSnapshot?: SessionRefSnapshot; sessionName?: string; signal?: AbortSignal }): Promise<ClickDispatchProbe | undefined> {
@@ -156,6 +209,20 @@ export async function prepareClickDispatchProbe(options: { commandTokens: string
156
209
  return installResult?.status === "installed" ? probe : undefined;
157
210
  }
158
211
 
212
/**
 * Extracts scroll/viewport evidence from a click-dispatch probe result.
 *
 * Reads `result.target.targetOutsideViewport` and
 * `result.target.nearestScrollContainer` (its `selector` and
 * `targetOutsideContainer` fields), ignoring any field that does not have the
 * expected primitive type.
 *
 * @param result - Parsed probe result record; unexpected shapes are tolerated.
 * @returns A scroll-container diagnostic when the probe reported the target as
 *   outside its nearest scroll container or outside the viewport; otherwise
 *   `undefined`. The container selector, when present, is passed through
 *   `redactSensitiveText` before being returned or embedded in the summary.
 */
function getClickDispatchScrollContainerDiagnostic(result: Record<string, unknown>): ClickDispatchDiagnostic["scrollContainer"] {
  const target = isRecord(result.target) ? result.target : undefined;
  const scrollContainer = isRecord(target?.nearestScrollContainer) ? target.nearestScrollContainer : undefined;
  const targetOutsideViewport = typeof target?.targetOutsideViewport === "boolean" ? target.targetOutsideViewport : undefined;
  const targetOutsideContainer = typeof scrollContainer?.targetOutsideContainer === "boolean" ? scrollContainer.targetOutsideContainer : undefined;
  // A diagnostic is only useful when at least one out-of-view signal is positive.
  // This single check also covers the "no container and not outside viewport" case,
  // because targetOutsideContainer is undefined whenever no container was reported.
  if (targetOutsideContainer !== true && targetOutsideViewport !== true) return undefined;
  const selector = typeof scrollContainer?.selector === "string" ? redactSensitiveText(scrollContainer.selector) : undefined;
  // Name the container only when the probe measured the target outside it. If only the
  // viewport check fired, blaming the container would point at the wrong scroll surface.
  const summary = selector && targetOutsideContainer === true
    ? `Target appears outside nested scroll container ${selector}; use scrollintoview on the target or scroll that container before retrying.`
    : "Target appears outside the viewport or a nested scroll container; use scrollintoview on the target before retrying.";
  return { selector, summary, targetOutsideContainer, targetOutsideViewport };
}
225
+
159
226
  export async function collectClickDispatchDiagnostic(options: { cwd: string; probe?: ClickDispatchProbe; sessionName?: string; signal?: AbortSignal }): Promise<ClickDispatchDiagnostic | undefined> {
160
227
  if (!options.probe || !options.sessionName) return undefined;
161
228
  const data = await runSessionCommandData({ args: ["eval", "--stdin"], cwd: options.cwd, sessionName: options.sessionName, signal: options.signal, stdin: buildClickDispatchProbeCheckScript(options.probe) });
@@ -164,10 +231,14 @@ export async function collectClickDispatchDiagnostic(options: { cwd: string; pro
164
231
  const status = typeof result.status === "string" ? result.status : undefined;
165
232
  if (status !== "no-native-event-observed") return undefined;
166
233
  const nativeEventCount = typeof result.nativeEventCount === "number" ? result.nativeEventCount : 0;
167
- const summary = "Upstream click reported success but no trusted DOM event reached the selected element. Gather evidence with snapshot or page-change checks, then retry upstream click or report the workflow issue; the wrapper does not replay clicks in-page.";
234
+ const scrollContainer = getClickDispatchScrollContainerDiagnostic(result);
235
+ const summary = scrollContainer
236
+ ? `Upstream click reported success but no trusted DOM event reached the selected element. ${scrollContainer.summary}`
237
+ : "Upstream click reported success but no trusted DOM event reached the selected element. Gather evidence with snapshot or page-change checks, then retry upstream click or report the workflow issue; the wrapper does not replay clicks in-page.";
168
238
  return {
169
239
  nativeEventCount,
170
240
  reason: "native-click-produced-no-target-dom-event",
241
+ ...(scrollContainer ? { scrollContainer } : {}),
171
242
  status,
172
243
  summary,
173
244
  target: redactClickDispatchTarget(options.probe.target),