pi-agent-browser-native 0.2.44 → 0.2.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/CHANGELOG.md +42 -0
  2. package/README.md +20 -15
  3. package/docs/ARCHITECTURE.md +12 -10
  4. package/docs/COMMAND_REFERENCE.md +49 -27
  5. package/docs/ELECTRON.md +1 -1
  6. package/docs/RELEASE.md +6 -5
  7. package/docs/REQUIREMENTS.md +6 -3
  8. package/docs/SUPPORT_MATRIX.md +17 -13
  9. package/docs/TOOL_CONTRACT.md +87 -46
  10. package/docs/platform-smoke.md +4 -3
  11. package/extensions/agent-browser/index.ts +43 -450
  12. package/extensions/agent-browser/lib/bash-guard.ts +205 -0
  13. package/extensions/agent-browser/lib/electron/cdp.ts +69 -0
  14. package/extensions/agent-browser/lib/electron/cleanup.ts +5 -58
  15. package/extensions/agent-browser/lib/electron/discovery.ts +2 -9
  16. package/extensions/agent-browser/lib/electron/launch.ts +11 -65
  17. package/extensions/agent-browser/lib/electron/text.ts +13 -0
  18. package/extensions/agent-browser/lib/fs-utils.ts +18 -0
  19. package/extensions/agent-browser/lib/input-modes/job.ts +207 -21
  20. package/extensions/agent-browser/lib/input-modes/params.ts +28 -11
  21. package/extensions/agent-browser/lib/input-modes/semantic-action.ts +22 -2
  22. package/extensions/agent-browser/lib/input-modes/types.ts +5 -1
  23. package/extensions/agent-browser/lib/input-modes.ts +1 -0
  24. package/extensions/agent-browser/lib/json-schema.ts +73 -0
  25. package/extensions/agent-browser/lib/orchestration/browser-run/click-dispatch.ts +82 -11
  26. package/extensions/agent-browser/lib/orchestration/browser-run/diagnostics.ts +159 -30
  27. package/extensions/agent-browser/lib/orchestration/browser-run/final-result.ts +53 -2
  28. package/extensions/agent-browser/lib/orchestration/browser-run/index.ts +1 -0
  29. package/extensions/agent-browser/lib/orchestration/browser-run/prepare.ts +751 -32
  30. package/extensions/agent-browser/lib/orchestration/browser-run/process-output.ts +38 -7
  31. package/extensions/agent-browser/lib/orchestration/browser-run/prompt-guards.ts +0 -46
  32. package/extensions/agent-browser/lib/orchestration/browser-run/session-state.ts +10 -1
  33. package/extensions/agent-browser/lib/orchestration/browser-run/types.ts +28 -1
  34. package/extensions/agent-browser/lib/orchestration/electron-host/index.ts +1 -6
  35. package/extensions/agent-browser/lib/orchestration/input-plan.ts +15 -3
  36. package/extensions/agent-browser/lib/orchestration/output-file.ts +86 -0
  37. package/extensions/agent-browser/lib/pi-tool-rendering.ts +252 -0
  38. package/extensions/agent-browser/lib/playbook.ts +26 -26
  39. package/extensions/agent-browser/lib/process.ts +1 -1
  40. package/extensions/agent-browser/lib/prompt-policy.ts +1 -18
  41. package/extensions/agent-browser/lib/results/artifact-manifest.ts +1 -4
  42. package/extensions/agent-browser/lib/results/artifact-state.ts +7 -3
  43. package/extensions/agent-browser/lib/results/contracts.ts +6 -2
  44. package/extensions/agent-browser/lib/results/envelope.ts +11 -2
  45. package/extensions/agent-browser/lib/results/network-routes.ts +7 -4
  46. package/extensions/agent-browser/lib/results/network.ts +7 -1
  47. package/extensions/agent-browser/lib/results/presentation/artifacts.ts +88 -20
  48. package/extensions/agent-browser/lib/results/presentation/batch.ts +84 -12
  49. package/extensions/agent-browser/lib/results/presentation/diagnostics.ts +81 -26
  50. package/extensions/agent-browser/lib/results/presentation/errors.ts +13 -0
  51. package/extensions/agent-browser/lib/results/presentation/registry.ts +60 -0
  52. package/extensions/agent-browser/lib/results/presentation.ts +10 -1
  53. package/extensions/agent-browser/lib/results/snapshot-high-value-controls.ts +16 -5
  54. package/extensions/agent-browser/lib/results/snapshot.ts +2 -0
  55. package/extensions/agent-browser/lib/runtime.ts +10 -1
  56. package/extensions/agent-browser/lib/session-page-state.ts +15 -6
  57. package/extensions/agent-browser/lib/string-enum-schema.ts +20 -0
  58. package/extensions/agent-browser/lib/web-search.ts +31 -13
  59. package/package.json +2 -2
  60. package/platform-smoke.config.mjs +5 -2
  61. package/scripts/platform-smoke/build-ubuntu-image.mjs +25 -0
  62. package/scripts/platform-smoke/crabbox-runner.mjs +5 -1
  63. package/scripts/platform-smoke/doctor.mjs +6 -2
  64. package/scripts/platform-smoke/linux-image/Dockerfile +3 -5
  65. package/scripts/platform-smoke/targets.mjs +2 -1
  66. package/extensions/agent-browser/lib/orchestration/browser-run/browser-action-model.ts +0 -154
@@ -8,8 +8,10 @@ import type { ArtifactVerificationSummary } from "../results/contracts.js";
8
8
  import { isRecord } from "../parsing.js";
9
9
  import { summarizeNetworkFailures } from "../results/network.js";
10
10
  import { getBatchResultItems, getCommandNameFromBatchItem, getSelectValues } from "./shared.js";
11
+ import { compileAgentBrowserSemanticAction } from "./semantic-action.js";
11
12
  import {
12
13
  AGENT_BROWSER_JOB_STEP_ACTIONS,
14
+ AGENT_BROWSER_JOB_TYPE_DELAYED_TEXT_MAX_CHARACTERS,
13
15
  AGENT_BROWSER_QA_LOAD_STATES,
14
16
  type AgentBrowserJobStepAction,
15
17
  type AgentBrowserQaLoadState,
@@ -27,10 +29,88 @@ function getRequiredJobString(step: Record<string, unknown>, field: "path" | "se
27
29
  return { value };
28
30
  }
29
31
 
32
+ function compileJobClickOrFillStep(step: Record<string, unknown>, action: "click" | "fill"): { args?: string[]; error?: string } {
33
+ const hasSelector = typeof step.selector === "string" && step.selector.trim().length > 0;
34
+ const hasLocator = step.locator !== undefined || step.role !== undefined || step.name !== undefined || step.value !== undefined;
35
+ if (hasSelector && hasLocator) {
36
+ return { error: `job step ${action} must use either selector or semantic locator fields, not both.` };
37
+ }
38
+ if (hasSelector) {
39
+ if (action === "click") return { args: ["click", step.selector as string] };
40
+ const text = getRequiredJobString(step, "text", action);
41
+ if (text.error) return { error: text.error };
42
+ return { args: ["fill", step.selector as string, text.value as string] };
43
+ }
44
+ if (!hasLocator) {
45
+ return { error: `job step ${action} requires either a non-empty selector string or semantic locator fields.` };
46
+ }
47
+ const compiled = compileAgentBrowserSemanticAction({
48
+ action,
49
+ locator: step.locator,
50
+ name: step.name,
51
+ role: step.role,
52
+ text: step.text,
53
+ value: step.value,
54
+ });
55
+ if (compiled.error) return { error: compiled.error.replaceAll("semanticAction", `job step ${action}`) };
56
+ return { args: compiled.compiled?.args };
57
+ }
58
+
59
+ function getUnsupportedJobStepField(step: Record<string, unknown>, allowedFields: ReadonlySet<string>): string | undefined {
60
+ return Object.keys(step).find((field) => !allowedFields.has(field));
61
+ }
62
+
63
+ const JOB_TYPE_ALLOWED_FIELDS = new Set(["action", "delayMs", "press", "selector", "text"]);
64
+
65
+ function compileJobTypeSteps(step: Record<string, unknown>): { error?: string; steps?: CompiledAgentBrowserJobStep[] } {
66
+ const unsupportedField = getUnsupportedJobStepField(step, JOB_TYPE_ALLOWED_FIELDS);
67
+ if (unsupportedField) return { error: `job step type does not support ${unsupportedField}; supported fields are selector, text, delayMs, and press.` };
68
+ const text = getRequiredJobString(step, "text", "type");
69
+ if (text.error) return { error: text.error };
70
+ const selector = step.selector;
71
+ if (selector !== undefined && (typeof selector !== "string" || selector.trim().length === 0)) {
72
+ return { error: "job step type selector must be a non-empty string when provided." };
73
+ }
74
+ if (step.locator !== undefined || step.role !== undefined || step.name !== undefined || step.value !== undefined || step.values !== undefined) {
75
+ return { error: "job step type supports selector, text, delayMs, and press only; focus the target first or use click/fill semantic locator fields in a separate step." };
76
+ }
77
+ const delayMs = step.delayMs;
78
+ if (delayMs !== undefined && (typeof delayMs !== "number" || !Number.isInteger(delayMs) || delayMs <= 0)) {
79
+ return { error: "job step type delayMs must be a positive integer when provided." };
80
+ }
81
+ const press = step.press;
82
+ if (press !== undefined && (typeof press !== "string" || press.trim().length === 0)) {
83
+ return { error: "job step type press must be a non-empty key string when provided." };
84
+ }
85
+ const typedText = text.value as string;
86
+ const typedChars = Array.from(typedText);
87
+ if (typedChars.length === 0) return { error: "job step type requires non-empty text." };
88
+ if (delayMs !== undefined && typedChars.length > AGENT_BROWSER_JOB_TYPE_DELAYED_TEXT_MAX_CHARACTERS) {
89
+ return { error: `job step type delayMs supports at most ${AGENT_BROWSER_JOB_TYPE_DELAYED_TEXT_MAX_CHARACTERS} characters; split longer text into shorter calls or omit delayMs.` };
90
+ }
91
+ const compiledSteps: CompiledAgentBrowserJobStep[] = [];
92
+ if (delayMs === undefined) {
93
+ compiledSteps.push({ action: "type", args: typeof selector === "string" ? ["type", selector, typedText] : ["keyboard", "type", typedText] });
94
+ } else {
95
+ if (typeof selector === "string") compiledSteps.push({ action: "type", args: ["focus", selector], generatedFrom: "type.selector" });
96
+ for (const [index, char] of typedChars.entries()) {
97
+ compiledSteps.push({ action: "type", args: ["keyboard", "type", char], generatedFrom: "type.delayMs" });
98
+ if (index < typedChars.length - 1) compiledSteps.push({ action: "wait", args: ["wait", String(delayMs)], generatedFrom: "type.delayMs" });
99
+ }
100
+ }
101
+ if (typeof press === "string") compiledSteps.push({ action: "type", args: ["press", press], generatedFrom: "type.press" });
102
+ return { steps: compiledSteps };
103
+ }
104
+
30
105
  export function compileAgentBrowserJob(input: unknown): { compiled?: CompiledAgentBrowserJob; error?: string } {
31
106
  if (!isRecord(input)) {
32
107
  return { error: "job must be an object." };
33
108
  }
109
+ const rawFailFast = input.failFast;
110
+ if (rawFailFast !== undefined && typeof rawFailFast !== "boolean") {
111
+ return { error: "job.failFast must be a boolean when provided." };
112
+ }
113
+ const failFast = rawFailFast !== false;
34
114
  const rawSteps = input.steps;
35
115
  if (!Array.isArray(rawSteps) || rawSteps.length === 0) {
36
116
  return { error: "job.steps must be a non-empty array." };
@@ -46,20 +126,29 @@ export function compileAgentBrowserJob(input: unknown): { compiled?: CompiledAge
46
126
  }
47
127
  const jobAction = action as AgentBrowserJobStepAction;
48
128
  let args: string[];
129
+ let generatedFrom: string | undefined;
130
+ let extraSteps: CompiledAgentBrowserJobStep[] = [];
49
131
  if (jobAction === "open") {
50
132
  const result = getRequiredJobString(rawStep, "url", jobAction);
51
133
  if (result.error) return { error: `job.steps[${index}]: ${result.error}` };
52
134
  args = ["open", result.value as string];
53
- } else if (jobAction === "click") {
54
- const result = getRequiredJobString(rawStep, "selector", jobAction);
135
+ if (rawStep.loadState !== undefined) {
136
+ if (typeof rawStep.loadState !== "string" || !AGENT_BROWSER_QA_LOAD_STATES.includes(rawStep.loadState as AgentBrowserQaLoadState)) {
137
+ return { error: `job.steps[${index}].loadState must be one of: ${AGENT_BROWSER_QA_LOAD_STATES.join(", ")}.` };
138
+ }
139
+ extraSteps = [{ action: "wait", args: ["wait", "--load", rawStep.loadState], generatedFrom: "open.loadState" }];
140
+ }
141
+ } else if (jobAction === "click" || jobAction === "fill") {
142
+ const result = compileJobClickOrFillStep(rawStep, jobAction);
55
143
  if (result.error) return { error: `job.steps[${index}]: ${result.error}` };
56
- args = ["click", result.value as string];
57
- } else if (jobAction === "fill") {
58
- const selector = getRequiredJobString(rawStep, "selector", jobAction);
59
- if (selector.error) return { error: `job.steps[${index}]: ${selector.error}` };
60
- const text = getRequiredJobString(rawStep, "text", jobAction);
61
- if (text.error) return { error: `job.steps[${index}]: ${text.error}` };
62
- args = ["fill", selector.value as string, text.value as string];
144
+ args = result.args as string[];
145
+ } else if (jobAction === "type") {
146
+ const result = compileJobTypeSteps(rawStep);
147
+ if (result.error) return { error: `job.steps[${index}]: ${result.error}` };
148
+ const [firstStep, ...restSteps] = result.steps as CompiledAgentBrowserJobStep[];
149
+ args = firstStep.args;
150
+ generatedFrom = firstStep.generatedFrom;
151
+ extraSteps = restSteps;
63
152
  } else if (jobAction === "select") {
64
153
  const selector = getRequiredJobString(rawStep, "selector", jobAction);
65
154
  if (selector.error) return { error: `job.steps[${index}]: ${selector.error}` };
@@ -84,14 +173,16 @@ export function compileAgentBrowserJob(input: unknown): { compiled?: CompiledAge
84
173
  const result = getRequiredJobString(rawStep, "path", jobAction);
85
174
  if (result.error) return { error: `job.steps[${index}]: ${result.error}` };
86
175
  args = ["wait", "--download", result.value as string];
176
+ } else if (jobAction === "snapshot") {
177
+ args = ["snapshot", "-i"];
87
178
  } else {
88
179
  const result = getRequiredJobString(rawStep, "path", jobAction);
89
180
  if (result.error) return { error: `job.steps[${index}]: ${result.error}` };
90
181
  args = ["screenshot", result.value as string];
91
182
  }
92
- steps.push({ action: jobAction, args });
183
+ steps.push({ action: jobAction, args, generatedFrom }, ...extraSteps);
93
184
  }
94
- return { compiled: { args: ["batch"], stdin: JSON.stringify(steps.map((step) => step.args)), steps } };
185
+ return { compiled: { args: failFast ? ["batch", "--bail"] : ["batch"], failFast, stdin: JSON.stringify(steps.map((step) => step.args)), steps } };
95
186
  }
96
187
 
97
188
  export function isHttpOrHttpsUrl(url: string): boolean {
@@ -110,6 +201,8 @@ function describeQaChecksRun(checks: CompiledAgentBrowserQaPreset["checks"]): st
110
201
  if (checks.checkNetwork) parts.push("network");
111
202
  if (checks.checkConsole) parts.push("console");
112
203
  if (checks.checkErrors) parts.push("errors");
204
+ if (checks.diagnosticsResetAtStart) parts.push("diagnostics-reset");
205
+ else if (checks.checkNetwork || checks.checkConsole || checks.checkErrors) parts.push("attached-diagnostics-preserved");
113
206
  if (checks.screenshotPath) parts.push("screenshot");
114
207
  return parts.join(", ");
115
208
  }
@@ -145,6 +238,9 @@ export function buildQaCompactPassText(options: {
145
238
  const pageParts = [options.page?.title, options.page?.url].filter((part): part is string => typeof part === "string" && part.length > 0);
146
239
  if (pageParts.length > 0) lines.push(`Page: ${pageParts.join(" — ")}`);
147
240
  lines.push(`Checks run: ${describeQaChecksRun(options.checks)} (${options.batchStepCount} batch step${options.batchStepCount === 1 ? "" : "s"})`);
241
+ if (options.checks.attached && !options.checks.diagnosticsResetAtStart && (options.checks.checkNetwork || options.checks.checkConsole || options.checks.checkErrors)) {
242
+ lines.push("Attached diagnostics: existing upstream session console/network/error buffers were preserved; rows may include events from before qa.attached started.");
243
+ }
148
244
  if (options.checks.screenshotPath) {
149
245
  const verification = options.artifactVerification;
150
246
  lines.push(verification
@@ -155,7 +251,83 @@ export function buildQaCompactPassText(options: {
155
251
  return lines.join("\n");
156
252
  }
157
253
 
158
- export function analyzeQaPresetResults(data: unknown): AgentBrowserQaPresetAnalysis | undefined {
254
+ const QA_VISIBLE_TEXT_TIMEOUT_MS = 5_000;
255
+
256
+ function formatQaExpectedTextPreview(text: string): string {
257
+ return JSON.stringify(text.length > 80 ? `${text.slice(0, 77)}...` : text);
258
+ }
259
+
260
+ function buildQaVisibleTextPredicate(text: string): string {
261
+ return `(() => {
262
+ const expected = ${JSON.stringify(text)}.replace(/\\s+/g, " ").trim();
263
+ if (!expected) return false;
264
+ const root = document.body || document.documentElement;
265
+ if (!root) return false;
266
+ const skipTags = new Set(["SCRIPT", "STYLE", "NOSCRIPT", "SVG"]);
267
+ const normalize = (value) => String(value ?? "").replace(/\\s+/g, " ").trim();
268
+ const isVisibleElement = (element) => {
269
+ if (!(element instanceof HTMLElement)) return false;
270
+ if (skipTags.has(element.tagName)) return false;
271
+ const style = window.getComputedStyle(element);
272
+ if (style.display === "none" || style.visibility === "hidden" || Number(style.opacity) === 0) return false;
273
+ return element.getClientRects().length > 0;
274
+ };
275
+ const hasVisibleAncestors = (node) => {
276
+ for (let element = node.parentElement; element; element = element.parentElement) {
277
+ if (!isVisibleElement(element)) return false;
278
+ if (element === root) break;
279
+ }
280
+ return true;
281
+ };
282
+ const textWalker = document.createTreeWalker(root, NodeFilter.SHOW_TEXT);
283
+ let visitedText = 0;
284
+ for (let node = textWalker.nextNode(); node && visitedText < 6000; node = textWalker.nextNode(), visitedText += 1) {
285
+ if (!hasVisibleAncestors(node)) continue;
286
+ if (normalize(node.nodeValue).includes(expected)) return true;
287
+ }
288
+ const elementWalker = document.createTreeWalker(root, NodeFilter.SHOW_ELEMENT);
289
+ let visitedElements = 0;
290
+ for (let node = elementWalker.nextNode(); node && visitedElements < 3000; node = elementWalker.nextNode(), visitedElements += 1) {
291
+ const element = node;
292
+ if (!isVisibleElement(element) || !("value" in element)) continue;
293
+ if (normalize(element.value).includes(expected)) return true;
294
+ }
295
+ return false;
296
+ })()`;
297
+ }
298
+
299
+ function qaVisibleTextWaitPassed(item: ReturnType<typeof getBatchResultItems>[number] | undefined, step: CompiledAgentBrowserJobStep): boolean | undefined {
300
+ if (step.args[0] !== "wait" || step.args[1] !== "--fn") return undefined;
301
+ if (!item || item.success === false) return false;
302
+ if (typeof item.result === "boolean") return item.result;
303
+ if (isRecord(item.result) && typeof item.result.result === "boolean") return item.result.result;
304
+ return true;
305
+ }
306
+
307
+ function extractQaTextAssertionResultText(item: ReturnType<typeof getBatchResultItems>[number] | undefined): string | undefined {
308
+ if (!item || item.success === false) return undefined;
309
+ const result = item.result;
310
+ if (typeof result === "string") return result;
311
+ if (!isRecord(result)) return undefined;
312
+ for (const key of ["result", "text", "value"] as const) {
313
+ const value = result[key];
314
+ if (typeof value === "string") return value;
315
+ }
316
+ return undefined;
317
+ }
318
+
319
+ export function analyzeQaPresetTimeout(compiled: CompiledAgentBrowserQaPreset): AgentBrowserQaPresetAnalysis | undefined {
320
+ if (compiled.checks.expectedText.length === 0) return undefined;
321
+ const failedChecks = compiled.checks.expectedText.map((text) => `expected text was not verified before timeout: ${formatQaExpectedTextPreview(text)}`);
322
+ return {
323
+ failedChecks,
324
+ passed: false,
325
+ summary: `QA preset failed: ${failedChecks.join("; ")}.`,
326
+ warnings: ["The wrapper timed out before expected-text evidence could be verified; inspect timeoutPartialProgress and retry with a narrower readiness condition if the page was still loading."],
327
+ };
328
+ }
329
+
330
+ export function analyzeQaPresetResults(data: unknown, compiled?: CompiledAgentBrowserQaPreset): AgentBrowserQaPresetAnalysis | undefined {
159
331
  const items = getBatchResultItems(data);
160
332
  if (items.length === 0) return undefined;
161
333
  const failedChecks: string[] = [];
@@ -179,6 +351,18 @@ export function analyzeQaPresetResults(data: unknown): AgentBrowserQaPresetAnaly
179
351
  if (networkFailures.benignCount > 0) warnings.push(`${networkFailures.benignCount} benign network request failure(s) ignored`);
180
352
  }
181
353
  }
354
+ if (compiled?.checks.expectedText.length) {
355
+ let expectedTextIndex = 0;
356
+ compiled.steps.forEach((step, index) => {
357
+ if (step.action !== "assertText") return;
358
+ const expected = compiled.checks.expectedText[expectedTextIndex++];
359
+ if (!expected) return;
360
+ const visibleTextPassed = qaVisibleTextWaitPassed(items[index], step);
361
+ if (visibleTextPassed === true) return;
362
+ const actual = extractQaTextAssertionResultText(items[index]);
363
+ if (!actual || !actual.includes(expected)) failedChecks.push(`expected text not found: ${formatQaExpectedTextPreview(expected)}`);
364
+ });
365
+ }
182
366
  const uniqueFailures = [...new Set(failedChecks)];
183
367
  const uniqueWarnings = [...new Set(warnings)];
184
368
  return {
@@ -234,18 +418,19 @@ export function compileAgentBrowserQaPreset(input: unknown): { compiled?: Compil
234
418
  if (rawLoadState !== undefined && (typeof rawLoadState !== "string" || !AGENT_BROWSER_QA_LOAD_STATES.includes(rawLoadState as AgentBrowserQaLoadState))) {
235
419
  return { error: `qa.loadState must be one of: ${AGENT_BROWSER_QA_LOAD_STATES.join(", ")}.` };
236
420
  }
237
- const checkConsole = input.checkConsole !== false;
238
- const checkErrors = input.checkErrors !== false;
239
- const checkNetwork = input.checkNetwork !== false;
421
+ const checkConsole = typeof input.checkConsole === "boolean" ? input.checkConsole : !attached;
422
+ const checkErrors = typeof input.checkErrors === "boolean" ? input.checkErrors : !attached;
423
+ const checkNetwork = typeof input.checkNetwork === "boolean" ? input.checkNetwork : !attached;
240
424
  const loadState = (rawLoadState as AgentBrowserQaLoadState | undefined) ?? "domcontentloaded";
425
+ const diagnosticsResetAtStart = !attached;
241
426
  const steps: CompiledAgentBrowserJobStep[] = [];
242
- if (checkNetwork) steps.push({ action: "wait", args: ["network", "requests", "--clear"] });
243
- if (checkConsole) steps.push({ action: "wait", args: ["console", "--clear"] });
244
- if (checkErrors) steps.push({ action: "wait", args: ["errors", "--clear"] });
427
+ if (diagnosticsResetAtStart && checkNetwork) steps.push({ action: "wait", args: ["network", "requests", "--clear"] });
428
+ if (diagnosticsResetAtStart && checkConsole) steps.push({ action: "wait", args: ["console", "--clear"] });
429
+ if (diagnosticsResetAtStart && checkErrors) steps.push({ action: "wait", args: ["errors", "--clear"] });
245
430
  if (!attached && normalizedUrl) steps.push({ action: "open", args: ["open", normalizedUrl] });
246
431
  steps.push({ action: "wait", args: ["wait", "--load", loadState] });
247
432
  for (const text of expectedText) {
248
- steps.push({ action: "assertText", args: ["wait", "--text", text] });
433
+ steps.push({ action: "assertText", args: ["wait", "--fn", buildQaVisibleTextPredicate(text), "--timeout", String(QA_VISIBLE_TEXT_TIMEOUT_MS)] });
249
434
  }
250
435
  if (typeof expectedSelector === "string") {
251
436
  steps.push({ action: "wait", args: ["wait", expectedSelector] });
@@ -256,8 +441,9 @@ export function compileAgentBrowserQaPreset(input: unknown): { compiled?: Compil
256
441
  if (typeof screenshotPath === "string") steps.push({ action: "screenshot", args: ["screenshot", screenshotPath] });
257
442
  return {
258
443
  compiled: {
259
- args: ["batch"],
260
- checks: { attached, checkConsole, checkErrors, checkNetwork, expectedSelector, expectedText, loadState, screenshotPath, url: normalizedUrl },
444
+ args: ["batch", "--bail"],
445
+ checks: { attached, checkConsole, checkErrors, checkNetwork, diagnosticsResetAtStart, expectedSelector, expectedText, loadState, screenshotPath, url: normalizedUrl },
446
+ failFast: true,
261
447
  stdin: JSON.stringify(steps.map((step) => step.args)),
262
448
  steps,
263
449
  },
@@ -4,8 +4,8 @@
4
4
  * Scope: Schema-only; behavioral validation lives in the mode compilers.
5
5
  */
6
6
 
7
- import { StringEnum } from "@earendil-works/pi-ai";
8
- import { Type } from "typebox";
7
+ import { JsonSchema, type JsonSchemaBuilder } from "../json-schema.js";
8
+ import { StringEnum as localStringEnum, type StringEnumBuilder } from "../string-enum-schema.js";
9
9
 
10
10
  import {
11
11
  ELECTRON_DISCOVERY_DEFAULT_MAX_RESULTS,
@@ -15,6 +15,7 @@ import {
15
15
  AGENT_BROWSER_ELECTRON_HANDOFFS,
16
16
  AGENT_BROWSER_ELECTRON_TARGET_TYPES,
17
17
  AGENT_BROWSER_JOB_STEP_ACTIONS,
18
+ AGENT_BROWSER_JOB_TYPE_DELAYED_TEXT_MAX_CHARACTERS,
18
19
  AGENT_BROWSER_QA_LOAD_STATES,
19
20
  AGENT_BROWSER_SEMANTIC_ACTIONS,
20
21
  AGENT_BROWSER_SEMANTIC_LOCATORS,
@@ -22,7 +23,11 @@ import {
22
23
  SOURCE_LOOKUP_MAX_WORKSPACE_FILES,
23
24
  } from "./types.js";
24
25
 
25
- export const AGENT_BROWSER_PARAMS = Type.Object({
26
+ export function createAgentBrowserParamsSchema(
27
+ Type: JsonSchemaBuilder = JsonSchema,
28
+ StringEnum: StringEnumBuilder = localStringEnum,
29
+ ) {
30
+ return Type.Object({
26
31
 
27
32
  args: Type.Optional(
28
33
  Type.Array(Type.String({ description: "Exact agent-browser CLI arguments, excluding the binary name. Do not pass --json; the wrapper injects it. First-call recipe: open → snapshot -i → click/fill @eN → snapshot -i." }), {
@@ -33,14 +38,14 @@ export const AGENT_BROWSER_PARAMS = Type.Object({
33
38
  semanticAction: Type.Optional(
34
39
  Type.Object({
35
40
  action: StringEnum(AGENT_BROWSER_SEMANTIC_ACTIONS, {
36
- description: "Intent action to compile to an existing agent-browser find command, or to upstream select when action=select.",
41
+ description: "Intent action to compile to an existing agent-browser find command, direct selector/ref command, or upstream select when action=select.",
37
42
  }),
38
43
  locator: Type.Optional(StringEnum(AGENT_BROWSER_SEMANTIC_LOCATORS, {
39
44
  description: "Upstream find locator family to use for check/click/fill actions.",
40
45
  })),
41
46
  value: Type.Optional(Type.String({ description: "Locator value for find actions, or a single option value for select actions. For locator=role, role may be supplied instead." })),
42
47
  values: Type.Optional(Type.Array(Type.String({ description: "Option value for select actions." }), { description: "One or more option values for select actions.", minItems: 1 })),
43
- selector: Type.Optional(Type.String({ description: "Selector or @ref for select actions; compiled to select <selector> <value...>." })),
48
+ selector: Type.Optional(Type.String({ description: "Selector or @ref for direct click/check/fill actions, or for select actions compiled to select <selector> <value...>." })),
44
49
  text: Type.Optional(Type.String({ description: "Text/value argument for fill actions." })),
45
50
  role: Type.Optional(Type.String({ description: "Role locator value for locator=role. May be used instead of value; when both are set they must match." })),
46
51
  name: Type.Optional(Type.String({ description: "Accessible name filter for locator=role; compiles to --name <name>." })),
@@ -54,9 +59,9 @@ export const AGENT_BROWSER_PARAMS = Type.Object({
54
59
  expectedText: Type.Optional(Type.Union([Type.String(), Type.Array(Type.String())], { description: "Text that must appear on the page." })),
55
60
  expectedSelector: Type.Optional(Type.String({ description: "Selector or @ref that must appear on the page." })),
56
61
  screenshotPath: Type.Optional(Type.String({ description: "Optional evidence screenshot path captured at the end of the QA preset." })),
57
- checkConsole: Type.Optional(Type.Boolean({ description: "Whether to fail on console error messages. Defaults to true." })),
58
- checkErrors: Type.Optional(Type.Boolean({ description: "Whether to fail on page errors. Defaults to true." })),
59
- checkNetwork: Type.Optional(Type.Boolean({ description: "Whether to inspect network requests and fail on actionable request failures; benign icon misses warn. Defaults to true." })),
62
+ checkConsole: Type.Optional(Type.Boolean({ description: "Whether to inspect console messages and fail on console errors. Defaults to false for qa.attached because upstream buffers may predate the check." })),
63
+ checkErrors: Type.Optional(Type.Boolean({ description: "Whether to inspect page errors and fail when errors are present. Defaults to false for qa.attached because upstream buffers may predate the check." })),
64
+ checkNetwork: Type.Optional(Type.Boolean({ description: "Whether to inspect network requests and fail on actionable request failures; benign icon misses warn. Defaults to false for qa.attached because upstream buffers may predate the check." })),
60
65
  loadState: Type.Optional(StringEnum(AGENT_BROWSER_QA_LOAD_STATES, { description: "Page readiness state for the QA preset before assertions and diagnostics. Defaults to domcontentloaded; use networkidle only for pages without long-lived background requests." })),
61
66
  }, { additionalProperties: false }),
62
67
  Type.Object({
@@ -160,17 +165,24 @@ export const AGENT_BROWSER_PARAMS = Type.Object({
160
165
  ),
161
166
  job: Type.Optional(
162
167
  Type.Object({
168
+ failFast: Type.Optional(Type.Boolean({ description: "Stop the compiled batch on the first failed job step. Defaults to true so later mutating steps do not run after setup/assertion failures." })),
163
169
  steps: Type.Array(
164
170
  Type.Object({
165
171
  action: StringEnum(AGENT_BROWSER_JOB_STEP_ACTIONS, {
166
172
  description: "Constrained one-call job step compiled to existing upstream batch commands.",
167
173
  }),
168
174
  url: Type.Optional(Type.String({ description: "URL for open steps, or URL pattern for assertUrl steps." })),
169
- selector: Type.Optional(Type.String({ description: "Selector or @ref for click/fill/select-like steps." })),
175
+ loadState: Type.Optional(StringEnum(AGENT_BROWSER_QA_LOAD_STATES, { description: "Optional readiness wait to insert immediately after an open step; use domcontentloaded/load/networkidle when the next job step needs page hydration evidence before clicking or reading." })),
176
+ selector: Type.Optional(Type.String({ description: "Selector or @ref for click/fill/type/select-like steps; omit when using semantic locator fields on click/fill steps." })),
177
+ locator: Type.Optional(StringEnum(AGENT_BROWSER_SEMANTIC_LOCATORS, { description: "Semantic locator for click/fill steps when selector is omitted." })),
178
+ role: Type.Optional(Type.String({ description: "Role locator value for click/fill steps when locator is role." })),
179
+ name: Type.Optional(Type.String({ description: "Accessible name filter for role locator click/fill steps." })),
170
180
  text: Type.Optional(Type.String({ description: "Text for fill steps or visible text for assertText steps." })),
171
- value: Type.Optional(Type.String({ description: "Single option value for select steps." })),
181
+ value: Type.Optional(Type.String({ description: "Single option value for select steps, or locator value for semantic click/fill steps." })),
172
182
  values: Type.Optional(Type.Array(Type.String({ description: "Option value for select steps." }), { description: "One or more option values for select steps.", minItems: 1 })),
173
183
  path: Type.Optional(Type.String({ description: "Artifact/download path for waitForDownload or screenshot steps." })),
184
+ delayMs: Type.Optional(Type.Integer({ description: `Optional per-character delay for type steps; when set, the job compiles to focus/keyboard type/wait steps instead of instant fill-like typing, capped at ${AGENT_BROWSER_JOB_TYPE_DELAYED_TEXT_MAX_CHARACTERS} characters.`, minimum: 1 })),
185
+ press: Type.Optional(Type.String({ description: "Optional key to press after a type step, for example Enter." })),
174
186
  milliseconds: Type.Optional(Type.Number({ description: "Milliseconds for wait steps." })),
175
187
  }, { additionalProperties: false }),
176
188
  { minItems: 1 },
@@ -178,6 +190,8 @@ export const AGENT_BROWSER_PARAMS = Type.Object({
178
190
  }, { additionalProperties: false }),
179
191
  ),
180
192
  stdin: Type.Optional(Type.String({ description: "Optional raw stdin content; only supported for batch, eval --stdin, auth save --password-stdin, and is generated internally by job, qa, sourceLookup, or networkSourceLookup mode. Do not use with electron mode." })),
193
+ outputPath: Type.Optional(Type.String({ description: "Optional workspace-relative or absolute file path that receives the model-facing command data/result after the browser command completes. Useful for eval/get/snapshot captures that should become durable local artifacts.", minLength: 1 })),
194
+ timeoutMs: Type.Optional(Type.Integer({ description: "Optional per-call wrapper subprocess watchdog in milliseconds for browser CLI args/job/qa/source lookup calls. Use for long opens or large output captures; fixed wait steps still must stay below the upstream IPC wait budget. Electron actions use electron.timeoutMs instead.", minimum: 1 })),
181
195
  sessionMode: Type.Optional(
182
196
  StringEnum(["auto", "fresh"] as const, {
183
197
  description:
@@ -185,4 +199,7 @@ export const AGENT_BROWSER_PARAMS = Type.Object({
185
199
  default: DEFAULT_SESSION_MODE,
186
200
  }),
187
201
  ),
188
- }, { additionalProperties: false });
202
+ }, { additionalProperties: false });
203
+ }
204
+
205
+ export const AGENT_BROWSER_PARAMS = createAgentBrowserParamsSchema();
@@ -62,8 +62,28 @@ export function compileAgentBrowserSemanticAction(input: unknown): { compiled?:
62
62
  const args = typeof session === "string" ? ["--session", session, "select", selector, ...(selectedValues.values as string[])] : ["select", selector, ...(selectedValues.values as string[])];
63
63
  return { compiled: { action: "select", selector, values: selectedValues.values, args } };
64
64
  }
65
- if (selector !== undefined || values !== undefined) {
66
- return { error: "semanticAction.selector and values are only supported for select actions." };
65
+ if (values !== undefined) {
66
+ return { error: "semanticAction.values is only supported for select actions." };
67
+ }
68
+ if (selector !== undefined) {
69
+ if (typeof selector !== "string" || selector.trim().length === 0) {
70
+ return { error: "semanticAction.selector must be a non-empty string when provided." };
71
+ }
72
+ if (locator !== undefined || value !== undefined || role !== undefined || name !== undefined) {
73
+ return { error: "semanticAction.selector cannot be combined with locator, value, role, or name; use selector for a direct click/check/fill target or locator fields for find-based actions." };
74
+ }
75
+ if (text !== undefined && typeof text !== "string") {
76
+ return { error: "semanticAction.text must be a string when provided." };
77
+ }
78
+ if (action === "fill" && (typeof text !== "string" || text.length === 0)) {
79
+ return { error: `semanticAction.text is required for ${action}.` };
80
+ }
81
+ if (action !== "fill" && text !== undefined) {
82
+ return { error: "semanticAction.text is only supported for fill actions." };
83
+ }
84
+ const directArgs = typeof session === "string" ? ["--session", session, action, selector] : [action, selector];
85
+ if (action === "fill") directArgs.push(text as string);
86
+ return { compiled: { action: action as AgentBrowserSemanticActionName, selector, args: directArgs } };
67
87
  }
68
88
  if (typeof locator !== "string" || !AGENT_BROWSER_SEMANTIC_LOCATORS.includes(locator as AgentBrowserSemanticLocator)) {
69
89
  return { error: `semanticAction.locator must be one of: ${AGENT_BROWSER_SEMANTIC_LOCATORS.join(", ")}.` };
@@ -10,7 +10,8 @@ export const AGENT_BROWSER_SEMANTIC_ACTIONS = ["check", "click", "fill", "select
10
10
 
11
11
  export const AGENT_BROWSER_SEMANTIC_LOCATORS = ["alt", "label", "placeholder", "role", "testid", "text", "title"] as const;
12
12
 
13
- export const AGENT_BROWSER_JOB_STEP_ACTIONS = ["open", "click", "fill", "select", "wait", "assertText", "assertUrl", "waitForDownload", "screenshot"] as const;
13
+ export const AGENT_BROWSER_JOB_TYPE_DELAYED_TEXT_MAX_CHARACTERS = 200;
14
+ export const AGENT_BROWSER_JOB_STEP_ACTIONS = ["open", "click", "fill", "type", "select", "wait", "assertText", "assertUrl", "waitForDownload", "screenshot", "snapshot"] as const;
14
15
 
15
16
  export const AGENT_BROWSER_QA_LOAD_STATES = ["domcontentloaded", "load", "networkidle"] as const;
16
17
 
@@ -102,10 +103,12 @@ export interface CompiledAgentBrowserSemanticAction {
102
103
  export interface CompiledAgentBrowserJobStep {
103
104
  action: AgentBrowserJobStepAction;
104
105
  args: string[];
106
+ generatedFrom?: string;
105
107
  }
106
108
 
107
109
  export interface CompiledAgentBrowserJob {
108
110
  args: string[];
111
+ failFast: boolean;
109
112
  stdin: string;
110
113
  steps: CompiledAgentBrowserJobStep[];
111
114
  }
@@ -115,6 +118,7 @@ export interface CompiledAgentBrowserQaPreset extends CompiledAgentBrowserJob {
115
118
  checkConsole: boolean;
116
119
  checkErrors: boolean;
117
120
  checkNetwork: boolean;
121
+ diagnosticsResetAtStart: boolean;
118
122
  loadState: AgentBrowserQaLoadState;
119
123
  expectedText: string[];
120
124
  expectedSelector?: string;
@@ -7,6 +7,7 @@
7
7
  export { AGENT_BROWSER_PARAMS } from "./input-modes/params.js";
8
8
  export {
9
9
  analyzeQaPresetResults,
10
+ analyzeQaPresetTimeout,
10
11
  buildQaCompactPassText,
11
12
  compileAgentBrowserJob,
12
13
  compileAgentBrowserQaPreset,
@@ -0,0 +1,73 @@
1
+ /**
2
+ * Purpose: Build the small JSON Schema subset used by Pi tool schemas without importing TypeBox at runtime.
3
+ * Responsibilities: Preserve plain JSON Schema objects Pi consumes while keeping extension startup cheap.
4
+ * Scope: Schema construction only; runtime validation still belongs to Pi and the tool input compilers.
5
+ */
6
+
7
+ import type { TSchema, TSchemaOptions, TUnsafe } from "typebox";
8
+
9
+ const OPTIONAL_SCHEMA = Symbol("pi-agent-browser-optional-schema");
10
+
11
+ type SchemaObject = TSchema & { [OPTIONAL_SCHEMA]?: true };
12
+ type SchemaProperties = Record<string, TSchema>;
13
+
14
/**
 * Combine a base schema fragment with caller-supplied schema options.
 *
 * The result is always a fresh object; when both inputs define the same key,
 * the value from `options` wins because it is spread last.
 *
 * @param schema - Base JSON Schema keywords (for example `{ type: "string" }`).
 * @param options - Optional extra keywords to layer on top of `schema`.
 * @returns A shallow copy of `schema` with `options` applied over it.
 */
function withOptions(schema: Record<string, unknown>, options?: TSchemaOptions): TSchema {
  // A missing (null/undefined) options argument contributes no keys.
  if (options == null) {
    return { ...schema } as TSchema;
  }
  return { ...schema, ...options } as TSchema;
}
17
+
18
/**
 * Map a literal value to the JSON Schema primitive type name it corresponds to.
 *
 * @param value - The literal value to classify.
 * @returns `"string"`, `"number"`, or `"boolean"` when `typeof value` is one of
 *   those; `undefined` for every other kind of value (null, objects, bigint, ...).
 */
function literalType(value: unknown): "boolean" | "number" | "string" | undefined {
  switch (typeof value) {
    case "string":
      return "string";
    case "number":
      return "number";
    case "boolean":
      return "boolean";
    default:
      return undefined;
  }
}
22
+
23
/**
 * Produce a copy of a schema without the internal optional marker.
 *
 * The input is not modified; every own enumerable key other than
 * `OPTIONAL_SCHEMA` is carried over to the returned object.
 *
 * @param schema - Schema that may carry the `OPTIONAL_SCHEMA` marker.
 * @returns A shallow copy of `schema` with the marker key removed.
 */
function propertySchema(schema: TSchema): TSchema {
  // Pull the marker out by key and keep everything else via object rest.
  const { [OPTIONAL_SCHEMA]: _marker, ...unmarked } = schema as SchemaObject & Record<PropertyKey, unknown>;
  return unmarked as TSchema;
}
28
+
29
/**
 * Builder for the plain-object JSON Schema fragments used by the tool schemas.
 *
 * Each method returns a new plain object; any `options` passed are merged over
 * the generated keywords by `withOptions`.
 */
export const JsonSchema = {
  /** Build `{ type: "array", items }` plus any options. */
  Array(items: TSchema, options?: TSchemaOptions): TSchema {
    const base = { type: "array", items };
    return withOptions(base, options);
  },

  /** Build `{ type: "boolean" }` plus any options. */
  Boolean(options?: TSchemaOptions): TSchema {
    const base = { type: "boolean" };
    return withOptions(base, options);
  },

  /** Build `{ type: "integer" }` plus any options. */
  Integer(options?: TSchemaOptions): TSchema {
    const base = { type: "integer" };
    return withOptions(base, options);
  },

  /**
   * Build a `const` schema for a literal value. A `type` keyword is included
   * only when `literalType` recognises the value as string, number, or boolean.
   */
  Literal(value: unknown, options?: TSchemaOptions): TSchema {
    const type = literalType(value);
    const base = type === undefined ? { const: value } : { type, const: value };
    return withOptions(base, options);
  },

  /** Build `{ type: "number" }` plus any options. */
  Number(options?: TSchemaOptions): TSchema {
    const base = { type: "number" };
    return withOptions(base, options);
  },

  /**
   * Build an object schema from a property map.
   *
   * Properties not wrapped by `Optional` are listed under `required`; the
   * `required` keyword is omitted entirely when that list is empty. The
   * optional marker is stripped from every property schema in the output.
   */
  Object(properties: SchemaProperties, options?: TSchemaOptions): TSchema {
    const required: string[] = [];
    const cleaned: Array<[string, TSchema]> = [];
    for (const [key, schema] of globalThis.Object.entries(properties)) {
      if ((schema as SchemaObject)[OPTIONAL_SCHEMA] !== true) {
        required.push(key);
      }
      cleaned.push([key, propertySchema(schema)]);
    }

    // Keyword order matches insertion order: type, properties, then required.
    const base: Record<string, unknown> = {
      type: "object",
      properties: globalThis.Object.fromEntries(cleaned),
    };
    if (required.length > 0) {
      base.required = required;
    }
    return withOptions(base, options);
  },

  /** Return a copy of `schema` tagged with the internal optional marker. */
  Optional(schema: TSchema): TSchema {
    const marked: SchemaObject = { ...(schema as SchemaObject) };
    marked[OPTIONAL_SCHEMA] = true;
    return marked as TSchema;
  },

  /** Build `{ type: "string" }` plus any options. */
  String(options?: TSchemaOptions): TSchema {
    const base = { type: "string" };
    return withOptions(base, options);
  },

  /** Build `{ anyOf: types }` plus any options. */
  Union(types: TSchema[], options?: TSchemaOptions): TSchema {
    const base = { anyOf: types };
    return withOptions(base, options);
  },

  /** Re-type an existing schema as `TUnsafe<Value>`; the object itself is returned unchanged. */
  Unsafe<Value>(schema: TSchema): TUnsafe<Value> {
    const retyped = schema as TUnsafe<Value>;
    return retyped;
  },
};
71
+
72
+ export type JsonSchemaBuilder = typeof JsonSchema;
73
+ export type { TSchema, TSchemaOptions, TUnsafe };