pi-agent-browser-native 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  /**
2
2
  * Purpose: Register the native agent_browser tool for pi so agents can invoke agent-browser without going through bash.
3
- * Responsibilities: Define the tool schema, inject thin wrapper behavior around the upstream CLI, manage implicit session convenience, and return pi-friendly content/details.
3
+ * Responsibilities: Define the tool schema, inject thin wrapper behavior around the upstream CLI, manage extension-owned browser session convenience, and return pi-friendly content/details.
4
4
  * Scope: Native tool registration and orchestration only; the wrapper intentionally stays close to the upstream agent-browser CLI.
5
5
  * Usage: Loaded by pi through the package manifest in this package, or explicitly via `pi --no-extensions -e .` during local checkout development.
6
6
  * Invariants/Assumptions: agent-browser is installed separately on PATH, the wrapper targets the current locally installed upstream version only, and no backward-compatibility shims are provided.
@@ -17,12 +17,18 @@ import {
17
17
  buildExecutionPlan,
18
18
  buildPromptPolicy,
19
19
  createEphemeralSessionSeed,
20
+ createFreshSessionName,
20
21
  createImplicitSessionName,
21
22
  getImplicitSessionCloseTimeoutMs,
22
23
  getImplicitSessionIdleTimeoutMs,
23
24
  getLatestUserPrompt,
24
25
  hasUsableBraveApiKey,
25
- resolveImplicitSessionActiveState,
26
+ redactInvocationArgs,
27
+ redactSensitiveText,
28
+ redactSensitiveValue,
29
+ restoreManagedSessionStateFromBranch,
30
+ resolveManagedSessionState,
31
+ shouldAppendBrowserSystemPrompt,
26
32
  validateToolArgs,
27
33
  } from "./lib/runtime.js";
28
34
  import { cleanupSecureTempArtifacts } from "./lib/temp.js";
@@ -38,7 +44,7 @@ const AGENT_BROWSER_PARAMS = Type.Object({
38
44
  sessionMode: Type.Optional(
39
45
  Type.Union([Type.Literal("auto"), Type.Literal("fresh")], {
40
46
  description:
41
- "Session handling mode. `auto` reuses the implicit pi-scoped session when possible. `fresh` skips the implicit session so startup-scoped flags like --profile, --session-name, or --cdp can launch a fresh upstream session.",
47
+ "Session handling mode. `auto` reuses the extension-managed pi-scoped session when possible. `fresh` switches that managed session to a fresh upstream launch so startup-scoped flags like --profile, --session-name, or --cdp apply and later auto calls follow the new browser.",
42
48
  default: DEFAULT_SESSION_MODE,
43
49
  }),
44
50
  ),
@@ -46,7 +52,7 @@ const AGENT_BROWSER_PARAMS = Type.Object({
46
52
  const PROJECT_RULE_PROMPT =
47
53
  "Project rule: when browser automation is needed, prefer the native `agent_browser` tool. Do not run direct `agent-browser` bash commands unless the user explicitly asks for a bash-oriented workflow or browser-integration debugging.";
48
54
  const QUICK_START_GUIDELINES = [
49
- "Quick start mental model: args are the exact agent-browser CLI args after the binary; stdin is only for batch and eval --stdin; sessionMode=fresh starts a fresh upstream launch when you need new --profile, --session-name, or --cdp state.",
55
+ "Quick start mental model: args are the exact agent-browser CLI args after the binary; stdin is only for batch and eval --stdin; sessionMode=fresh switches the extension-managed session to a fresh upstream launch when you need new --profile, --session-name, or --cdp state.",
50
56
  "Common first calls: { args: [\"open\", \"https://example.com\"] } then { args: [\"snapshot\", \"-i\"] }; after navigation, use { args: [\"click\", \"@e2\"] } then { args: [\"snapshot\", \"-i\"] }.",
51
57
  "Common advanced calls: { args: [\"batch\"], stdin: \"[[\\\"open\\\",\\\"https://example.com\\\"],[\\\"snapshot\\\",\\\"-i\\\"]]\" }, { args: [\"eval\", \"--stdin\"], stdin: \"document.title\" }, and { args: [\"--profile\", \"Default\", \"open\", \"https://example.com/account\"], sessionMode: \"fresh\" }.",
52
58
  ] as const;
@@ -57,7 +63,7 @@ const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
57
63
  "For authenticated or user-specific content like feeds, inboxes, dashboards, and accounts, prefer --profile Default on the first browser call and let the implicit session carry continuity. Use --auto-connect only if profile-based reuse is unavailable or the task is specifically about attaching to a running debug-enabled browser.",
58
64
  "Do not invent fixed explicit session names for routine tasks. Use the implicit session unless you truly need multiple isolated browser sessions in the same conversation.",
59
65
  "When using --profile, --session-name, or --cdp, put them on the first command for that session. If you intentionally use an explicit --session, keep using that same explicit session for follow-ups.",
60
- "If you already used the implicit session and now need startup-scoped flags like --profile, --session-name, or --cdp, retry with sessionMode set to fresh or pass an explicit --session for the new launch.",
66
+ "If you already used the implicit session and now need startup-scoped flags like --profile, --session-name, or --cdp, retry with sessionMode set to fresh or pass an explicit --session for the new launch. After a successful unnamed fresh launch, later auto calls follow that new session.",
61
67
  "If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <n> / snapshot -i to recover state before retrying different URLs or fallback strategies. Only use wait with an explicit argument like milliseconds, --load, --url, --fn, or --text.",
62
68
  "For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.",
63
69
  "For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.",
@@ -71,8 +77,8 @@ const TOOL_PROMPT_GUIDELINES_SUFFIX = [
71
77
  "Do not fall back to osascript, AppleScript, or generic browser-driving bash commands when this tool can do the job.",
72
78
  "Pass exact agent-browser CLI arguments in args, excluding the binary name.",
73
79
  "Use stdin for commands like eval --stdin and batch instead of shell heredocs.",
74
- "Let the implicit session handle the common path unless you explicitly need a fresh launch for upstream flags like --profile, --session-name, or --cdp.",
75
- "Use sessionMode=fresh when switching from an existing implicit session to a new profile/debug launch without inventing a fixed explicit session name.",
80
+ "Let the extension-managed session handle the common path unless you explicitly need a fresh launch for upstream flags like --profile, --session-name, or --cdp.",
81
+ "Use sessionMode=fresh when switching from an existing implicit session to a new profile/debug launch without inventing a fixed explicit session name; later auto calls will follow that new session.",
76
82
  ] as const;
77
83
 
78
84
  function buildMissingBinaryMessage(): string {
@@ -90,16 +96,19 @@ function buildInvocationPreview(effectiveArgs: string[]): string {
90
96
  return preview.length > 120 ? `${preview.slice(0, 117)}...` : preview;
91
97
  }
92
98
 
99
/**
 * Zero or more whitespace-separated option tokens (`-y`, `--yes`, `--package=foo`).
 *
 * A single branch is used on purpose: `--flag` and `--flag=value` are already
 * matched by `-[^\s;&|]+`, and listing them as a second alternative makes every
 * such token matchable two ways, which causes exponential backtracking on
 * commands that carry many flags but do not end in `agent-browser`.
 */
const AGENT_BROWSER_BASH_OPTION_RUN = String.raw`(?:\s+-[^\s;&|]+)*`;

/** Optional `env NAME=value ...` wrapper followed by an optional package-runner launcher (npx, bunx, pnpm dlx, yarn dlx). */
const AGENT_BROWSER_BASH_PREFIX = String.raw`(?:env(?:\s+[A-Za-z_][A-Za-z0-9_]*=[^\s;&|]+)*\s+)?(?:(?:npx|bunx|(?:pnpm|yarn)\s+dlx)${AGENT_BROWSER_BASH_OPTION_RUN}\s+)?`;

/** The `agent-browser` executable name, optionally preceded by a path-like prefix. */
const AGENT_BROWSER_BASH_EXECUTABLE = String.raw`(?:[.~]|\.\.?|\/)?(?:[^\s;&|]+\/)?agent-browser`;

/**
 * Matches `agent-browser` used as a command word: at the start of the string or after
 * whitespace / `;` / `&` / `|`, and followed by whitespace or end of input.
 */
const DIRECT_AGENT_BROWSER_BASH_PATTERN = new RegExp(
  String.raw`(^|[\s;&|])${AGENT_BROWSER_BASH_PREFIX}${AGENT_BROWSER_BASH_EXECUTABLE}(?=\s|$)`,
);

/** Matches lookups of the binary via `command -v`, `which`, or `type -P`. */
const HARMLESS_AGENT_BROWSER_INSPECTION_PATTERN = /(command\s+-v|which|type\s+-P)\s+agent-browser\b/;

/**
 * Reports whether a bash command line appears to invoke agent-browser directly.
 *
 * @param command Raw bash command text.
 * @returns `true` when the direct-invocation pattern matches anywhere in the command.
 */
function looksLikeDirectAgentBrowserBash(command: string): boolean {
  return DIRECT_AGENT_BROWSER_BASH_PATTERN.test(command);
}

/**
 * Reports whether a bash command contains a lookup of the agent-browser binary
 * (`command -v`, `which`, or `type -P`).
 *
 * @param command Raw bash command text.
 * @returns `true` when the inspection pattern matches anywhere in the command.
 */
function isHarmlessAgentBrowserInspectionCommand(command: string): boolean {
  return HARMLESS_AGENT_BROWSER_INSPECTION_PATTERN.test(command);
}
104
113
 
105
114
  const NAVIGATION_SUMMARY_COMMANDS = new Set(["back", "click", "dblclick", "forward", "reload"]);
@@ -187,18 +196,6 @@ function buildSharedBrowserPlaybookGuidelines(hasBraveApiKey: boolean): string[]
187
196
  ];
188
197
  }
189
198
 
190
- function buildBrowserSystemPromptAppendix(hasBraveApiKey: boolean): string {
191
- return [
192
- PROJECT_RULE_PROMPT,
193
- "",
194
- "Quick start:",
195
- ...QUICK_START_GUIDELINES.map((guideline) => `- ${guideline}`),
196
- "",
197
- "Browser operating playbook:",
198
- ...buildSharedBrowserPlaybookGuidelines(hasBraveApiKey).map((guideline) => `- ${guideline}`),
199
- ].join("\n");
200
- }
201
-
202
199
  function buildToolPromptGuidelines(hasBraveApiKey: boolean): string[] {
203
200
  return [
204
201
  ...TOOL_PROMPT_GUIDELINES_PREFIX,
@@ -208,44 +205,85 @@ function buildToolPromptGuidelines(hasBraveApiKey: boolean): string[] {
208
205
  ];
209
206
  }
210
207
 
208
/**
 * Builds the session-related fields that are spread into tool result details.
 *
 * @param sessionName Session name for the invocation, if any.
 * @param usedImplicitSession Flag reported alongside the session name.
 * @returns `{ sessionName, usedImplicitSession }` when `sessionName` is a non-empty string;
 *          otherwise an empty object, so neither key appears in the details.
 */
function buildSessionDetailFields(sessionName: string | undefined, usedImplicitSession: boolean): Record<string, unknown> {
  if (!sessionName) {
    return {};
  }
  return { sessionName, usedImplicitSession };
}
211
+
212
/**
 * Returns a copy of a session recovery hint whose example arguments have been
 * passed through `redactInvocationArgs`.
 *
 * The same redacted array is used for both `exampleArgs` and `exampleParams.args`.
 * All other fields are copied unchanged.
 *
 * @param recoveryHint Hint to redact, or `undefined`.
 * @returns The redacted copy, or `undefined` when no hint was supplied.
 */
function redactRecoveryHint(recoveryHint: {
  exampleArgs: string[];
  exampleParams: { args: string[]; sessionMode: "fresh" };
  reason: string;
  recommendedSessionMode: "fresh";
} | undefined): typeof recoveryHint {
  if (!recoveryHint) {
    return undefined;
  }
  const exampleArgs = redactInvocationArgs(recoveryHint.exampleArgs);
  return {
    ...recoveryHint,
    exampleArgs,
    exampleParams: {
      ...recoveryHint.exampleParams,
      args: exampleArgs,
    },
  };
}
231
+
232
/**
 * Runs `agent-browser --session <sessionName> close` as best-effort cleanup.
 *
 * An AbortController is aborted after `timeoutMs` and its signal is handed to
 * `runAgentBrowserProcess`. Any error thrown by the run is deliberately swallowed,
 * and the timer is always cleared.
 *
 * @param options.cwd Working directory passed to the process runner.
 * @param options.sessionName Session to close.
 * @param options.timeoutMs Delay in milliseconds before the abort signal fires.
 */
async function closeManagedSession(options: { cwd: string; sessionName: string; timeoutMs: number }): Promise<void> {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), options.timeoutMs);
  try {
    await runAgentBrowserProcess({
      args: ["--session", options.sessionName, "close"],
      cwd: options.cwd,
      signal: controller.signal,
    });
  } catch {
    // Best-effort cleanup only.
  } finally {
    clearTimeout(timer);
  }
}
247
+
211
248
  export default function agentBrowserExtension(pi: ExtensionAPI) {
212
249
  const ephemeralSessionSeed = createEphemeralSessionSeed();
213
250
  const hasBraveApiKey = hasUsableBraveApiKey();
214
- const browserSystemPromptAppendix = buildBrowserSystemPromptAppendix(hasBraveApiKey);
215
251
  const toolPromptGuidelines = buildToolPromptGuidelines(hasBraveApiKey);
216
252
  const implicitSessionIdleTimeoutMs = getImplicitSessionIdleTimeoutMs();
217
253
  const implicitSessionCloseTimeoutMs = getImplicitSessionCloseTimeoutMs();
218
- let implicitSessionActive = false;
219
- let implicitSessionName = createImplicitSessionName(undefined, process.cwd(), ephemeralSessionSeed);
220
- let implicitSessionCwd = process.cwd();
254
+ let managedSessionActive = false;
255
+ let managedSessionBaseName = createImplicitSessionName(undefined, process.cwd(), ephemeralSessionSeed);
256
+ let managedSessionName = managedSessionBaseName;
257
+ let managedSessionCwd = process.cwd();
258
+ let freshSessionOrdinal = 0;
221
259
 
222
260
  pi.on("session_start", async (_event, ctx) => {
223
- implicitSessionActive = false;
224
- implicitSessionName = createImplicitSessionName(ctx.sessionManager.getSessionId(), ctx.cwd, ephemeralSessionSeed);
225
- implicitSessionCwd = ctx.cwd;
261
+ managedSessionBaseName = createImplicitSessionName(ctx.sessionManager.getSessionId(), ctx.cwd, ephemeralSessionSeed);
262
+ const restoredState = restoreManagedSessionStateFromBranch(ctx.sessionManager.getBranch(), managedSessionBaseName);
263
+ managedSessionActive = restoredState.active;
264
+ managedSessionName = restoredState.sessionName;
265
+ managedSessionCwd = ctx.cwd;
266
+ freshSessionOrdinal = restoredState.freshSessionOrdinal;
226
267
  });
227
268
 
228
269
  pi.on("session_shutdown", async () => {
229
- implicitSessionActive = false;
230
- const controller = new AbortController();
231
- const timer = setTimeout(() => controller.abort(), implicitSessionCloseTimeoutMs);
232
- try {
233
- await runAgentBrowserProcess({
234
- args: ["--session", implicitSessionName, "close"],
235
- cwd: implicitSessionCwd,
236
- signal: controller.signal,
270
+ if (managedSessionActive) {
271
+ await closeManagedSession({
272
+ cwd: managedSessionCwd,
273
+ sessionName: managedSessionName,
274
+ timeoutMs: implicitSessionCloseTimeoutMs,
237
275
  });
238
- } catch {
239
- // Best-effort cleanup only.
240
- } finally {
241
- clearTimeout(timer);
242
- await cleanupSecureTempArtifacts();
243
276
  }
277
+ managedSessionActive = false;
278
+ await cleanupSecureTempArtifacts();
244
279
  });
245
280
 
246
281
  pi.on("before_agent_start", async (event) => {
282
+ if (!shouldAppendBrowserSystemPrompt(event.prompt)) {
283
+ return undefined;
284
+ }
247
285
  return {
248
- systemPrompt: `${event.systemPrompt}\n\n${browserSystemPromptAppendix}`,
286
+ systemPrompt: `${event.systemPrompt}\n\n${PROJECT_RULE_PROMPT}`,
249
287
  };
250
288
  });
251
289
 
@@ -274,29 +312,38 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
274
312
  promptGuidelines: toolPromptGuidelines,
275
313
  parameters: AGENT_BROWSER_PARAMS,
276
314
  async execute(_toolCallId, params, signal, onUpdate, ctx) {
315
+ const redactedArgs = redactInvocationArgs(params.args);
277
316
  const validationError = validateToolArgs(params.args);
278
317
  if (validationError) {
279
318
  return {
280
319
  content: [{ type: "text", text: validationError }],
281
- details: { args: params.args, validationError },
320
+ details: { args: redactedArgs, validationError },
282
321
  isError: true,
283
322
  };
284
323
  }
285
324
 
286
325
  const sessionMode = params.sessionMode ?? DEFAULT_SESSION_MODE;
326
+ const freshSessionName = createFreshSessionName(managedSessionBaseName, ephemeralSessionSeed, freshSessionOrdinal + 1);
287
327
  const executionPlan = buildExecutionPlan(params.args, {
288
- implicitSessionActive,
289
- implicitSessionName,
328
+ freshSessionName,
329
+ managedSessionActive,
330
+ managedSessionName,
290
331
  sessionMode,
291
332
  });
333
+ const redactedEffectiveArgs = redactInvocationArgs(executionPlan.effectiveArgs);
334
+ const redactedRecoveryHint = redactRecoveryHint(executionPlan.recoveryHint);
335
+ if (executionPlan.managedSessionName === freshSessionName) {
336
+ freshSessionOrdinal += 1;
337
+ }
292
338
 
293
339
  if (executionPlan.validationError) {
294
340
  return {
295
341
  content: [{ type: "text", text: executionPlan.validationError }],
296
342
  details: {
297
- args: params.args,
343
+ args: redactedArgs,
344
+ invalidValueFlag: executionPlan.invalidValueFlag,
298
345
  sessionMode,
299
- sessionRecoveryHint: executionPlan.recoveryHint,
346
+ sessionRecoveryHint: redactedRecoveryHint,
300
347
  startupScopedFlags: executionPlan.startupScopedFlags,
301
348
  validationError: executionPlan.validationError,
302
349
  },
@@ -305,21 +352,18 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
305
352
  }
306
353
 
307
354
  onUpdate?.({
308
- content: [{ type: "text", text: `Running agent-browser ${buildInvocationPreview(executionPlan.effectiveArgs)}` }],
355
+ content: [{ type: "text", text: `Running agent-browser ${buildInvocationPreview(redactedEffectiveArgs)}` }],
309
356
  details: {
310
- effectiveArgs: executionPlan.effectiveArgs,
357
+ effectiveArgs: redactedEffectiveArgs,
311
358
  sessionMode,
312
- sessionName: executionPlan.sessionName,
313
- usedImplicitSession: executionPlan.usedImplicitSession,
359
+ ...buildSessionDetailFields(executionPlan.sessionName, executionPlan.usedImplicitSession),
314
360
  },
315
361
  });
316
362
 
317
363
  const processResult = await runAgentBrowserProcess({
318
364
  args: executionPlan.effectiveArgs,
319
365
  cwd: ctx.cwd,
320
- env: executionPlan.usedImplicitSession
321
- ? { AGENT_BROWSER_IDLE_TIMEOUT_MS: implicitSessionIdleTimeoutMs }
322
- : undefined,
366
+ env: executionPlan.managedSessionName ? { AGENT_BROWSER_IDLE_TIMEOUT_MS: implicitSessionIdleTimeoutMs } : undefined,
323
367
  signal,
324
368
  stdin: params.stdin,
325
369
  });
@@ -329,8 +373,8 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
329
373
  return {
330
374
  content: [{ type: "text", text: errorText }],
331
375
  details: {
332
- args: params.args,
333
- effectiveArgs: executionPlan.effectiveArgs,
376
+ args: redactedArgs,
377
+ effectiveArgs: redactedEffectiveArgs,
334
378
  sessionMode,
335
379
  spawnError: processResult.spawnError.message,
336
380
  },
@@ -345,10 +389,11 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
345
389
  });
346
390
  let presentationEnvelope = parsed.envelope;
347
391
  const processSucceeded = !processResult.aborted && !processResult.spawnError && processResult.exitCode === 0;
348
- const plainTextInspection = isPlainTextInspectionArgs(params.args) && processSucceeded && parsed.parseError !== undefined;
349
- const envelopeSuccess = plainTextInspection ? true : parsed.envelope?.success !== false;
392
+ const plainTextInspection = executionPlan.plainTextInspection && processSucceeded;
350
393
  const parseSucceeded = plainTextInspection || parsed.parseError === undefined;
394
+ const envelopeSuccess = plainTextInspection ? true : parsed.envelope?.success !== false;
351
395
  const succeeded = processSucceeded && parseSucceeded && envelopeSuccess;
396
+ const inspectionText = plainTextInspection ? processResult.stdout.trim() : undefined;
352
397
 
353
398
  let navigationSummary: NavigationSummary | undefined;
354
399
  if (succeeded && shouldCaptureNavigationSummary(executionPlan.commandInfo.command, parsed.envelope?.data)) {
@@ -365,12 +410,27 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
365
410
  }
366
411
  }
367
412
 
368
- implicitSessionActive = resolveImplicitSessionActiveState({
413
+ const priorManagedSessionCwd = managedSessionCwd;
414
+ const managedSessionState = resolveManagedSessionState({
369
415
  command: executionPlan.commandInfo.command,
370
- priorActive: implicitSessionActive,
416
+ managedSessionName: executionPlan.managedSessionName,
417
+ priorActive: managedSessionActive,
418
+ priorSessionName: managedSessionName,
371
419
  succeeded,
372
- usedImplicitSession: executionPlan.usedImplicitSession,
373
420
  });
421
+ const replacedManagedSessionName = managedSessionState.replacedSessionName;
422
+ managedSessionActive = managedSessionState.active;
423
+ managedSessionName = managedSessionState.sessionName;
424
+ if (executionPlan.managedSessionName && succeeded) {
425
+ managedSessionCwd = ctx.cwd;
426
+ }
427
+ if (replacedManagedSessionName) {
428
+ await closeManagedSession({
429
+ cwd: priorManagedSessionCwd,
430
+ sessionName: replacedManagedSessionName,
431
+ timeoutMs: implicitSessionCloseTimeoutMs,
432
+ });
433
+ }
374
434
 
375
435
  const errorText = getAgentBrowserErrorText({
376
436
  aborted: processResult.aborted,
@@ -384,9 +444,15 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
384
444
 
385
445
  const presentation = plainTextInspection
386
446
  ? {
387
- content: [{ type: "text" as const, text: processResult.stdout.trim() }],
447
+ batchFailure: undefined,
448
+ batchSteps: undefined,
449
+ content: [{ type: "text" as const, text: inspectionText ?? "" }],
450
+ data: undefined,
451
+ fullOutputPath: undefined,
452
+ fullOutputPaths: undefined,
388
453
  imagePath: undefined,
389
- summary: `${params.args.join(" ")} completed`,
454
+ imagePaths: undefined,
455
+ summary: `${redactedArgs.join(" ")} completed`,
390
456
  }
391
457
  : await buildToolPresentation({
392
458
  commandInfo: executionPlan.commandInfo,
@@ -394,32 +460,40 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
394
460
  envelope: presentationEnvelope,
395
461
  errorText,
396
462
  });
463
+ const redactedContent = presentation.content.map((item) =>
464
+ item.type === "text" ? { ...item, text: redactSensitiveText(item.text) } : item,
465
+ );
397
466
 
398
467
  return {
399
- content: presentation.content,
468
+ content: redactedContent,
400
469
  details: {
401
- args: params.args,
402
- batchSteps: presentation.batchSteps,
470
+ args: redactedArgs,
471
+ batchFailure: redactSensitiveValue(presentation.batchFailure),
472
+ batchSteps: redactSensitiveValue(presentation.batchSteps),
403
473
  command: executionPlan.commandInfo.command,
404
474
  subcommand: executionPlan.commandInfo.subcommand,
405
- data: presentation.data,
406
- error: parsed.envelope?.error,
407
- navigationSummary,
408
- effectiveArgs: executionPlan.effectiveArgs,
475
+ data: redactSensitiveValue(presentation.data),
476
+ error: plainTextInspection ? undefined : redactSensitiveValue(parsed.envelope?.error),
477
+ inspection: plainTextInspection || undefined,
478
+ navigationSummary: redactSensitiveValue(navigationSummary),
479
+ effectiveArgs: redactedEffectiveArgs,
409
480
  exitCode: processResult.exitCode,
410
481
  fullOutputPath: presentation.fullOutputPath,
411
482
  fullOutputPaths: presentation.fullOutputPaths,
412
483
  imagePath: presentation.imagePath,
413
484
  imagePaths: presentation.imagePaths,
414
- parseError: parsed.parseError,
485
+ parseError: plainTextInspection ? undefined : parsed.parseError,
415
486
  sessionMode,
416
- sessionName: executionPlan.sessionName,
417
- sessionRecoveryHint: executionPlan.recoveryHint,
487
+ ...buildSessionDetailFields(executionPlan.sessionName, executionPlan.usedImplicitSession),
488
+ sessionRecoveryHint: redactedRecoveryHint,
418
489
  startupScopedFlags: executionPlan.startupScopedFlags,
419
- stderr: processResult.stderr || undefined,
420
- stdout: parseSucceeded ? undefined : processResult.stdout,
421
- summary: presentation.summary,
422
- usedImplicitSession: executionPlan.usedImplicitSession,
490
+ stderr: processResult.stderr ? redactSensitiveText(processResult.stderr) : undefined,
491
+ stdout: plainTextInspection
492
+ ? redactSensitiveText(inspectionText ?? "")
493
+ : parseSucceeded
494
+ ? undefined
495
+ : redactSensitiveText(processResult.stdout),
496
+ summary: redactSensitiveText(presentation.summary),
423
497
  },
424
498
  isError: !succeeded,
425
499
  };
@@ -65,7 +65,7 @@ const INHERITED_ENV_NAMES = new Set([
65
65
  allProxyEnvName,
66
66
  noProxyEnvName,
67
67
  ]);
68
- const INHERITED_ENV_PREFIXES = ["AGENT_BROWSER_", "AI_GATEWAY_", "XDG_"] as const;
68
+ const INHERITED_ENV_PREFIXES = ["AI_GATEWAY_", "XDG_"] as const;
69
69
 
70
70
  export interface ProcessRunResult {
71
71
  aborted: boolean;
@@ -10,6 +10,10 @@ import { readFile } from "node:fs/promises";
10
10
 
11
11
  import { type AgentBrowserBatchResult, type AgentBrowserEnvelope, isRecord, stringifyUnknown } from "./shared.js";
12
12
 
13
/**
 * Reports whether `data` is an array containing at least one record whose
 * `success` property is exactly `false`.
 *
 * NOTE(review): the type predicate narrows the whole array to
 * `AgentBrowserBatchResult[]`, although only the presence of one failing record
 * is checked — confirm callers do not rely on every element having that shape.
 *
 * @param data Envelope payload to inspect.
 */
function hasStructuredBatchStepFailure(data: unknown): data is AgentBrowserBatchResult[] {
  if (!Array.isArray(data)) {
    return false;
  }
  return data.some((item) => isRecord(item) && item.success === false);
}
16
+
13
17
  async function readEnvelopeSource(options: { stdout: string; stdoutPath?: string }): Promise<string> {
14
18
  if (!options.stdoutPath) {
15
19
  return options.stdout;
@@ -93,6 +97,9 @@ export function getAgentBrowserErrorText(options: {
93
97
  if (spawnError) return spawnError.message;
94
98
  if (parseError) return parseError;
95
99
  if (envelope?.success === false) {
100
+ if (hasStructuredBatchStepFailure(envelope.data) && envelope.error === undefined) {
101
+ return undefined;
102
+ }
96
103
  return extractEnvelopeErrorText(envelope.error) ?? (stderr.trim() || `agent-browser reported failure${exitCode !== 0 ? ` (exit code ${exitCode})` : "."}`);
97
104
  }
98
105
  if (exitCode !== 0) {
@@ -14,6 +14,7 @@ import { buildSnapshotPresentation, formatRawSnapshotText, formatSnapshotSummary
14
14
  import {
15
15
  type AgentBrowserBatchResult,
16
16
  type AgentBrowserEnvelope,
17
+ type BatchFailurePresentationDetails,
17
18
  type BatchStepPresentationDetails,
18
19
  type ToolPresentation,
19
20
  isRecord,
@@ -188,6 +189,20 @@ function formatBatchStepError(error: unknown): string {
188
189
  return errorText.length > 0 ? `Error: ${errorText}` : "Error: batch step failed.";
189
190
  }
190
191
 
192
/**
 * Summarizes the failures in a list of presented batch steps.
 *
 * A step counts as failed when `details.success === false`.
 *
 * @param steps Batch steps in execution order.
 * @returns `undefined` when no step failed; otherwise the details of the first
 *          failing step together with failure, success, and total counts.
 */
function getBatchFailureDetails(steps: Array<{ details: BatchStepPresentationDetails }>): BatchFailurePresentationDetails | undefined {
  const failedSteps = steps.filter((step) => step.details.success === false);
  // Destructure instead of indexing so the "no failures" case is handled by an
  // explicit guard, which also type-checks under `noUncheckedIndexedAccess`.
  const [firstFailure] = failedSteps;
  if (firstFailure === undefined) {
    return undefined;
  }
  return {
    failedStep: firstFailure.details,
    failureCount: failedSteps.length,
    successCount: steps.length - failedSteps.length,
    totalCount: steps.length,
  };
}
205
+
191
206
  async function buildBatchStepPresentation(options: {
192
207
  cwd: string;
193
208
  index: number;
@@ -261,6 +276,7 @@ async function buildBatchPresentation(options: {
261
276
  steps.push(await buildBatchStepPresentation({ cwd, index, item }));
262
277
  }
263
278
 
279
+ const batchFailure = getBatchFailureDetails(steps);
264
280
  const images = steps.flatMap((step) => getPresentationImages(step.presentation));
265
281
  const fullOutputPaths = steps.flatMap((step) => getPresentationPaths({
266
282
  primaryPath: step.presentation.fullOutputPath,
@@ -270,13 +286,14 @@ async function buildBatchPresentation(options: {
270
286
  primaryPath: step.presentation.imagePath,
271
287
  secondaryPaths: step.presentation.imagePaths,
272
288
  }));
273
- const text =
289
+ const stepText =
274
290
  steps.length === 0
275
291
  ? "(no batch steps)"
276
292
  : steps
277
293
  .map(({ details, presentation }) => {
278
294
  const inlineImageCount = getPresentationImages(presentation).length;
279
- const lines = [`Step ${details.index + 1} ${details.commandText}`];
295
+ const status = details.success ? "succeeded" : "failed";
296
+ const lines = [`Step ${details.index + 1} — ${details.commandText} (${status})`];
280
297
  if (details.text.length > 0) {
281
298
  lines.push(details.text);
282
299
  }
@@ -286,8 +303,20 @@ async function buildBatchPresentation(options: {
286
303
  return lines.join("\n");
287
304
  })
288
305
  .join("\n\n");
306
+ const failureHeader =
307
+ batchFailure === undefined
308
+ ? undefined
309
+ : [
310
+ summary,
311
+ `First failing step: ${batchFailure.failedStep.index + 1} — ${batchFailure.failedStep.commandText}`,
312
+ batchFailure.failureCount > 1
313
+ ? `${batchFailure.failureCount} steps failed. See the per-step results below.`
314
+ : "See the per-step results below.",
315
+ ].join("\n");
316
+ const text = failureHeader ? `${failureHeader}\n\n${stepText}` : stepText;
289
317
 
290
318
  return {
319
+ batchFailure,
291
320
  batchSteps: steps.map((step) => step.details),
292
321
  content: [{ type: "text", text }, ...images],
293
322
  data,
@@ -302,7 +331,7 @@ async function buildBatchPresentation(options: {
302
331
  function formatSummary(commandInfo: CommandInfo, data: unknown): string {
303
332
  if (Array.isArray(data) && commandInfo.command === "batch") {
304
333
  const successCount = data.filter((item) => isRecord(item) && item.success !== false).length;
305
- return `Batch: ${successCount}/${data.length} succeeded`;
334
+ return successCount === data.length ? `Batch: ${successCount}/${data.length} succeeded` : `Batch failed: ${successCount}/${data.length} succeeded`;
306
335
  }
307
336
  if (isRecord(data)) {
308
337
  const navigationSummary = getNavigationSummary(data);
@@ -33,7 +33,15 @@ export interface BatchStepPresentationDetails {
33
33
  text: string;
34
34
  }
35
35
 
36
/** Aggregate description of a batch run in which at least one step failed. */
export interface BatchFailurePresentationDetails {
  /** Presentation details of the failing step being reported. */
  failedStep: BatchStepPresentationDetails;
  /** Number of steps that failed. */
  failureCount: number;
  /** Number of steps that did not fail. */
  successCount: number;
  /** Total number of steps in the batch. */
  totalCount: number;
}
42
+
36
43
  export interface ToolPresentation {
44
+ batchFailure?: BatchFailurePresentationDetails;
37
45
  batchSteps?: BatchStepPresentationDetails[];
38
46
  content: Array<{ text: string; type: "text" } | { data: string; mimeType: string; type: "image" }>;
39
47
  data?: unknown;