npm - pi-agent-browser-native - Versions diffs - 0.2.0 → 0.2.2 - Mend

pi-agent-browser-native 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/CHANGELOG.md +22 -2
package/README.md +19 -11
package/docs/ARCHITECTURE.md +12 -7
package/docs/REQUIREMENTS.md +1 -1
package/docs/TOOL_CONTRACT.md +27 -11
package/extensions/agent-browser/index.ts +155 -81
package/extensions/agent-browser/lib/process.ts +1 -1
package/extensions/agent-browser/lib/results/envelope.ts +7 -0
package/extensions/agent-browser/lib/results/presentation.ts +32 -3
package/extensions/agent-browser/lib/results/shared.ts +8 -0
package/extensions/agent-browser/lib/runtime.ts +369 -25
package/package.json +1 -1

package/extensions/agent-browser/index.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 /**
  * Purpose: Register the native agent_browser tool for pi so agents can invoke agent-browser without going through bash.
- * Responsibilities: Define the tool schema, inject thin wrapper behavior around the upstream CLI, manage implicit session convenience, and return pi-friendly content/details.
+ * Responsibilities: Define the tool schema, inject thin wrapper behavior around the upstream CLI, manage extension-owned browser session convenience, and return pi-friendly content/details.
  * Scope: Native tool registration and orchestration only; the wrapper intentionally stays close to the upstream agent-browser CLI.
  * Usage: Loaded by pi through the package manifest in this package, or explicitly via `pi --no-extensions -e .` during local checkout development.
  * Invariants/Assumptions: agent-browser is installed separately on PATH, the wrapper targets the current locally installed upstream version only, and no backward-compatibility shims are provided.
@@ -17,12 +17,18 @@ import {
 	buildExecutionPlan,
 	buildPromptPolicy,
 	createEphemeralSessionSeed,
+	createFreshSessionName,
 	createImplicitSessionName,
 	getImplicitSessionCloseTimeoutMs,
 	getImplicitSessionIdleTimeoutMs,
 	getLatestUserPrompt,
 	hasUsableBraveApiKey,
-	resolveImplicitSessionActiveState,
+	redactInvocationArgs,
+	redactSensitiveText,
+	redactSensitiveValue,
+	restoreManagedSessionStateFromBranch,
+	resolveManagedSessionState,
+	shouldAppendBrowserSystemPrompt,
 	validateToolArgs,
 } from "./lib/runtime.js";
 import { cleanupSecureTempArtifacts } from "./lib/temp.js";
@@ -38,7 +44,7 @@ const AGENT_BROWSER_PARAMS = Type.Object({
 	sessionMode: Type.Optional(
 		Type.Union([Type.Literal("auto"), Type.Literal("fresh")], {
 			description:
-				"Session handling mode. `auto` reuses the implicit pi-scoped session when possible. `fresh` skips the implicit session so startup-scoped flags like --profile, --session-name, or --cdp can launch a fresh upstream session.",
+				"Session handling mode. `auto` reuses the extension-managed pi-scoped session when possible. `fresh` switches that managed session to a fresh upstream launch so startup-scoped flags like --profile, --session-name, or --cdp apply and later auto calls follow the new browser.",
 			default: DEFAULT_SESSION_MODE,
 		}),
 	),
@@ -46,7 +52,7 @@ const AGENT_BROWSER_PARAMS = Type.Object({
 const PROJECT_RULE_PROMPT =
 	"Project rule: when browser automation is needed, prefer the native `agent_browser` tool. Do not run direct `agent-browser` bash commands unless the user explicitly asks for a bash-oriented workflow or browser-integration debugging.";
 const QUICK_START_GUIDELINES = [
-	"Quick start mental model: args are the exact agent-browser CLI args after the binary; stdin is only for batch and eval --stdin; sessionMode=fresh starts a fresh upstream launch when you need new --profile, --session-name, or --cdp state.",
+	"Quick start mental model: args are the exact agent-browser CLI args after the binary; stdin is only for batch and eval --stdin; sessionMode=fresh switches the extension-managed session to a fresh upstream launch when you need new --profile, --session-name, or --cdp state.",
 	"Common first calls: { args: [\"open\", \"https://example.com\"] } then { args: [\"snapshot\", \"-i\"] }; after navigation, use { args: [\"click\", \"@e2\"] } then { args: [\"snapshot\", \"-i\"] }.",
 	"Common advanced calls: { args: [\"batch\"], stdin: \"[[\\\"open\\\",\\\"https://example.com\\\"],[\\\"snapshot\\\",\\\"-i\\\"]]\" }, { args: [\"eval\", \"--stdin\"], stdin: \"document.title\" }, and { args: [\"--profile\", \"Default\", \"open\", \"https://example.com/account\"], sessionMode: \"fresh\" }.",
 ] as const;
@@ -57,7 +63,7 @@ const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
 	"For authenticated or user-specific content like feeds, inboxes, dashboards, and accounts, prefer --profile Default on the first browser call and let the implicit session carry continuity. Use --auto-connect only if profile-based reuse is unavailable or the task is specifically about attaching to a running debug-enabled browser.",
 	"Do not invent fixed explicit session names for routine tasks. Use the implicit session unless you truly need multiple isolated browser sessions in the same conversation.",
 	"When using --profile, --session-name, or --cdp, put them on the first command for that session. If you intentionally use an explicit --session, keep using that same explicit session for follow-ups.",
-	"If you already used the implicit session and now need startup-scoped flags like --profile, --session-name, or --cdp, retry with sessionMode set to fresh or pass an explicit --session for the new launch.",
+	"If you already used the implicit session and now need startup-scoped flags like --profile, --session-name, or --cdp, retry with sessionMode set to fresh or pass an explicit --session for the new launch. After a successful unnamed fresh launch, later auto calls follow that new session.",
 	"If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <n> / snapshot -i to recover state before retrying different URLs or fallback strategies. Only use wait with an explicit argument like milliseconds, --load, --url, --fn, or --text.",
 	"For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.",
 	"For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.",
@@ -71,8 +77,8 @@ const TOOL_PROMPT_GUIDELINES_SUFFIX = [
 	"Do not fall back to osascript, AppleScript, or generic browser-driving bash commands when this tool can do the job.",
 	"Pass exact agent-browser CLI arguments in args, excluding the binary name.",
 	"Use stdin for commands like eval --stdin and batch instead of shell heredocs.",
-	"Let the implicit session handle the common path unless you explicitly need a fresh launch for upstream flags like --profile, --session-name, or --cdp.",
-	"Use sessionMode=fresh when switching from an existing implicit session to a new profile/debug launch without inventing a fixed explicit session name.",
+	"Let the extension-managed session handle the common path unless you explicitly need a fresh launch for upstream flags like --profile, --session-name, or --cdp.",
+	"Use sessionMode=fresh when switching from an existing implicit session to a new profile/debug launch without inventing a fixed explicit session name; later auto calls will follow that new session.",
 ] as const;
 function buildMissingBinaryMessage(): string {
@@ -90,16 +96,19 @@ function buildInvocationPreview(effectiveArgs: string[]): string {
 	return preview.length > 120 ? `${preview.slice(0, 117)}...` : preview;
 }
+const AGENT_BROWSER_BASH_PREFIX = String.raw`(?:env(?:\s+[A-Za-z_][A-Za-z0-9_]*=[^\s;&|]+)*\s+)?(?:(?:npx|bunx)(?:\s+-[^\s;&|]+|\s+--[^\s;&|]+(?:=[^\s;&|]+)?)*\s+|(?:pnpm|yarn)\s+dlx(?:\s+-[^\s;&|]+|\s+--[^\s;&|]+(?:=[^\s;&|]+)?)*\s+)?`;
+const AGENT_BROWSER_BASH_EXECUTABLE = String.raw`(?:[.~]|\.\.?|\/)?(?:[^\s;&|]+\/)?agent-browser`;
+const DIRECT_AGENT_BROWSER_BASH_PATTERN = new RegExp(
+	String.raw`(^|[\s;&|])${AGENT_BROWSER_BASH_PREFIX}${AGENT_BROWSER_BASH_EXECUTABLE}(?=\s|$)`,
+);
+const HARMLESS_AGENT_BROWSER_INSPECTION_PATTERN = /(command\s+-v|which|type\s+-P)\s+agent-browser\b/;
 function looksLikeDirectAgentBrowserBash(command: string): boolean {
-	return /(^|[\s;&|])(npx\s+)?agent-browser(\s|$)/.test(command);
+	return DIRECT_AGENT_BROWSER_BASH_PATTERN.test(command);
 }
 function isHarmlessAgentBrowserInspectionCommand(command: string): boolean {
-	return /(command\s+-v|which)\s+agent-browser\b/.test(command) || /(^|\s)agent-browser\s+--(help|version)\b/.test(command);
-}
-function isPlainTextInspectionArgs(args: string[]): boolean {
-	return args.includes("--help") || args.includes("-h") || args.includes("--version") || args.includes("-V");
+	return HARMLESS_AGENT_BROWSER_INSPECTION_PATTERN.test(command);
 }
 const NAVIGATION_SUMMARY_COMMANDS = new Set(["back", "click", "dblclick", "forward", "reload"]);
@@ -187,18 +196,6 @@ function buildSharedBrowserPlaybookGuidelines(hasBraveApiKey: boolean): string[]
 	];
 }
-function buildBrowserSystemPromptAppendix(hasBraveApiKey: boolean): string {
-	return [
-		PROJECT_RULE_PROMPT,
-		"",
-		"Quick start:",
-		...QUICK_START_GUIDELINES.map((guideline) => `- ${guideline}`),
-		"",
-		"Browser operating playbook:",
-		...buildSharedBrowserPlaybookGuidelines(hasBraveApiKey).map((guideline) => `- ${guideline}`),
-	].join("\n");
-}
 function buildToolPromptGuidelines(hasBraveApiKey: boolean): string[] {
 	return [
 		...TOOL_PROMPT_GUIDELINES_PREFIX,
@@ -208,44 +205,85 @@ function buildToolPromptGuidelines(hasBraveApiKey: boolean): string[] {
 	];
 }
+function buildSessionDetailFields(sessionName: string | undefined, usedImplicitSession: boolean): Record<string, unknown> {
+	return sessionName ? { sessionName, usedImplicitSession } : {};
+}
+function redactRecoveryHint(recoveryHint: {
+	exampleArgs: string[];
+	exampleParams: { args: string[]; sessionMode: "fresh" };
+	reason: string;
+	recommendedSessionMode: "fresh";
+} | undefined): typeof recoveryHint {
+	if (!recoveryHint) {
+		return undefined;
+	}
+	const exampleArgs = redactInvocationArgs(recoveryHint.exampleArgs);
+	return {
+		...recoveryHint,
+		exampleArgs,
+		exampleParams: {
+			...recoveryHint.exampleParams,
+			args: exampleArgs,
+		},
+	};
+}
+async function closeManagedSession(options: { cwd: string; sessionName: string; timeoutMs: number }): Promise<void> {
+	const controller = new AbortController();
+	const timer = setTimeout(() => controller.abort(), options.timeoutMs);
+	try {
+		await runAgentBrowserProcess({
+			args: ["--session", options.sessionName, "close"],
+			cwd: options.cwd,
+			signal: controller.signal,
+		});
+	} catch {
+		// Best-effort cleanup only.
+	} finally {
+		clearTimeout(timer);
+	}
+}
 export default function agentBrowserExtension(pi: ExtensionAPI) {
 	const ephemeralSessionSeed = createEphemeralSessionSeed();
 	const hasBraveApiKey = hasUsableBraveApiKey();
-	const browserSystemPromptAppendix = buildBrowserSystemPromptAppendix(hasBraveApiKey);
 	const toolPromptGuidelines = buildToolPromptGuidelines(hasBraveApiKey);
 	const implicitSessionIdleTimeoutMs = getImplicitSessionIdleTimeoutMs();
 	const implicitSessionCloseTimeoutMs = getImplicitSessionCloseTimeoutMs();
-	let implicitSessionActive = false;
-	let implicitSessionName = createImplicitSessionName(undefined, process.cwd(), ephemeralSessionSeed);
-	let implicitSessionCwd = process.cwd();
+	let managedSessionActive = false;
+	let managedSessionBaseName = createImplicitSessionName(undefined, process.cwd(), ephemeralSessionSeed);
+	let managedSessionName = managedSessionBaseName;
+	let managedSessionCwd = process.cwd();
+	let freshSessionOrdinal = 0;
 	pi.on("session_start", async (_event, ctx) => {
-		implicitSessionActive = false;
-		implicitSessionName = createImplicitSessionName(ctx.sessionManager.getSessionId(), ctx.cwd, ephemeralSessionSeed);
-		implicitSessionCwd = ctx.cwd;
+		managedSessionBaseName = createImplicitSessionName(ctx.sessionManager.getSessionId(), ctx.cwd, ephemeralSessionSeed);
+		const restoredState = restoreManagedSessionStateFromBranch(ctx.sessionManager.getBranch(), managedSessionBaseName);
+		managedSessionActive = restoredState.active;
+		managedSessionName = restoredState.sessionName;
+		managedSessionCwd = ctx.cwd;
+		freshSessionOrdinal = restoredState.freshSessionOrdinal;
 	});
 	pi.on("session_shutdown", async () => {
-		implicitSessionActive = false;
-		const controller = new AbortController();
-		const timer = setTimeout(() => controller.abort(), implicitSessionCloseTimeoutMs);
-		try {
-			await runAgentBrowserProcess({
-				args: ["--session", implicitSessionName, "close"],
-				cwd: implicitSessionCwd,
-				signal: controller.signal,
+		if (managedSessionActive) {
+			await closeManagedSession({
+				cwd: managedSessionCwd,
+				sessionName: managedSessionName,
+				timeoutMs: implicitSessionCloseTimeoutMs,
 			});
-		} catch {
-			// Best-effort cleanup only.
-		} finally {
-			clearTimeout(timer);
-			await cleanupSecureTempArtifacts();
 		}
+		managedSessionActive = false;
+		await cleanupSecureTempArtifacts();
 	});
 	pi.on("before_agent_start", async (event) => {
+		if (!shouldAppendBrowserSystemPrompt(event.prompt)) {
+			return undefined;
+		}
 		return {
-			systemPrompt: `${event.systemPrompt}\n\n${browserSystemPromptAppendix}`,
+			systemPrompt: `${event.systemPrompt}\n\n${PROJECT_RULE_PROMPT}`,
 		};
 	});
@@ -274,29 +312,38 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 		promptGuidelines: toolPromptGuidelines,
 		parameters: AGENT_BROWSER_PARAMS,
 		async execute(_toolCallId, params, signal, onUpdate, ctx) {
+			const redactedArgs = redactInvocationArgs(params.args);
 			const validationError = validateToolArgs(params.args);
 			if (validationError) {
 				return {
 					content: [{ type: "text", text: validationError }],
-					details: { args: params.args, validationError },
+					details: { args: redactedArgs, validationError },
 					isError: true,
 				};
 			}
 			const sessionMode = params.sessionMode ?? DEFAULT_SESSION_MODE;
+			const freshSessionName = createFreshSessionName(managedSessionBaseName, ephemeralSessionSeed, freshSessionOrdinal + 1);
 			const executionPlan = buildExecutionPlan(params.args, {
-				implicitSessionActive,
-				implicitSessionName,
+				freshSessionName,
+				managedSessionActive,
+				managedSessionName,
 				sessionMode,
 			});
+			const redactedEffectiveArgs = redactInvocationArgs(executionPlan.effectiveArgs);
+			const redactedRecoveryHint = redactRecoveryHint(executionPlan.recoveryHint);
+			if (executionPlan.managedSessionName === freshSessionName) {
+				freshSessionOrdinal += 1;
+			}
 			if (executionPlan.validationError) {
 				return {
 					content: [{ type: "text", text: executionPlan.validationError }],
 					details: {
-						args: params.args,
+						args: redactedArgs,
+						invalidValueFlag: executionPlan.invalidValueFlag,
 						sessionMode,
-						sessionRecoveryHint: executionPlan.recoveryHint,
+						sessionRecoveryHint: redactedRecoveryHint,
 						startupScopedFlags: executionPlan.startupScopedFlags,
 						validationError: executionPlan.validationError,
 					},
@@ -305,21 +352,18 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 			}
 			onUpdate?.({
-				content: [{ type: "text", text: `Running agent-browser ${buildInvocationPreview(executionPlan.effectiveArgs)}` }],
+				content: [{ type: "text", text: `Running agent-browser ${buildInvocationPreview(redactedEffectiveArgs)}` }],
 				details: {
-					effectiveArgs: executionPlan.effectiveArgs,
+					effectiveArgs: redactedEffectiveArgs,
 					sessionMode,
-					sessionName: executionPlan.sessionName,
-					usedImplicitSession: executionPlan.usedImplicitSession,
+					...buildSessionDetailFields(executionPlan.sessionName, executionPlan.usedImplicitSession),
 				},
 			});
 			const processResult = await runAgentBrowserProcess({
 				args: executionPlan.effectiveArgs,
 				cwd: ctx.cwd,
-				env: executionPlan.usedImplicitSession
-					? { AGENT_BROWSER_IDLE_TIMEOUT_MS: implicitSessionIdleTimeoutMs }
-					: undefined,
+				env: executionPlan.managedSessionName ? { AGENT_BROWSER_IDLE_TIMEOUT_MS: implicitSessionIdleTimeoutMs } : undefined,
 				signal,
 				stdin: params.stdin,
 			});
@@ -329,8 +373,8 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 				return {
 					content: [{ type: "text", text: errorText }],
 					details: {
-						args: params.args,
-						effectiveArgs: executionPlan.effectiveArgs,
+						args: redactedArgs,
+						effectiveArgs: redactedEffectiveArgs,
 						sessionMode,
 						spawnError: processResult.spawnError.message,
 					},
@@ -345,10 +389,11 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 				});
 				let presentationEnvelope = parsed.envelope;
 				const processSucceeded = !processResult.aborted && !processResult.spawnError && processResult.exitCode === 0;
-				const plainTextInspection = isPlainTextInspectionArgs(params.args) && processSucceeded && parsed.parseError !== undefined;
-				const envelopeSuccess = plainTextInspection ? true : parsed.envelope?.success !== false;
+				const plainTextInspection = executionPlan.plainTextInspection && processSucceeded;
 				const parseSucceeded = plainTextInspection || parsed.parseError === undefined;
+				const envelopeSuccess = plainTextInspection ? true : parsed.envelope?.success !== false;
 				const succeeded = processSucceeded && parseSucceeded && envelopeSuccess;
+				const inspectionText = plainTextInspection ? processResult.stdout.trim() : undefined;
 				let navigationSummary: NavigationSummary | undefined;
 				if (succeeded && shouldCaptureNavigationSummary(executionPlan.commandInfo.command, parsed.envelope?.data)) {
@@ -365,12 +410,27 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 					}
 				}
-				implicitSessionActive = resolveImplicitSessionActiveState({
+				const priorManagedSessionCwd = managedSessionCwd;
+				const managedSessionState = resolveManagedSessionState({
 					command: executionPlan.commandInfo.command,
-					priorActive: implicitSessionActive,
+					managedSessionName: executionPlan.managedSessionName,
+					priorActive: managedSessionActive,
+					priorSessionName: managedSessionName,
 					succeeded,
-					usedImplicitSession: executionPlan.usedImplicitSession,
 				});
+				const replacedManagedSessionName = managedSessionState.replacedSessionName;
+				managedSessionActive = managedSessionState.active;
+				managedSessionName = managedSessionState.sessionName;
+				if (executionPlan.managedSessionName && succeeded) {
+					managedSessionCwd = ctx.cwd;
+				}
+				if (replacedManagedSessionName) {
+					await closeManagedSession({
+						cwd: priorManagedSessionCwd,
+						sessionName: replacedManagedSessionName,
+						timeoutMs: implicitSessionCloseTimeoutMs,
+					});
+				}
 				const errorText = getAgentBrowserErrorText({
 					aborted: processResult.aborted,
@@ -384,9 +444,15 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 				const presentation = plainTextInspection
 					? {
-						content: [{ type: "text" as const, text: processResult.stdout.trim() }],
+						batchFailure: undefined,
+						batchSteps: undefined,
+						content: [{ type: "text" as const, text: inspectionText ?? "" }],
+						data: undefined,
+						fullOutputPath: undefined,
+						fullOutputPaths: undefined,
 						imagePath: undefined,
-						summary: `${params.args.join(" ")} completed`,
+						imagePaths: undefined,
+						summary: `${redactedArgs.join(" ")} completed`,
 					  }
 					: await buildToolPresentation({
 							commandInfo: executionPlan.commandInfo,
@@ -394,32 +460,40 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 							envelope: presentationEnvelope,
 							errorText,
 					  });
+				const redactedContent = presentation.content.map((item) =>
+					item.type === "text" ? { ...item, text: redactSensitiveText(item.text) } : item,
+				);
 				return {
-					content: presentation.content,
+					content: redactedContent,
 					details: {
-						args: params.args,
-						batchSteps: presentation.batchSteps,
+						args: redactedArgs,
+						batchFailure: redactSensitiveValue(presentation.batchFailure),
+						batchSteps: redactSensitiveValue(presentation.batchSteps),
 						command: executionPlan.commandInfo.command,
 						subcommand: executionPlan.commandInfo.subcommand,
-						data: presentation.data,
-						error: parsed.envelope?.error,
-						navigationSummary,
-						effectiveArgs: executionPlan.effectiveArgs,
+						data: redactSensitiveValue(presentation.data),
+						error: plainTextInspection ? undefined : redactSensitiveValue(parsed.envelope?.error),
+						inspection: plainTextInspection || undefined,
+						navigationSummary: redactSensitiveValue(navigationSummary),
+						effectiveArgs: redactedEffectiveArgs,
 						exitCode: processResult.exitCode,
 						fullOutputPath: presentation.fullOutputPath,
 						fullOutputPaths: presentation.fullOutputPaths,
 						imagePath: presentation.imagePath,
 						imagePaths: presentation.imagePaths,
-						parseError: parsed.parseError,
+						parseError: plainTextInspection ? undefined : parsed.parseError,
 						sessionMode,
-						sessionName: executionPlan.sessionName,
-						sessionRecoveryHint: executionPlan.recoveryHint,
+						...buildSessionDetailFields(executionPlan.sessionName, executionPlan.usedImplicitSession),
+						sessionRecoveryHint: redactedRecoveryHint,
 						startupScopedFlags: executionPlan.startupScopedFlags,
-						stderr: processResult.stderr || undefined,
-						stdout: parseSucceeded ? undefined : processResult.stdout,
-						summary: presentation.summary,
-						usedImplicitSession: executionPlan.usedImplicitSession,
+						stderr: processResult.stderr ? redactSensitiveText(processResult.stderr) : undefined,
+						stdout: plainTextInspection
+							? redactSensitiveText(inspectionText ?? "")
+							: parseSucceeded
+								? undefined
+								: redactSensitiveText(processResult.stdout),
+						summary: redactSensitiveText(presentation.summary),
 					},
 					isError: !succeeded,
 				};

package/extensions/agent-browser/lib/process.ts CHANGED Viewed

@@ -65,7 +65,7 @@ const INHERITED_ENV_NAMES = new Set([
 	allProxyEnvName,
 	noProxyEnvName,
 ]);
-const INHERITED_ENV_PREFIXES = ["AGENT_BROWSER_", "AI_GATEWAY_", "XDG_"] as const;
+const INHERITED_ENV_PREFIXES = ["AI_GATEWAY_", "XDG_"] as const;
 export interface ProcessRunResult {
 	aborted: boolean;

package/extensions/agent-browser/lib/results/envelope.ts CHANGED Viewed

@@ -10,6 +10,10 @@ import { readFile } from "node:fs/promises";
 import { type AgentBrowserBatchResult, type AgentBrowserEnvelope, isRecord, stringifyUnknown } from "./shared.js";
+function hasStructuredBatchStepFailure(data: unknown): data is AgentBrowserBatchResult[] {
+	return Array.isArray(data) && data.some((item) => isRecord(item) && item.success === false);
+}
 async function readEnvelopeSource(options: { stdout: string; stdoutPath?: string }): Promise<string> {
 	if (!options.stdoutPath) {
 		return options.stdout;
@@ -93,6 +97,9 @@ export function getAgentBrowserErrorText(options: {
 	if (spawnError) return spawnError.message;
 	if (parseError) return parseError;
 	if (envelope?.success === false) {
+		if (hasStructuredBatchStepFailure(envelope.data) && envelope.error === undefined) {
+			return undefined;
+		}
 		return extractEnvelopeErrorText(envelope.error) ?? (stderr.trim() || `agent-browser reported failure${exitCode !== 0 ? ` (exit code ${exitCode})` : "."}`);
 	}
 	if (exitCode !== 0) {

package/extensions/agent-browser/lib/results/presentation.ts CHANGED Viewed

@@ -14,6 +14,7 @@ import { buildSnapshotPresentation, formatRawSnapshotText, formatSnapshotSummary
 import {
 	type AgentBrowserBatchResult,
 	type AgentBrowserEnvelope,
+	type BatchFailurePresentationDetails,
 	type BatchStepPresentationDetails,
 	type ToolPresentation,
 	isRecord,
@@ -188,6 +189,20 @@ function formatBatchStepError(error: unknown): string {
 	return errorText.length > 0 ? `Error: ${errorText}` : "Error: batch step failed.";
 }
+function getBatchFailureDetails(steps: Array<{ details: BatchStepPresentationDetails }>): BatchFailurePresentationDetails | undefined {
+	const failedSteps = steps.filter((step) => step.details.success === false);
+	if (failedSteps.length === 0) {
+		return undefined;
+	}
+	const successCount = steps.length - failedSteps.length;
+	return {
+		failedStep: failedSteps[0].details,
+		failureCount: failedSteps.length,
+		successCount,
+		totalCount: steps.length,
+	};
+}
 async function buildBatchStepPresentation(options: {
 	cwd: string;
 	index: number;
@@ -261,6 +276,7 @@ async function buildBatchPresentation(options: {
 		steps.push(await buildBatchStepPresentation({ cwd, index, item }));
 	}
+	const batchFailure = getBatchFailureDetails(steps);
 	const images = steps.flatMap((step) => getPresentationImages(step.presentation));
 	const fullOutputPaths = steps.flatMap((step) => getPresentationPaths({
 		primaryPath: step.presentation.fullOutputPath,
@@ -270,13 +286,14 @@ async function buildBatchPresentation(options: {
 		primaryPath: step.presentation.imagePath,
 		secondaryPaths: step.presentation.imagePaths,
 	}));
-	const text =
+	const stepText =
 		steps.length === 0
 			? "(no batch steps)"
 			: steps
 				.map(({ details, presentation }) => {
 					const inlineImageCount = getPresentationImages(presentation).length;
-					const lines = [`Step ${details.index + 1} — ${details.commandText}`];
+					const status = details.success ? "succeeded" : "failed";
+					const lines = [`Step ${details.index + 1} — ${details.commandText} (${status})`];
 					if (details.text.length > 0) {
 						lines.push(details.text);
 					}
@@ -286,8 +303,20 @@ async function buildBatchPresentation(options: {
 					return lines.join("\n");
 				})
 				.join("\n\n");
+	const failureHeader =
+		batchFailure === undefined
+			? undefined
+			: [
+					summary,
+					`First failing step: ${batchFailure.failedStep.index + 1} — ${batchFailure.failedStep.commandText}`,
+					batchFailure.failureCount > 1
+						? `${batchFailure.failureCount} steps failed. See the per-step results below.`
+						: "See the per-step results below.",
+				].join("\n");
+	const text = failureHeader ? `${failureHeader}\n\n${stepText}` : stepText;
 	return {
+		batchFailure,
 		batchSteps: steps.map((step) => step.details),
 		content: [{ type: "text", text }, ...images],
 		data,
@@ -302,7 +331,7 @@ async function buildBatchPresentation(options: {
 function formatSummary(commandInfo: CommandInfo, data: unknown): string {
 	if (Array.isArray(data) && commandInfo.command === "batch") {
 		const successCount = data.filter((item) => isRecord(item) && item.success !== false).length;
-		return `Batch: ${successCount}/${data.length} succeeded`;
+		return successCount === data.length ? `Batch: ${successCount}/${data.length} succeeded` : `Batch failed: ${successCount}/${data.length} succeeded`;
 	}
 	if (isRecord(data)) {
 		const navigationSummary = getNavigationSummary(data);

package/extensions/agent-browser/lib/results/shared.ts CHANGED Viewed

@@ -33,7 +33,15 @@ export interface BatchStepPresentationDetails {
 	text: string;
 }
+export interface BatchFailurePresentationDetails {
+	failedStep: BatchStepPresentationDetails;
+	failureCount: number;
+	successCount: number;
+	totalCount: number;
+}
 export interface ToolPresentation {
+	batchFailure?: BatchFailurePresentationDetails;
 	batchSteps?: BatchStepPresentationDetails[];
 	content: Array<{ text: string; type: "text" } | { data: string; mimeType: string; type: "image" }>;
 	data?: unknown;