npm - pi-agent-browser-native - Versions diffs - 0.1.5 → 0.2.0 - Mend

pi-agent-browser-native 0.1.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/CHANGELOG.md +23 -0
package/README.md +89 -2
package/docs/ARCHITECTURE.md +7 -3
package/docs/RELEASE.md +1 -1
package/docs/TOOL_CONTRACT.md +18 -10
package/extensions/agent-browser/index.ts +185 -62
package/extensions/agent-browser/lib/process.ts +91 -6
package/extensions/agent-browser/lib/results/envelope.ts +102 -0
package/extensions/agent-browser/lib/results/presentation.ts +461 -0
package/extensions/agent-browser/lib/results/shared.ts +91 -0
package/extensions/agent-browser/lib/results/snapshot.ts +648 -0
package/extensions/agent-browser/lib/results.ts +8 -934
package/extensions/agent-browser/lib/runtime.ts +66 -24
package/extensions/agent-browser/lib/temp.ts +159 -16
package/package.json +1 -1

package/CHANGELOG.md CHANGED

@@ -1,5 +1,28 @@
 # Changelog
+## 0.2.0 - 2026-04-12
+### Changed
+- `batch` now reuses the richer standalone renderers, so batched snapshots keep the compact main-content-first view and batched screenshots keep inline image attachments instead of degrading to raw JSON-ish text
+- the tool schema now uses `sessionMode: "auto" | "fresh"` instead of the old implicit-session boolean so agents have a first-class way to request a fresh profiled/debug launch, and blocked startup-scoped reuse errors now include structured recovery hints
+- plain-text inspection commands like `agent_browser --help` and `--version` are now always allowed, removing the old prompt-dependent inspection gate and making the inspection contract local and predictable
+- navigation actions like `click`, `dblclick`, `back`, `forward`, and `reload` now include lightweight post-action title/url summaries when the wrapper can address the active session, reducing guess-and-check follow-up snapshots
+- compact snapshot rendering is leaner by default: fewer additional sections, fewer refs, smaller role summaries, and the raw spill path now stays in `details.fullOutputPath` instead of dominating the visible snapshot body
+- README and injected tool guidance now include a compact agent quick start with the core call shapes for `open` + `snapshot`, `click` + re-snapshot, `batch`, `eval --stdin`, and fresh profiled launches
+### Migration notes
+- replace any use of `useActiveSession` with `sessionMode`
+- use `sessionMode: "fresh"` when you need a new `--profile`, `--session-name`, or `--cdp` launch after the implicit session is already active
+## 0.1.6 - 2026-04-12
+### Changed
+- hardened the implicit browser-session lifecycle so failed first launches no longer mark the convenience session active, startup-scoped flags behave correctly across launches and closes, and the highest-risk entrypoint paths now have direct automated and isolated-`pi` coverage
+- added explicit temp-root ownership markers, aggregate spill-file disk budgeting, inline image size limits, and graceful fallback behavior when large snapshot or stdout artifacts exceed temp budgets
+- consolidated the shared browser operating playbook across the injected system prompt and tool prompt guidance while adding direct extension-hook coverage for prompt injection, bash blocking, and session resets
+- split the old result-rendering god module into focused envelope, presentation, shared, and snapshot modules, and made snapshot compaction fall back to a resilient outline mode when upstream raw snapshot formatting is unfamiliar
+- refactored the release-package verification script into smaller testable helpers, preserved the retired autoload-shim guard, and aligned the tarball gate with the split result-rendering module layout
 ## 0.1.5 - 2026-04-12
 ### Changed

package/README.md CHANGED

@@ -87,6 +87,67 @@ This avoids duplicate `agent_browser` registrations if you also have the publish
 The native tool exposed to the agent is named `agent_browser`.
+The primary session control parameter is `sessionMode`:
+- `"auto"` (default) reuses the implicit `pi`-scoped session when possible
+- `"fresh"` skips that implicit session so startup-scoped flags like `--profile`, `--session-name`, and `--cdp` can launch a fresh upstream session
+## Agent quick start
+### Mental model
+- `args` — exact CLI args after `agent-browser`
+- `stdin` — raw stdin only for `batch` and `eval --stdin`
+- `sessionMode`
+  - `"auto"` — default, reuse the implicit `pi`-scoped session
+  - `"fresh"` — skip the implicit session for a new profile/debug launch
+### Common call shapes
+Open a page, then take an interactive snapshot:
+```json
+{ "args": ["open", "https://example.com"] }
+{ "args": ["snapshot", "-i"] }
+```
+Click a ref, then re-snapshot after navigation or a major DOM change:
+```json
+{ "args": ["click", "@e2"] }
+{ "args": ["snapshot", "-i"] }
+```
+Run a multi-step browser flow in one tool call:
+```json
+{ "args": ["batch"], "stdin": "[[\"open\",\"https://example.com\"],[\"snapshot\",\"-i\"]]" }
+```
+Evaluate page JavaScript via stdin:
+```json
+{ "args": ["eval", "--stdin"], "stdin": "document.title" }
+```
+Start a fresh profiled launch after you already used the implicit session:
+```json
+{ "args": ["--profile", "Default", "open", "https://example.com/account"], "sessionMode": "fresh" }
+```
+Name a new upstream session explicitly when you want to keep reusing it:
+```json
+{ "args": ["--session", "auth-flow", "open", "https://example.com"] }
+```
+### First useful prompt in a fresh `pi` session
+```text
+Use the agent_browser tool to open https://react.dev and then take an interactive snapshot.
+```
 ## Local development
 Do not track or rely on a repo-local `.pi/extensions/agent-browser.ts` autoload shim for this package. When the package is also installed globally, that creates a duplicate `agent_browser` registration and blocks `pi` startup from this working directory.
@@ -116,14 +177,40 @@ Validated workflow examples:
 - run `batch` with JSON via `stdin`
 - run `eval --stdin`
 - take a screenshot with inline attachment support
-- inspect `agent_browser --help` and `--version`
+- inspect `agent_browser --help` and `--version` via the tool's plain-text inspection fallback
+Inspection commands like `agent_browser --help` and `--version` are always supported. They return plain text and are useful for debugging or capability checks, but they are not required for normal browsing workflows.
 Current cautions:
 - passing `--profile` is an explicit upstream choice; this extension does not add its own profile-cloning or isolation layer
-- startup-scoped flags like `--profile`, `--session-name`, and `--cdp` are for the first command that launches a session; if the implicit session is already active, the extension returns a validation error instead of silently letting upstream ignore those flags
+- startup-scoped flags like `--profile`, `--session-name`, and `--cdp` are for the first command that launches a session; if the implicit session is already active, retry that call with `sessionMode: "fresh"` or provide an explicit `--session ...` for the new launch
 - implicit `piab-*` sessions are extension-managed convenience sessions; they are best-effort closed on `pi` shutdown, get an idle timeout to reduce stale background daemons, and clean up private temp spill artifacts on shutdown
 - explicit upstream sessions like `--session`, `--profile`, `--session-name`, and `--cdp` are treated as user-managed and are not auto-closed by the extension
+### Switching from public browsing to a fresh profile/debug launch
+A common agent workflow is:
+1. browse a public page with the default implicit session
+2. then switch to a fresh authenticated/profile/debug launch
+Use `sessionMode: "fresh"` for that transition instead of relying on the implicit session:
+```json
+{
+  "args": ["--profile", "Default", "open", "https://example.com/account"],
+  "sessionMode": "fresh"
+}
+```
+If you want to name the new upstream session yourself, pass an explicit session instead:
+```json
+{
+  "args": ["--session", "auth-flow", "--profile", "Default", "open", "https://example.com/account"]
+}
+```
 ## Docs
 - [`docs/REQUIREMENTS.md`](docs/REQUIREMENTS.md) — product requirements and constraints

package/docs/ARCHITECTURE.md CHANGED

@@ -59,17 +59,19 @@ The published package should exclude agent-only and superseded repo materials su
 ### Default
-If the caller does not provide `--session`, the extension should use an implicit session name derived from the current `pi` session id.
+If the caller does not provide `--session`, the extension should default to `sessionMode: "auto"` and use an implicit session name derived from the current `pi` session id.
 Why:
 - works out of the box
 - gives continuity across calls
 - avoids forcing the agent to invent session names for basic browsing
-### Explicit upstream sessions
+### Explicit upstream sessions and fresh launches
 If the caller provides `--session`, `--profile`, `--cdp`, or similar upstream flags, the extension should respect them with minimal interference.
+The tool should also expose a first-class `sessionMode: "fresh"` escape hatch so agents can intentionally skip the implicit session and launch a fresh upstream session without inventing a fixed explicit session name.
 ### Ownership
 V1 ownership rule:
@@ -92,7 +94,9 @@ The extension should surface that clearly and avoid hidden restart behavior in v
 That means explicit startup-scoping flags like `--profile`, `--session-name`, and `--cdp` should remain explicit upstream choices instead of being wrapped in extra hidden restart or cloning logic.
-If the implicit session is already active and one of those startup-scoped flags appears again, the extension should fail clearly instead of silently sending a command shape that upstream would ignore.
+If the implicit session is already active and one of those startup-scoped flags appears again while `sessionMode` is still `"auto"`, the extension should fail clearly instead of silently sending a command shape that upstream would ignore.
+That failure should include a structured recovery hint pointing to `sessionMode: "fresh"` as the first-line fix, while still allowing an explicit `--session` when the caller wants to name the new upstream session.
 ## Preferring the native tool

package/docs/RELEASE.md CHANGED

@@ -31,7 +31,7 @@ npm run verify:release
 - no repo-local `.pi/extensions/agent-browser.ts` autoload shim is present
 - `LICENSE` exists in the repo and the packed tarball
 - canonical published docs are present
-- extension source files are present
+- extension source files are present, including the split result-rendering modules required by the published facade
 - agent-only and superseded docs are absent from the tarball
 Current forbidden packed files include:

package/docs/TOOL_CONTRACT.md CHANGED

@@ -32,7 +32,7 @@ The tool also needs an operating playbook, not just a capability list. The model
 {
   "args": ["open", "https://example.com"],
   "stdin": "optional raw stdin content",
-  "useActiveSession": true
+  "sessionMode": "auto"
 }
 ```
@@ -69,15 +69,20 @@ Examples:
 { "args": ["batch"], "stdin": "[[\"open\",\"https://example.com\"],[\"snapshot\",\"-i\"]]" }
 ```
-### `useActiveSession`
+### `sessionMode`
-- type: `boolean`
+- type: `"auto" | "fresh"`
 - optional
-- default: `true`
+- default: `"auto"`
 Behavior:
 - if `args` already include `--session`, upstream session choice wins
-- otherwise the extension prepends its implicit active session when `useActiveSession` is `true`
+- `"auto"` prepends the implicit active session when appropriate
+- `"fresh"` skips the implicit session so startup-scoped flags like `--profile`, `--session-name`, or `--cdp` can launch a fresh upstream session
+Recommended use:
+- use `"auto"` for the common browse/snapshot/click flow inside one `pi` session
+- use `"fresh"` when switching from an already-active implicit session to a new profile/debug/auth launch without inventing a fixed explicit session name
 ## Wrapper behavior
@@ -87,8 +92,8 @@ The extension should:
 - parse JSON output into tool details
 - handle observed JSON result shapes, including the array returned by `batch --json`
 - allow plain-text fallback for inspection commands like `--help` and `--version`
-- discourage exploratory inspection calls unless the user explicitly asks or debugging requires them
-- deflect normal-task `--help` inspection back into the standard browser workflow instead of letting the model relearn the tool from scratch each session
+- support those inspection commands unconditionally so the tool contract stays local and predictable
+- still describe normal browser workflows in guidance so models do not overuse inspection for routine tasks
 - surface stderr and non-zero exits clearly
 - attach images when the result points to a screenshot-like artifact
@@ -104,7 +109,8 @@ Primary content should be:
 Examples:
 - small `snapshot` results should include the actual snapshot text
-- oversized `snapshot` results should switch to a compact view that preserves the primary content, nearby sections, high-value refs, and a path to the spilled full raw snapshot
+- oversized `snapshot` results should switch to a compact view that preserves the primary content, nearby sections, and a trimmed set of high-value refs, while exposing the full raw snapshot path via `details.fullOutputPath`
+- successful navigation actions like `click`, `back`, `forward`, and `reload` should include a lightweight post-action title/url summary when the wrapper can address the active session
 - `tab list` should include a readable tab summary
 - `screenshot` should include the saved-path summary plus the inline image attachment when available
@@ -116,6 +122,7 @@ Recommended details:
 {
   "args": ["snapshot", "-i"],
   "effectiveArgs": ["--session", "pi-abc123", "--json", "snapshot", "-i"],
+  "sessionMode": "auto",
   "sessionName": "pi-abc123",
   "usedImplicitSession": true,
   "data": {
@@ -136,7 +143,8 @@ For oversized snapshots, details should switch to a compact metadata object and
 Worth doing in v1:
 - screenshots → inline image attachment
-- snapshots → origin + ref count + main-content-first compact preview, with full raw snapshot spill files when the inline result would otherwise be too large
+- snapshots → origin + ref count + main-content-first compact preview, with the raw snapshot spill path kept in `details.fullOutputPath` when the inline result would otherwise be too large
+- navigation actions like `click`, `back`, `forward`, and `reload` → lightweight post-action title/url summary when available
 - tab lists → compact summary/table
 - stream status → enabled/connected/port summary
@@ -158,7 +166,7 @@ If `agent-browser` is not on `PATH`, fail with a message that:
 - clean up private temp spill artifacts owned by the implicit session on shutdown
 - treat explicit upstream session choices like `--session`, `--profile`, `--session-name`, and `--cdp` as user-managed
 - pass explicit `--profile` straight through to upstream `agent-browser`; no profile-cloning or isolation layer is added in v1
-- if startup-scoped flags like `--profile`, `--session-name`, or `--cdp` are supplied after the implicit session is already active, return a validation error instead of silently relying on upstream to ignore them
+- if startup-scoped flags like `--profile`, `--session-name`, or `--cdp` are supplied after the implicit session is already active while `sessionMode` is `"auto"`, return a validation error with a structured recovery hint that recommends `sessionMode: "fresh"`
 ## Non-goals

package/extensions/agent-browser/index.ts CHANGED

@@ -18,14 +18,16 @@ import {
 	buildPromptPolicy,
 	createEphemeralSessionSeed,
 	createImplicitSessionName,
+	getImplicitSessionCloseTimeoutMs,
+	getImplicitSessionIdleTimeoutMs,
 	getLatestUserPrompt,
 	hasUsableBraveApiKey,
+	resolveImplicitSessionActiveState,
 	validateToolArgs,
 } from "./lib/runtime.js";
 import { cleanupSecureTempArtifacts } from "./lib/temp.js";
-const IMPLICIT_SESSION_IDLE_TIMEOUT_MS = "900000";
-const IMPLICIT_SESSION_CLOSE_TIMEOUT_MS = 5_000;
+const DEFAULT_SESSION_MODE = "auto" as const;
 const AGENT_BROWSER_PARAMS = Type.Object({
 	args: Type.Array(Type.String({ description: "Exact agent-browser CLI arguments, excluding the binary name." }), {
@@ -33,13 +35,45 @@ const AGENT_BROWSER_PARAMS = Type.Object({
 		minItems: 1,
 	}),
 	stdin: Type.Optional(Type.String({ description: "Optional raw stdin content for commands like eval --stdin or batch." })),
-	useActiveSession: Type.Optional(
-		Type.Boolean({
-			description: "When true and no explicit --session is present, inject the implicit session for this pi session.",
-			default: true,
+	sessionMode: Type.Optional(
+		Type.Union([Type.Literal("auto"), Type.Literal("fresh")], {
+			description:
+				"Session handling mode. `auto` reuses the implicit pi-scoped session when possible. `fresh` skips the implicit session so startup-scoped flags like --profile, --session-name, or --cdp can launch a fresh upstream session.",
+			default: DEFAULT_SESSION_MODE,
 		}),
 	),
 });
+const PROJECT_RULE_PROMPT =
+	"Project rule: when browser automation is needed, prefer the native `agent_browser` tool. Do not run direct `agent-browser` bash commands unless the user explicitly asks for a bash-oriented workflow or browser-integration debugging.";
+const QUICK_START_GUIDELINES = [
+	"Quick start mental model: args are the exact agent-browser CLI args after the binary; stdin is only for batch and eval --stdin; sessionMode=fresh starts a fresh upstream launch when you need new --profile, --session-name, or --cdp state.",
+	"Common first calls: { args: [\"open\", \"https://example.com\"] } then { args: [\"snapshot\", \"-i\"] }; after navigation, use { args: [\"click\", \"@e2\"] } then { args: [\"snapshot\", \"-i\"] }.",
+	"Common advanced calls: { args: [\"batch\"], stdin: \"[[\\\"open\\\",\\\"https://example.com\\\"],[\\\"snapshot\\\",\\\"-i\\\"]]\" }, { args: [\"eval\", \"--stdin\"], stdin: \"document.title\" }, and { args: [\"--profile\", \"Default\", \"open\", \"https://example.com/account\"], sessionMode: \"fresh\" }.",
+] as const;
+const BRAVE_SEARCH_PROMPT_GUIDELINE =
+	"When a non-empty BRAVE_API_KEY is available in the current environment, prefer the Brave Search API via bash/curl to discover specific destination URLs, then open the chosen URL with agent_browser instead of browsing a search engine results page just to find the target.";
+const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
+	"Standard workflow: open the page, snapshot -i, interact using refs, and re-snapshot after navigation or major DOM changes.",
+	"For authenticated or user-specific content like feeds, inboxes, dashboards, and accounts, prefer --profile Default on the first browser call and let the implicit session carry continuity. Use --auto-connect only if profile-based reuse is unavailable or the task is specifically about attaching to a running debug-enabled browser.",
+	"Do not invent fixed explicit session names for routine tasks. Use the implicit session unless you truly need multiple isolated browser sessions in the same conversation.",
+	"When using --profile, --session-name, or --cdp, put them on the first command for that session. If you intentionally use an explicit --session, keep using that same explicit session for follow-ups.",
+	"If you already used the implicit session and now need startup-scoped flags like --profile, --session-name, or --cdp, retry with sessionMode set to fresh or pass an explicit --session for the new launch.",
+	"If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <n> / snapshot -i to recover state before retrying different URLs or fallback strategies. Only use wait with an explicit argument like milliseconds, --load, --url, --fn, or --text.",
+	"For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.",
+	"For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.",
+	"When using eval --stdin, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics.",
+	"When using eval --stdin for extraction, return the value you want instead of relying on console.log as the primary result channel.",
+	"Do not call --help or other exploratory inspection commands unless the user explicitly asks for them or debugging the browser integration is necessary.",
+] as const;
+const TOOL_PROMPT_GUIDELINES_PREFIX = ["Use this tool whenever the task requires a real browser or live web content."] as const;
+const TOOL_PROMPT_GUIDELINES_SUFFIX = [
+	"Prefer this tool over bash for opening sites, reading docs on the web, clicking, filling, screenshots, eval, and batch workflows.",
+	"Do not fall back to osascript, AppleScript, or generic browser-driving bash commands when this tool can do the job.",
+	"Pass exact agent-browser CLI arguments in args, excluding the binary name.",
+	"Use stdin for commands like eval --stdin and batch instead of shell heredocs.",
+	"Let the implicit session handle the common path unless you explicitly need a fresh launch for upstream flags like --profile, --session-name, or --cdp.",
+	"Use sessionMode=fresh when switching from an existing implicit session to a new profile/debug launch without inventing a fixed explicit session name.",
+] as const;
 function buildMissingBinaryMessage(): string {
 	return [
@@ -68,25 +102,119 @@ function isPlainTextInspectionArgs(args: string[]): boolean {
 	return args.includes("--help") || args.includes("-h") || args.includes("--version") || args.includes("-V");
 }
-function buildInspectionDeflectionMessage(): string {
+const NAVIGATION_SUMMARY_COMMANDS = new Set(["back", "click", "dblclick", "forward", "reload"]);
+interface NavigationSummary {
+	title?: string;
+	url?: string;
+}
+function isRecord(value: unknown): value is Record<string, unknown> {
+	return typeof value === "object" && value !== null;
+}
+function shouldCaptureNavigationSummary(command: string | undefined, data: unknown): boolean {
+	return (
+		command !== undefined &&
+		NAVIGATION_SUMMARY_COMMANDS.has(command) &&
+		(!isRecord(data) || (typeof data.title !== "string" && typeof data.url !== "string"))
+	);
+}
+function extractStringResultField(data: unknown, fieldName: "title" | "url"): string | undefined {
+	if (typeof data === "string") {
+		const text = data.trim();
+		return text.length > 0 ? text : undefined;
+	}
+	if (!isRecord(data) || typeof data[fieldName] !== "string") {
+		return undefined;
+	}
+	const text = data[fieldName].trim();
+	return text.length > 0 ? text : undefined;
+}
+async function collectNavigationSummary(options: {
+	cwd: string;
+	sessionName?: string;
+	signal?: AbortSignal;
+}): Promise<NavigationSummary | undefined> {
+	const { cwd, sessionName, signal } = options;
+	if (!sessionName) return undefined;
+	const readField = async (fieldName: "title" | "url"): Promise<string | undefined> => {
+		const processResult = await runAgentBrowserProcess({
+			args: ["--json", "--session", sessionName, "get", fieldName],
+			cwd,
+			signal,
+		});
+		if (processResult.aborted || processResult.spawnError || processResult.exitCode !== 0) {
+			return undefined;
+		}
+		const parsed = await parseAgentBrowserEnvelope({
+			stdout: processResult.stdout,
+			stdoutPath: processResult.stdoutSpillPath,
+		});
+		try {
+			if (parsed.parseError || parsed.envelope?.success === false) {
+				return undefined;
+			}
+			return extractStringResultField(parsed.envelope?.data, fieldName);
+		} finally {
+			if (processResult.stdoutSpillPath) {
+				await rm(processResult.stdoutSpillPath, { force: true }).catch(() => undefined);
+			}
+		}
+	};
+	const title = await readField("title");
+	const url = await readField("url");
+	if (!title && !url) return undefined;
+	return { title, url };
+}
+function mergeNavigationSummaryIntoData(data: unknown, navigationSummary: NavigationSummary): unknown {
+	if (isRecord(data)) {
+		return { ...data, navigationSummary };
+	}
+	return { navigationSummary, result: data };
+}
+function buildSharedBrowserPlaybookGuidelines(hasBraveApiKey: boolean): string[] {
 	return [
-		"Do not inspect agent_browser help for a normal browser task.",
-		"Use the workflow directly:",
-		"1. open the target URL",
-		"2. snapshot -i",
-		"3. interact using refs and re-snapshot after navigation or major DOM changes",
-		"For authenticated or user-specific content like feeds, inboxes, dashboards, or accounts, start with an authenticated strategy such as --profile Default on the first browser call and let the implicit session carry continuity. Use --auto-connect only if profile-based reuse is unavailable.",
+		SHARED_BROWSER_PLAYBOOK_GUIDELINES[0],
+		...(hasBraveApiKey ? [BRAVE_SEARCH_PROMPT_GUIDELINE] : []),
+		...SHARED_BROWSER_PLAYBOOK_GUIDELINES.slice(1),
+	];
+}
+function buildBrowserSystemPromptAppendix(hasBraveApiKey: boolean): string {
+	return [
+		PROJECT_RULE_PROMPT,
+		"",
+		"Quick start:",
+		...QUICK_START_GUIDELINES.map((guideline) => `- ${guideline}`),
+		"",
+		"Browser operating playbook:",
+		...buildSharedBrowserPlaybookGuidelines(hasBraveApiKey).map((guideline) => `- ${guideline}`),
 	].join("\n");
 }
-function buildBraveSearchGuidance(hasBraveApiKey: boolean): string {
-	if (!hasBraveApiKey) return "";
-	return "\n- A non-empty `BRAVE_API_KEY` is available in the current environment. For web search or URL discovery, prefer the Brave Search API via `bash`/`curl` to find the destination URL, then open that URL with `agent_browser` instead of using browser automation to drive Google or another search engine results page. If the Brave request fails, fall back to the normal workflow.";
+function buildToolPromptGuidelines(hasBraveApiKey: boolean): string[] {
+	return [
+		...TOOL_PROMPT_GUIDELINES_PREFIX,
+		...QUICK_START_GUIDELINES,
+		...buildSharedBrowserPlaybookGuidelines(hasBraveApiKey),
+		...TOOL_PROMPT_GUIDELINES_SUFFIX,
+	];
 }
 export default function agentBrowserExtension(pi: ExtensionAPI) {
 	const ephemeralSessionSeed = createEphemeralSessionSeed();
-	const braveSearchGuidance = buildBraveSearchGuidance(hasUsableBraveApiKey());
+	const hasBraveApiKey = hasUsableBraveApiKey();
+	const browserSystemPromptAppendix = buildBrowserSystemPromptAppendix(hasBraveApiKey);
+	const toolPromptGuidelines = buildToolPromptGuidelines(hasBraveApiKey);
+	const implicitSessionIdleTimeoutMs = getImplicitSessionIdleTimeoutMs();
+	const implicitSessionCloseTimeoutMs = getImplicitSessionCloseTimeoutMs();
 	let implicitSessionActive = false;
 	let implicitSessionName = createImplicitSessionName(undefined, process.cwd(), ephemeralSessionSeed);
 	let implicitSessionCwd = process.cwd();
@@ -100,7 +228,7 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 	pi.on("session_shutdown", async () => {
 		implicitSessionActive = false;
 		const controller = new AbortController();
-		const timer = setTimeout(() => controller.abort(), IMPLICIT_SESSION_CLOSE_TIMEOUT_MS);
+		const timer = setTimeout(() => controller.abort(), implicitSessionCloseTimeoutMs);
 		try {
 			await runAgentBrowserProcess({
 				args: ["--session", implicitSessionName, "close"],
@@ -117,10 +245,7 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 	pi.on("before_agent_start", async (event) => {
 		return {
-			systemPrompt:
-				event.systemPrompt +
-				"\n\nProject rule: when browser automation is needed, prefer the native `agent_browser` tool. Do not run direct `agent-browser` bash commands unless the user explicitly asks for a bash-oriented workflow or browser-integration debugging.\n\nBrowser operating playbook:\n- Standard workflow: open the page, then snapshot -i, then interact via refs, then re-snapshot after navigation or major DOM changes.\n- For user-specific or authenticated content like feeds, inboxes, dashboards, and accounts, start with an authenticated browser strategy instead of public browsing. Prefer `--profile Default` on the first browser call and let the current implicit session carry continuity. Use `--auto-connect` only if profile-based reuse is unavailable or the task is specifically about attaching to a running debug-enabled browser.\n- Do not invent fixed explicit session names for routine tasks. Use the implicit session unless you truly need multiple isolated browser sessions in the same conversation.\n- When using startup-scoped flags like `--profile`, `--session-name`, or `--cdp`, put them on the first command for that session. If you intentionally use an explicit `--session`, keep using that same explicit session for follow-ups.\n- If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an `open` call returns blocked, blank, or otherwise unexpected results, use `tab list`, `tab <n>`, and `snapshot -i` to recover state before retrying different URLs or fallback strategies. Only use `wait` with an explicit argument like milliseconds, `--load`, `--url`, `--fn`, or `--text`.\n- For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.\n- For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or `eval --stdin` on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.\n- When using `eval --stdin`, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics.\n- When using `eval --stdin` for extraction, return the value you want instead of relying on `console.log` as the primary result channel.\n- Do not use `agent_browser --help` for normal browsing tasks." +
-				braveSearchGuidance,
+			systemPrompt: `${event.systemPrompt}\n\n${browserSystemPromptAppendix}`,
 		};
 	});
@@ -146,41 +271,9 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 			"Browse and interact with websites using agent-browser. Use this for web research, reading live docs, opening pages, taking snapshots or screenshots, clicking links, filling forms, extracting page content, and authenticated/profile-based browser work.",
 		promptSnippet:
 			"Browse websites, read live docs, click and fill pages, extract browser content, take screenshots, and automate real web workflows.",
-		promptGuidelines: [
-			"Use this tool whenever the task requires a real browser or live web content.",
-			"Standard workflow: open the page, snapshot -i, interact using refs, and re-snapshot after navigation or major DOM changes.",
-			...(braveSearchGuidance
-				? [
-					"When a non-empty BRAVE_API_KEY is available in the current environment, prefer the Brave Search API via bash/curl to discover specific destination URLs, then open the chosen URL with agent_browser instead of browsing a search engine results page just to find the target.",
-				  ]
-				: []),
-			"For authenticated or user-specific content like feeds, inboxes, dashboards, and accounts, prefer --profile Default on the first browser call and let the implicit session carry continuity. Use --auto-connect only if profile-based reuse is unavailable or the task is specifically about attaching to a running debug-enabled browser.",
-			"Do not invent fixed explicit session names for routine tasks. Use the implicit session unless you truly need multiple isolated browser sessions in the same conversation.",
-			"When using --profile, --session-name, or --cdp, put them on the first command for that session. If you intentionally use an explicit --session, keep using that same explicit session for follow-ups.",
-			"If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <n> / snapshot -i to recover state before retrying different URLs or fallback strategies. Only use wait with an explicit argument like milliseconds, --load, --url, --fn, or --text.",
-			"For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.",
-			"For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.",
-			"When using eval --stdin, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics.",
-			"When using eval --stdin for extraction, return the value you want instead of relying on console.log as the primary result channel.",
-			"Prefer this tool over bash for opening sites, reading docs on the web, clicking, filling, screenshots, eval, and batch workflows.",
-			"Do not call --help or other exploratory inspection commands unless the user explicitly asks for them or debugging the browser integration is necessary.",
-			"Do not fall back to osascript, AppleScript, or generic browser-driving bash commands when this tool can do the job.",
-			"Pass exact agent-browser CLI arguments in args, excluding the binary name.",
-			"Use stdin for commands like eval --stdin and batch instead of shell heredocs.",
-			"Let the implicit session handle the common path unless you explicitly need upstream flags like --session, --profile, or --cdp.",
-		],
+		promptGuidelines: toolPromptGuidelines,
 		parameters: AGENT_BROWSER_PARAMS,
 		async execute(_toolCallId, params, signal, onUpdate, ctx) {
-			const promptPolicy = buildPromptPolicy(getLatestUserPrompt(ctx.sessionManager.getBranch()));
-			if (!promptPolicy.allowAgentBrowserInspection && isPlainTextInspectionArgs(params.args)) {
-				const errorText = buildInspectionDeflectionMessage();
-				return {
-					content: [{ type: "text", text: errorText }],
-					details: { args: params.args, inspectionBlocked: true },
-					isError: true,
-				};
-			}
 			const validationError = validateToolArgs(params.args);
 			if (validationError) {
 				return {
@@ -190,10 +283,11 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 				};
 			}
+			const sessionMode = params.sessionMode ?? DEFAULT_SESSION_MODE;
 			const executionPlan = buildExecutionPlan(params.args, {
 				implicitSessionActive,
 				implicitSessionName,
-				useActiveSession: params.useActiveSession ?? true,
+				sessionMode,
 			});
 			if (executionPlan.validationError) {
@@ -201,6 +295,8 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 					content: [{ type: "text", text: executionPlan.validationError }],
 					details: {
 						args: params.args,
+						sessionMode,
+						sessionRecoveryHint: executionPlan.recoveryHint,
 						startupScopedFlags: executionPlan.startupScopedFlags,
 						validationError: executionPlan.validationError,
 					},
@@ -212,6 +308,7 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 				content: [{ type: "text", text: `Running agent-browser ${buildInvocationPreview(executionPlan.effectiveArgs)}` }],
 				details: {
 					effectiveArgs: executionPlan.effectiveArgs,
+					sessionMode,
 					sessionName: executionPlan.sessionName,
 					usedImplicitSession: executionPlan.usedImplicitSession,
 				},
@@ -221,16 +318,12 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 				args: executionPlan.effectiveArgs,
 				cwd: ctx.cwd,
 				env: executionPlan.usedImplicitSession
-					? { AGENT_BROWSER_IDLE_TIMEOUT_MS: IMPLICIT_SESSION_IDLE_TIMEOUT_MS }
+					? { AGENT_BROWSER_IDLE_TIMEOUT_MS: implicitSessionIdleTimeoutMs }
 					: undefined,
 				signal,
 				stdin: params.stdin,
 			});
-			if (executionPlan.usedImplicitSession && !processResult.aborted && !processResult.spawnError) {
-				implicitSessionActive = executionPlan.commandInfo.command !== "close";
-			}
 			if (processResult.spawnError?.message.includes("ENOENT")) {
 				const errorText = buildMissingBinaryMessage();
 				return {
@@ -238,6 +331,7 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 					details: {
 						args: params.args,
 						effectiveArgs: executionPlan.effectiveArgs,
+						sessionMode,
 						spawnError: processResult.spawnError.message,
 					},
 					isError: true,
@@ -249,12 +343,35 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 					stdout: processResult.stdout,
 					stdoutPath: processResult.stdoutSpillPath,
 				});
+				let presentationEnvelope = parsed.envelope;
 				const processSucceeded = !processResult.aborted && !processResult.spawnError && processResult.exitCode === 0;
 				const plainTextInspection = isPlainTextInspectionArgs(params.args) && processSucceeded && parsed.parseError !== undefined;
 				const envelopeSuccess = plainTextInspection ? true : parsed.envelope?.success !== false;
 				const parseSucceeded = plainTextInspection || parsed.parseError === undefined;
 				const succeeded = processSucceeded && parseSucceeded && envelopeSuccess;
+				let navigationSummary: NavigationSummary | undefined;
+				if (succeeded && shouldCaptureNavigationSummary(executionPlan.commandInfo.command, parsed.envelope?.data)) {
+					navigationSummary = await collectNavigationSummary({
+						cwd: ctx.cwd,
+						sessionName: executionPlan.sessionName,
+						signal,
+					});
+					if (navigationSummary && presentationEnvelope) {
+						presentationEnvelope = {
+							...presentationEnvelope,
+							data: mergeNavigationSummaryIntoData(presentationEnvelope.data, navigationSummary),
+						};
+					}
+				}
+				implicitSessionActive = resolveImplicitSessionActiveState({
+					command: executionPlan.commandInfo.command,
+					priorActive: implicitSessionActive,
+					succeeded,
+					usedImplicitSession: executionPlan.usedImplicitSession,
+				});
 				const errorText = getAgentBrowserErrorText({
 					aborted: processResult.aborted,
 					envelope: parsed.envelope,
@@ -274,7 +391,7 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 					: await buildToolPresentation({
 							commandInfo: executionPlan.commandInfo,
 							cwd: ctx.cwd,
-							envelope: parsed.envelope,
+							envelope: presentationEnvelope,
 							errorText,
 					  });
@@ -282,16 +399,22 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 					content: presentation.content,
 					details: {
 						args: params.args,
+						batchSteps: presentation.batchSteps,
 						command: executionPlan.commandInfo.command,
 						subcommand: executionPlan.commandInfo.subcommand,
 						data: presentation.data,
 						error: parsed.envelope?.error,
+						navigationSummary,
 						effectiveArgs: executionPlan.effectiveArgs,
 						exitCode: processResult.exitCode,
 						fullOutputPath: presentation.fullOutputPath,
+						fullOutputPaths: presentation.fullOutputPaths,
 						imagePath: presentation.imagePath,
+						imagePaths: presentation.imagePaths,
 						parseError: parsed.parseError,
+						sessionMode,
 						sessionName: executionPlan.sessionName,
+						sessionRecoveryHint: executionPlan.recoveryHint,
 						startupScopedFlags: executionPlan.startupScopedFlags,
 						stderr: processResult.stderr || undefined,
 						stdout: parseSucceeded ? undefined : processResult.stdout,