npm - pi-agent-browser-native - Versions diffs - 0.2.8 → 0.2.10 - Mend

pi-agent-browser-native 0.2.8 → 0.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/CHANGELOG.md +22 -0
package/README.md +31 -5
package/docs/ARCHITECTURE.md +2 -1
package/docs/COMMAND_REFERENCE.md +264 -0
package/docs/RELEASE.md +14 -4
package/docs/REQUIREMENTS.md +3 -0
package/docs/TOOL_CONTRACT.md +8 -5
package/extensions/agent-browser/index.ts +42 -1
package/extensions/agent-browser/lib/results/presentation.ts +149 -6
package/extensions/agent-browser/lib/results/snapshot.ts +2 -2
package/extensions/agent-browser/lib/temp.ts +1 -1
package/package.json +5 -4

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,28 @@
 ## Unreleased
+## 0.2.10 - 2026-04-18
+### Changed
+- bumped the local pi development baseline to `@mariozechner/pi-coding-agent` `0.67.68` and `typescript` `6.0.3`
+- refreshed the release lockfile against the current stable pi patch line
+### Fixed
+- pinned the transitive `basic-ftp` dependency to `5.3.0` to clear the current audit finding during local verification and publish checks
+## 0.2.9 - 2026-04-17
+### Fixed
+- large non-snapshot outputs such as oversized `eval --stdin` payloads now compact inline content, spill the full payload to a private file, and print the actual spill path directly in tool content instead of dumping huge raw output into model context
+- file-save flows now render `download` results as explicit saved-file summaries so agents can see the downloaded path directly
+- when the known target tab is still active at command start but a restored/background tab steals focus after the command completes, the wrapper now best-effort restores the intended tab before returning control
+- compact snapshot text now prints the actual raw-spill file path directly instead of only referring agents to `details.fullOutputPath`
+### Changed
+- added a published `docs/COMMAND_REFERENCE.md` so agents have a repo-readable local command/help surface even when direct `agent-browser` binary usage is blocked
+- expanded tool guidance, README, release notes, and repo guidance with download workflows, better `wait` usage, oversized-output handling, and the documentation-sync rule for upstream `agent-browser` updates
+- clarified the checkout-versus-installed-package workflow in README, release notes, and repo agent guidance: local development should keep exactly one active Pi package source for this extension at a time, and the published entrypoint file should not be treated as optional
 ## 0.2.8 - 2026-04-16
 ### Fixed

package/README.md CHANGED Viewed

@@ -85,7 +85,9 @@ Until you are using a published package release, prefer an explicit checkout-onl
 pi --no-extensions -e /absolute/path/to/pi-agent-browser-native
 ```
-This avoids duplicate `agent_browser` registrations if you also have the published package installed globally.
+This keeps the checkout isolated from any other active package source for the same extension.
+This repository's `package.json` is itself a publishable pi package manifest that points at `extensions/agent-browser/index.ts`. That file is the real extension entrypoint for both the checkout and the published package. Keep exactly one active source for this extension in Pi settings at a time: either this checkout path or the published npm package.
 The native tool exposed to the agent is named `agent_browser`.
@@ -132,6 +134,12 @@ Evaluate page JavaScript via stdin:
 { "args": ["eval", "--stdin"], "stdin": "document.title" }
 ```
+Download a file to an explicit path instead of relying on `click` alone:
+```json
+{ "args": ["download", "@e5", "/tmp/report.pdf"] }
+```
 Start a fresh profiled launch after you already used the implicit session:
 ```json
@@ -154,17 +162,21 @@ Use the agent_browser tool to open https://react.dev and then take an interactiv
 ## Local development
-Do not track or rely on a repo-local `.pi/extensions/agent-browser.ts` autoload shim for this package. When the package is also installed globally, that creates a duplicate `agent_browser` registration and blocks `pi` startup from this working directory.
+Do not track or rely on a repo-local `.pi/extensions/agent-browser.ts` autoload shim for this package. That creates an unnecessary second registration path.
+The published entrypoint lives at `extensions/agent-browser/index.ts` and is referenced directly from this repo's `package.json`. While developing this repo, keep the checkout path enabled in Pi settings and disable or uninstall `npm:pi-agent-browser-native` so Pi has only one active source for this extension.
+Recommended local development setup:
 1. Install `agent-browser` separately via the upstream project.
 2. Run `npm install`.
-3. Launch `pi` from this repository root with only the checkout extension loaded:
+3. Keep the checkout path enabled in Pi settings and disable or uninstall `npm:pi-agent-browser-native` while developing this repo.
+4. Launch `pi` from this repository root with only the checkout extension loaded:
 ```bash
 pi --no-extensions -e .
 ```
-4. Prompt the agent to use `agent_browser`.
+5. Prompt the agent to use `agent_browser`.
 Example prompt:
@@ -172,6 +184,8 @@ Example prompt:
 Use the agent_browser tool to open https://react.dev and then take an interactive snapshot.
 ```
+For installed-package validation after a release, temporarily do the reverse: disable/remove the checkout path from Pi settings and validate the published npm package, or use an isolated ephemeral run such as `pi --no-extensions -e npm:pi-agent-browser-native@<version>`.
 Validated workflow examples:
 - open a page and snapshot it
@@ -184,6 +198,8 @@ Validated workflow examples:
 - run `eval --stdin`
 - take a screenshot with inline attachment support
 - inspect `agent_browser --help` and `--version` via the tool's stateless plain-text inspection fallback
+- use `download <selector> <path>` for attachment/file-save workflows instead of trying to infer downloads from generic clicks or large eval dumps
+- confirm oversized outputs show the actual spill file path directly in tool content, not just a details key name
 Inspection commands like `agent_browser --help` and `--version` are always supported. They return plain text, are useful for debugging or capability checks, and stay stateless: the extension does not inject its implicit session for them and they do not consume the managed-session slot needed for a later `--profile`, `--session-name`, or `--cdp` launch.
@@ -195,7 +211,9 @@ Current cautions:
 - for local Unix launches, the wrapper uses a short private socket directory under `/tmp` so extension-generated session names do not trip upstream Unix socket-path limits in longer cwd/session-name combinations
 - for direct headless local Chrome launches to `chat.com`, `chatgpt.com`, and `chat.openai.com`, the extension injects a normal Chrome user agent when the caller did not explicitly provide `--user-agent`; this keeps the default headless workflow usable without forcing `--headed` or `--auto-connect`
 - after profiled `open` calls, the extension best-effort re-selects the tab that matches the returned page URL when restored profile tabs steal focus during launch
-- after a target tab is known, later active-tab commands like `click` and `snapshot -i` best-effort pin that same tab inside the same upstream invocation when a reconnect would otherwise drift to a restored tab
+- after a target tab is known, later active-tab commands best-effort pin that same tab inside the same upstream invocation when a reconnect would otherwise drift to a restored tab
+- after a successful command, the extension also best-effort restores that intended tab when a restored/background tab steals focus after the command completes
+- oversized snapshots and oversized generic outputs compact inline content and print the actual spill file path directly in the tool result when a spill file exists
 - explicit caller-provided `--session` values are treated as user-managed and are not auto-closed by the extension
 - explicit caller-provided `--user-agent` values win over the ChatGPT/OpenAI compatibility workaround
 - tool progress/details redact sensitive invocation values such as `--headers`, proxy credentials, and auth-bearing URL parameters before echoing them back into Pi
@@ -231,6 +249,7 @@ If you want to name the new upstream session yourself, pass an explicit session
 - [`docs/REQUIREMENTS.md`](docs/REQUIREMENTS.md) — product requirements and constraints
 - [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md) — current architecture decision
 - [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md) — proposed v1 tool shape
+- [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md) — local, repo-readable command reference for sessions where direct `agent-browser` binary usage is blocked
 - [`docs/RELEASE.md`](docs/RELEASE.md) — maintainer release and package verification workflow
 ## Documentation rule
@@ -240,3 +259,10 @@ When requirements change in chat:
 1. update `docs/REQUIREMENTS.md`
 2. update the affected design docs
 3. update this README if user-facing expectations changed
+When the upstream `agent-browser` binary changes:
+1. re-check the upstream command/help surface
+2. update `docs/COMMAND_REFERENCE.md`
+3. update tool guidance, README, and release docs if behavior or recommended usage changed
+4. verify that agents blocked from calling the binary directly still have an equally usable local, extension-side documentation path

package/docs/ARCHITECTURE.md CHANGED Viewed

@@ -89,6 +89,7 @@ Practical policy:
 - leave explicit caller-provided `--session` choices alone unless the caller closes them explicitly
 - after profiled `open` / `goto` / `navigate` calls, verify the active tab still matches the returned page URL and best-effort switch back when restored profile tabs steal focus
 - once the wrapper knows which tab the agent is operating on, later active-tab commands may synthesize a tiny upstream `batch` that re-selects that tab and then runs the requested command in the same upstream invocation; this stays thin while avoiding reconnect-time drift on profile-restored sessions
+- after a successful command on a known tab target, the wrapper may best-effort restore that same target again if restored/background tabs steal focus after the command returns
 - for local Unix launches, set a short private socket directory so extension-generated session names do not fail on the upstream Unix socket-path length limit
 This is primarily about ownership clarity and avoiding surprise, not adding a heavy safety wrapper. If the extension invented the session, the extension should own its lifecycle without breaking reload/resume semantics. If the caller explicitly chose the upstream session model, the extension should stay out of the way.
@@ -126,7 +127,7 @@ This keeps the product centered on native tool usage instead of auxiliary skill
 - compact result summaries
 - inline screenshots/images
 - lightweight session convenience
-- docs
+- docs, including a repo-readable command reference that mirrors the blocked direct-binary help path closely enough for normal agent work
 ### Upstream `agent-browser` owns

package/docs/COMMAND_REFERENCE.md ADDED Viewed

@@ -0,0 +1,264 @@
+# Agent Browser command reference
+Related docs:
+- [`../README.md`](../README.md)
+- [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md)
+- [`ARCHITECTURE.md`](ARCHITECTURE.md)
+- [`RELEASE.md`](RELEASE.md)
+## Purpose
+Provide a local, repo-readable command reference for the native `agent_browser` tool.
+This project intentionally blocks normal `agent-browser` bash usage in most agent sessions, so the agent still needs an accessible local equivalent of the upstream command surface. This document is the durable reference the agent can read inside the repository without calling the binary directly.
+## Core mental model
+Tool parameters:
+```json
+{
+  "args": ["open", "https://example.com"],
+  "stdin": "optional raw stdin content",
+  "sessionMode": "auto"
+}
+```
+- `args`: exact `agent-browser` CLI tokens after the binary name
+- `stdin`: only for commands like `batch` and `eval --stdin`
+- `sessionMode`:
+  - `"auto"`: reuse the extension-managed session when possible
+  - `"fresh"`: rotate that managed session to a fresh upstream launch so startup-scoped flags like `--profile`, `--session-name`, or `--cdp` apply
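+## Recommended workflow
+In the examples throughout this document, each line inside a `json` block that holds a complete object is a separate tool call, executed in order.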
+### Normal browse flow
+```json
+{ "args": ["open", "https://example.com"] }
+{ "args": ["snapshot", "-i"] }
+{ "args": ["click", "@e2"] }
+{ "args": ["snapshot", "-i"] }
+```
+### Extract page data
+```json
+{ "args": ["get", "title"] }
+{ "args": ["get", "url"] }
+{ "args": ["eval", "--stdin"], "stdin": "document.title" }
+```
+### Run a multi-step flow in one browser invocation
+```json
+{ "args": ["batch"], "stdin": "[[\"open\",\"https://example.com\"],[\"snapshot\",\"-i\"]]" }
+```
+### Switch from an already-active implicit session to a fresh profiled launch
+```json
+{
+  "args": ["--profile", "Default", "open", "https://mail.google.com"],
+  "sessionMode": "fresh"
+}
+```
+## High-value commands
+### Open and navigation
+- `open <url>`
+- `goto <url>`
+- `navigate <url>`
+- `back`
+- `forward`
+- `reload`
+Examples:
+```json
+{ "args": ["open", "https://react.dev"] }
+{ "args": ["reload"] }
+```
+### Snapshot and page inspection
+- `snapshot`
+- `snapshot -i` interactive elements only
+- `snapshot -c` compact tree
+- `snapshot -d <n>` limit depth
+- `snapshot -s <selector>` scope to one subtree
+Examples:
+```json
+{ "args": ["snapshot", "-i"] }
+{ "args": ["snapshot", "-i", "-s", "main"] }
+```
+### Element interaction
+- `click <selector-or-@ref>`
+- `dblclick <selector-or-@ref>`
+- `hover <selector-or-@ref>`
+- `focus <selector-or-@ref>`
+- `type <selector-or-@ref> <text>`
+- `fill <selector-or-@ref> <text>`
+- `press <key>`
+- `check <selector-or-@ref>`
+- `uncheck <selector-or-@ref>`
+- `select <selector-or-@ref> <value...>`
+- `drag <src> <dst>`
+- `upload <selector-or-@ref> <files...>`
+Examples:
+```json
+{ "args": ["click", "@e12"] }
+{ "args": ["fill", "#email", "user@example.com"] }
+{ "args": ["press", "Enter"] }
+```
+### Downloads and saved files
+Use the purpose-built command when a click should save a file.
+- `download <selector-or-@ref> <path>`
+- `pdf <path>`
+- `screenshot [path]`
+Examples:
+```json
+{ "args": ["download", "@e5", "/tmp/report.pdf"] }
+{ "args": ["pdf", "/tmp/page.pdf"] }
+{ "args": ["screenshot", "/tmp/page.png"] }
+```
+Rules:
+- Prefer `download <selector> <path>` over `click` when the goal is a downloaded file on disk.
+- Prefer explicit output paths when the calling task needs to read, move, or attach the saved file later.
+- Use `--download-path <dir>` on the first launch when many downloads should land in one directory.
+### Read page state
+`get <subcommand>` supports:
+- `title`
+- `url`
+- `text <selector>`
+- `html <selector>`
+- `value <selector>`
+- `attr <selector> <name>`
+- `count <selector>`
+- `box <selector>`
+- `styles <selector>`
+- `cdp-url`
+Examples:
+```json
+{ "args": ["get", "title"] }
+{ "args": ["get", "text", "main"] }
+{ "args": ["get", "attr", "a.primary", "href"] }
+```
+### JavaScript evaluation
+- `eval <js>`
+- `eval --stdin` with JavaScript in `stdin`
+Example:
+```json
+{ "args": ["eval", "--stdin"], "stdin": "Array.from(document.querySelectorAll('a')).map((a) => a.href)" }
+```
+Rules:
+- Return the intended value instead of relying on `console.log`.
+- Scope DOM queries to the relevant route, component, or element.
+- Prefer `snapshot -i` refs first when the task is interaction-heavy.
+### Wait
+- `wait <ms>`
+- `wait <selector>`
+- use explicit variants like `--load <state>`, `--url <matcher>`, `--fn <js>`, or `--text <matcher>` when needed
+Important:
+- bare `wait --load` is incomplete; `--load` needs a state value
+### Tabs
+- `tab list`
+- `tab <tab-id-or-label>`
+- `tab new`
+- `tab close`
+Examples:
+```json
+{ "args": ["tab", "list"] }
+{ "args": ["tab", "t3"] }
+```
+Use this when:
+- a restored profile tab steals focus
+- an interaction opens a new tab
+- the browser lands on the wrong page unexpectedly
+### Batch
+- `batch`
+- `batch --bail`
+Example:
+```json
+{ "args": ["batch", "--bail"], "stdin": "[[\"open\",\"https://example.com\"],[\"snapshot\",\"-i\"],[\"click\",\"@e2\"]]" }
+```
+### Session and inspection commands
+- `session`
+- `session list`
+- `close`
+- `close --all`
+- `--help`
+- `--version`
+The wrapper keeps `--help` and `--version` stateless so they do not consume the implicit managed-session slot.
+## Important global flags
+- `--profile <name|path>` reuse Chrome profile state
+- `--session <name>` explicit upstream session name
+- `--session-name <name>` upstream saved auth/session state name
+- `--cdp <port-or-url>` connect to an existing browser
+- `--headed` show the browser window
+- `--download-path <dir>` default download directory
+- `--user-agent <ua>` custom user agent
+- `--json` injected by the wrapper automatically for normal tool execution
+## Wrapper-specific behavior worth knowing
+- The extension may keep following one implicit managed session across later tool calls.
+- If startup-scoped flags like `--profile`, `--session-name`, or `--cdp` would be ignored because that implicit session is already active, retry with `sessionMode: "fresh"`.
+- After profiled opens, the wrapper best-effort restores the intended target tab when restored tabs steal focus.
+- After the wrapper knows the intended tab for a session, later commands best-effort keep that tab active so reconnect drift does not silently move the browser to a restored/background tab.
+- Oversized snapshots and oversized generic outputs may be compacted in tool content, with the full raw output written to a spill file path shown directly in the tool result.
+## Maintenance rule
+Whenever the upstream `agent-browser` binary version changes in this project:
+1. re-check the upstream command/help surface
+2. update this local command reference if anything changed
+3. update tool prompt guidance if the recommended agent workflow changed
+4. update README and release docs if the user-visible behavior changed
+5. validate the extension still exposes local documentation that is at least as usable as the blocked direct-binary path for normal agent work

package/docs/RELEASE.md CHANGED Viewed

@@ -54,10 +54,12 @@ node scripts/verify-package.mjs --list-files
 Before publishing, also validate the explicit local-checkout path:
 1. Install `agent-browser` separately.
-2. Launch `pi --no-extensions -e .` from this repository root.
-3. Confirm the checkout extension loads from `extensions/agent-browser/index.ts`.
-4. Run a smoke prompt that exercises `agent_browser`.
-5. Validate managed-session continuity with both `/reload` and a full restart + `/resume`.
+2. Make sure Pi has only one active source for this extension during checkout validation.
+3. Launch `pi --no-extensions -e .` from this repository root.
+4. Confirm the checkout extension loads from `extensions/agent-browser/index.ts`.
+5. Run a smoke prompt that exercises `agent_browser`.
+6. Validate managed-session continuity with both `/reload` and a full restart + `/resume`.
+7. Re-check local extension-side docs (`README.md`, `docs/COMMAND_REFERENCE.md`, and prompt guidance) if the upstream `agent-browser` version/help surface changed.
 Example smoke prompt:
@@ -71,6 +73,8 @@ Recommended lifecycle follow-up:
 2. Run `/reload`, then ask for `snapshot -i` and confirm the same page is still active.
 3. Exit `pi`, relaunch it against the same session file or use `/resume`, then ask for `snapshot -i` again and confirm the same page is still active.
 4. Open a large page that compacts its snapshot output and confirm the spill file referenced by `details.fullOutputPath` still exists after the restart/resume flow.
+5. Trigger an oversized non-snapshot output (for example a deliberately large `eval --stdin` result) and confirm the tool prints the actual spill file path directly in content instead of only referencing a details key.
+6. Validate at least one file-download flow with `download <selector> <path>`.
 ## Post-publish install validation
@@ -81,6 +85,11 @@ pi install npm:pi-agent-browser-native@<version>
 pi -e npm:pi-agent-browser-native@<version>
 ```
+For installed-package validation, make sure Pi has only one active source for this extension. The simplest safe paths are either:
+- temporarily disable/remove the checkout path and then run plain `pi`, or
+- use an isolated ephemeral run such as `pi --no-extensions -e npm:pi-agent-browser-native@<version>`
 Then confirm `pi` exposes the native `agent_browser` tool, that a basic `open` + `snapshot -i` flow works, and that `/reload` plus restart/`/resume` keep following the same implicit managed browser session.
 ## Release notes checklist
@@ -89,6 +98,7 @@ Before publishing:
 - update `CHANGELOG.md`
 - confirm README install guidance still leads with the package-first flow
+- confirm `docs/COMMAND_REFERENCE.md` still matches the effective upstream command/help surface used by the wrapper
 - confirm the explicit local-checkout instructions still work for pre-release validation
 - rerun `npm run verify:release`
 - manually exercise `/reload` and full restart + `/resume` continuity in local checkout validation

package/docs/REQUIREMENTS.md CHANGED Viewed

@@ -64,6 +64,7 @@ Define the product requirements and constraints for `pi-agent-browser-native`.
 - Someone opening the repo should quickly understand the goal, purpose, install model, and usage.
 - Documents should read as complete documents, not iterative logs, unless they are explicitly meant to be iterative, such as a changelog.
 - Requirements, expectations, and durable rules from user conversations should be reflected in the appropriate docs.
+- Because direct-binary usage is commonly blocked in normal agent sessions, the repo must carry a local command reference for the effective `agent_browser` surface and keep it in sync with upstream changes.
 - Published package contents should include the canonical user-facing docs plus `LICENSE`.
 - Published package contents should exclude agent-only and superseded docs such as `AGENTS.md`, `docs/v1-tool-contract.md`, and `docs/native-integration-design.md`.
@@ -95,10 +96,12 @@ The design should comfortably support workflows such as:
 - The wrapper should stay thin, with upstream `agent-browser` remaining the source of truth for command semantics.
 - User-facing docs belong in `README.md` and the canonical published files under `docs/`.
 - Agent workflow and deeper testing procedures can stay in `AGENTS.md`, but published docs must not depend on that file being present.
+- When upstream `agent-browser` changes, refresh the local command reference, prompt guidance, and other extension-side docs so agents still have a repo-readable equivalent of the blocked direct-binary help path.
 - Keep mitigations for legacy-skill coexistence simple; do not add extra moving parts unless observed behavior justifies them.
 - Prefer narrow, evidence-backed compatibility mitigations over broad stealth layers when a specific upstream site starts rejecting the default headless launch fingerprint.
 - Preserve the page that a profiled `open` just navigated to; if restored profile tabs steal focus during launch, the wrapper should best-effort switch back to the returned page URL before handing control back to the agent.
 - Once a tab target is known for a session, later active-tab commands should best-effort pin that same tab inside the same upstream invocation when reconnect drift would otherwise land on a restored/background tab.
+- If a restored/background tab steals focus after a successful command, the wrapper should best-effort restore the intended target tab again before handing control back.
 - On local Unix launches, extension-generated session names should not fail just because the upstream default socket path is too long; the wrapper should choose a shorter socket directory when needed.
 ## Open design questions

package/docs/TOOL_CONTRACT.md CHANGED Viewed

@@ -4,6 +4,7 @@ Related docs:
 - [`../README.md`](../README.md)
 - [`REQUIREMENTS.md`](REQUIREMENTS.md)
 - [`ARCHITECTURE.md`](ARCHITECTURE.md)
+- [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md)
 ## V1 tool
@@ -24,7 +25,7 @@ It also keeps the main UX where it belongs: the agent invokes the tool directly
 The tool guidance should be written for task discovery first, not wrapper implementation first. That means the description should emphasize browser use cases like web research, reading live docs, clicking, filling, screenshots, extraction, and authenticated/profile-based workflows. Low-level wrapper details like `stdin` and exact CLI args belong in the schema and guidelines, not the lead description.
-The tool also needs an operating playbook, not just a capability list. The model should not have to rediscover basics each session. Guidance should explicitly encode the normal browser workflow (`open` -> `snapshot -i` -> interact -> re-snapshot), the authenticated-content workflow (prefer `--profile Default` on the first browser call and let the implicit session carry continuity; use `--auto-connect` as a fallback when profile reuse is unavailable), and the preferred recovery path when a session opens on the wrong tab, an action changes origin unexpectedly, or an `open` call returns blocked/blank/unexpected results (`tab list` / `tab <tab-id-or-label>` / `snapshot -i` before retrying different URLs or fallback strategies). It should also discourage inventing fixed explicit session names for routine tasks, because those names leak stale browser state across otherwise unrelated `pi` sessions. For read-only browsing tasks, guidance should prefer answering from the current page state first: use the current snapshot, structured ref labels, or `eval --stdin` on the current page before navigating into media viewers, detail routes, or other new pages unless the current view lacks the needed information. When using `eval --stdin`, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics. When using `eval --stdin` for extraction, return the intended value instead of relying on `console.log` as the primary result channel.
+The tool also needs an operating playbook, not just a capability list. The model should not have to rediscover basics each session. Guidance should explicitly encode the normal browser workflow (`open` -> `snapshot -i` -> interact -> re-snapshot), the authenticated-content workflow (prefer `--profile Default` on the first browser call and let the implicit session carry continuity; use `--auto-connect` as a fallback when profile reuse is unavailable), and the preferred recovery path when a session opens on the wrong tab, an action changes origin unexpectedly, or an `open` call returns blocked/blank/unexpected results (`tab list` / `tab <tab-id-or-label>` / `snapshot -i` before retrying different URLs or fallback strategies). It should also discourage inventing fixed explicit session names for routine tasks, because those names leak stale browser state across otherwise unrelated `pi` sessions. For read-only browsing tasks, guidance should prefer answering from the current page state first: use the current snapshot, structured ref labels, or `eval --stdin` on the current page before navigating into media viewers, detail routes, or other new pages unless the current view lacks the needed information. For downloads, guidance should explicitly prefer `download <selector> <path>` over `click` when the goal is a file on disk. When using `eval --stdin`, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics. When using `eval --stdin` for extraction, return the intended value instead of relying on `console.log` as the primary result channel. Because the extension blocks normal direct-binary usage in most agent sessions, the repository must also carry a local command reference that stays in sync with the effective tool surface.
 ## Parameters
@@ -109,7 +110,7 @@ Primary content should be:
 Examples:
 - small `snapshot` results should include the actual snapshot text
-- oversized `snapshot` results should switch to a compact view that preserves the primary content, nearby sections, and a trimmed set of high-value refs, while exposing the full raw snapshot path via `details.fullOutputPath`
+- oversized `snapshot` results should switch to a compact view that preserves the primary content, nearby sections, and a trimmed set of high-value refs, while exposing the full raw snapshot path directly in the rendered tool text and via `details.fullOutputPath`
 - successful navigation actions like `click`, `back`, `forward`, and `reload` should include a lightweight post-action title/url summary when the wrapper can address the active session
 - `tab list` should include a readable tab summary
 - `screenshot` should include the saved-path summary plus the inline image attachment when available
@@ -141,13 +142,13 @@ Additional structured fields can appear when relevant:
 - `batchFailure` and `batchSteps` for `batch` rendering, including mixed-success runs
 - `navigationSummary` for navigation-style commands like `click`, `back`, `forward`, and `reload`
 - `imagePath` / `imagePaths` for screenshots and batched image outputs
-- `fullOutputPath` / `fullOutputPaths` when large snapshot output is compacted and spilled to a private file; persisted sessions keep that path under a private session-scoped artifact directory with a bounded per-session budget so it survives reload/resume without unbounded growth
+- `fullOutputPath` / `fullOutputPaths` when large snapshot output or other oversized tool output is compacted and spilled to a private file; persisted sessions keep that path under a private session-scoped artifact directory with a bounded per-session budget so it survives reload/resume without unbounded growth
 - `sessionRecoveryHint` when startup-scoped flags need `sessionMode: "fresh"`
 - `inspection: true` plus `stdout` for successful plain-text inspection commands like `--help` and `--version`
 When the tool echoes `args` or `effectiveArgs` back into Pi, sensitive values such as `--headers`, proxy credentials, and auth-bearing URL parameters should be redacted first.
-For oversized snapshots, details should switch to a compact metadata object and include `fullOutputPath` pointing at a private JSON spill file with the full upstream snapshot payload. Persisted sessions should keep that spill file under a private session-scoped artifact directory so the path remains usable after reload/restart, with the oldest persisted spill files evicted as needed to stay within the per-session budget.
+For oversized snapshots and other oversized tool outputs, details should switch to a compact metadata object and include `fullOutputPath` pointing at a private spill file with the full upstream payload. The model-facing tool text should print the actual spill-file path when one exists instead of only saying to inspect a details key. Persisted sessions should keep that spill file under a private session-scoped artifact directory so the path remains usable after reload/restart, with the oldest persisted spill files evicted as needed to stay within the per-session budget.
 ## High-value result rendering
@@ -155,7 +156,8 @@ For oversized snapshots, details should switch to a compact metadata object and
 Worth doing in v1:
 - screenshots → inline image attachment
-- snapshots → origin + ref count + main-content-first compact preview, with the raw snapshot spill path kept in `details.fullOutputPath` when the inline result would otherwise be too large
+- snapshots → origin + ref count + main-content-first compact preview, with the raw snapshot spill path printed directly in content and kept in `details.fullOutputPath` when the inline result would otherwise be too large
+- oversized generic outputs such as large `eval --stdin` payloads → compact preview plus the actual spill file path instead of dumping the whole payload into model context
 - extraction-style commands like `eval --stdin` and `get title` → scalar-first text with lightweight origin context when available
 - navigation actions like `click`, `back`, `forward`, and `reload` → lightweight post-action title/url summary when available
 - tab lists → compact summary/table
@@ -184,6 +186,7 @@ If `agent-browser` is not on `PATH`, fail with a message that:
 - pass explicit `--profile` straight through to upstream `agent-browser`; no profile-cloning or isolation layer is added in v1
 - after profiled `open` / `goto` / `navigate`, if upstream leaves a restored profile tab active instead of the page that was just opened, best-effort switch back to the tab whose URL matches the returned open result before returning control to the agent
 - once the wrapper has a known tab target for a session, later active-tab commands may best-effort pin that tab inside the same upstream invocation so reconnect drift does not send a `click`, `snapshot`, or similar action to a restored/background tab instead
+- after a successful command on a known tab target, the wrapper may best-effort restore that same target again if a restored/background tab steals focus after the command completes
 - on local Unix launches, set a short private socket directory for wrapper-spawned `agent-browser` processes so extension-generated session names do not fail the upstream Unix socket-path length limit in longer cwd/session-name combinations
 - treat successful plain-text inspection commands like `--help` and `--version` as stateless: do not inject the implicit managed session and do not let those calls claim the managed-session slot
 - if startup-scoped flags like `--profile`, `--session-name`, or `--cdp` are supplied after the implicit session is already active while `sessionMode` is `"auto"`, return a validation error with a structured recovery hint that recommends `sessionMode: "fresh"`

package/extensions/agent-browser/index.ts CHANGED Viewed

@@ -65,6 +65,7 @@ const QUICK_START_GUIDELINES = [
 	"Quick start mental model: args are the exact agent-browser CLI args after the binary; stdin is only for batch and eval --stdin; sessionMode=fresh switches the extension-managed session to a fresh upstream launch when you need new --profile, --session-name, or --cdp state.",
 	"Common first calls: { args: [\"open\", \"https://example.com\"] } then { args: [\"snapshot\", \"-i\"] }; after navigation, use { args: [\"click\", \"@e2\"] } then { args: [\"snapshot\", \"-i\"] }.",
 	"Common advanced calls: { args: [\"batch\"], stdin: \"[[\\\"open\\\",\\\"https://example.com\\\"],[\\\"snapshot\\\",\\\"-i\\\"]]\" }, { args: [\"eval\", \"--stdin\"], stdin: \"document.title\" }, and { args: [\"--profile\", \"Default\", \"open\", \"https://example.com/account\"], sessionMode: \"fresh\" }.",
+	"High-value command reference: download <selector> <path> saves a file triggered by a click; get title/url/text/html/value/attr/count reads page state; screenshot [path] captures an image; pdf <path> saves a PDF; tab list and tab <tab-id-or-label> inspect or recover the active tab.",
 ] as const;
 const BRAVE_SEARCH_PROMPT_GUIDELINE =
 	"When a non-empty BRAVE_API_KEY is available in the current environment, prefer the Brave Search API via bash/curl to discover specific destination URLs, then open the chosen URL with agent_browser instead of browsing a search engine results page just to find the target.";
@@ -74,9 +75,10 @@ const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
 	"Do not invent fixed explicit session names for routine tasks. Use the implicit session unless you truly need multiple isolated browser sessions in the same conversation.",
 	"When using --profile, --session-name, or --cdp, put them on the first command for that session. If you intentionally use an explicit --session, keep using that same explicit session for follow-ups.",
 	"If you already used the implicit session and now need startup-scoped flags like --profile, --session-name, or --cdp, retry with sessionMode set to fresh or pass an explicit --session for the new launch. After a successful unnamed fresh launch, later auto calls follow that new session.",
-	"If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. Only use wait with an explicit argument like milliseconds, --load, --url, --fn, or --text.",
+	"If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. Only use wait with an explicit argument like milliseconds, --load <state>, --url <matcher>, --fn <js>, or --text <matcher>.",
 	"For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.",
 	"For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.",
+	"For downloads, prefer download <selector> <path> when an element click should save a file. Do not rely on click alone when you need the downloaded file on disk.",
 	"When using eval --stdin, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics.",
 	"When using eval --stdin for extraction, return the value you want instead of relying on console.log as the primary result channel.",
 	"Do not call --help or other exploratory inspection commands unless the user explicitly asks for them or debugging the browser integration is necessary.",
@@ -322,6 +324,7 @@ function extractStringResultField(data: unknown, fieldName: "title" | "url"): st
 }
 const SESSION_TAB_PINNING_EXCLUDED_COMMANDS = new Set(["batch", "close", "goto", "navigate", "open", "session", "tab"]);
+const SESSION_TAB_POST_COMMAND_CORRECTION_EXCLUDED_COMMANDS = new Set(["batch", "close", "session", "tab"]);
 interface SessionTabTarget {
 	title?: string;
@@ -413,6 +416,14 @@ function shouldPinSessionTabForCommand(options: { command?: string; sessionName?
 	);
 }
+/**
+ * Reports whether the wrapper should re-check the session's tab after a command has run.
+ * Both a session name and a command must be present, and the command must not be listed in
+ * SESSION_TAB_POST_COMMAND_CORRECTION_EXCLUDED_COMMANDS.
+ */
+function shouldCorrectSessionTabAfterCommand(options: { command?: string; sessionName?: string }): boolean {
+	const { command, sessionName } = options;
+	if (sessionName === undefined || command === undefined) {
+		return false;
+	}
+	return !SESSION_TAB_POST_COMMAND_CORRECTION_EXCLUDED_COMMANDS.has(command);
+}
 function selectSessionTargetTab(options: {
 	tabs: Array<{ active?: boolean; index?: number; label?: string; tabId?: string; title?: string; url?: string }>;
 	target: SessionTabTarget;
@@ -918,12 +929,42 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 					}
 				}
+				const observedSessionTabTarget =
+					normalizeSessionTabTarget(navigationSummary) ?? extractSessionTabTargetFromData(presentationEnvelope?.data);
 				const currentSessionTabTarget = deriveSessionTabTarget({
 					command: executionPlan.commandInfo.command,
 					data: presentationEnvelope?.data,
 					navigationSummary,
 					previousTarget: priorSessionTabTarget,
 				});
+				if (
+					succeeded &&
+					priorSessionTabTarget &&
+					!sessionTabCorrection &&
+					observedSessionTabTarget &&
+					shouldCorrectSessionTabAfterCommand({
+						command: executionPlan.commandInfo.command,
+						sessionName: executionPlan.sessionName,
+					})
+				) {
+					const postCommandTabCorrection = await collectSessionTabSelection({
+						cwd: ctx.cwd,
+						sessionName: executionPlan.sessionName,
+						signal,
+						target: observedSessionTabTarget,
+					});
+					if (postCommandTabCorrection) {
+						const appliedPostCommandCorrection = await applyOpenResultTabCorrection({
+							correction: postCommandTabCorrection,
+							cwd: ctx.cwd,
+							sessionName: executionPlan.sessionName,
+							signal,
+						});
+						if (appliedPostCommandCorrection && !sessionTabCorrection) {
+							sessionTabCorrection = appliedPostCommandCorrection;
+						}
+					}
+				}
 				if (executionPlan.sessionName) {
 					if (executionPlan.commandInfo.command === "close" && succeeded) {
 						sessionTabTargets.delete(executionPlan.sessionName);

package/extensions/agent-browser/lib/results/presentation.ts CHANGED Viewed

@@ -10,7 +10,11 @@ import { readFile, stat } from "node:fs/promises";
 import { resolve } from "node:path";
 import { parseCommandInfo, type CommandInfo } from "../runtime.js";
-import { type PersistentSessionArtifactStore } from "../temp.js";
+import {
+	type PersistentSessionArtifactStore,
+	writePersistentSessionArtifactFile,
+	writeSecureTempFile,
+} from "../temp.js";
 import { buildSnapshotPresentation, formatRawSnapshotText, formatSnapshotSummary } from "./snapshot.js";
 import {
 	type AgentBrowserBatchResult,
@@ -19,8 +23,10 @@ import {
 	type BatchStepPresentationDetails,
 	type ToolPresentation,
 	isRecord,
+	countLines,
 	parsePositiveInteger,
 	stringifyUnknown,
+	truncateText,
 } from "./shared.js";
 const IMAGE_EXTENSION_TO_MIME_TYPE: Record<string, string> = {
@@ -35,6 +41,11 @@ const INLINE_IMAGE_MAX_BYTES_ENV = "PI_AGENT_BROWSER_INLINE_IMAGE_MAX_BYTES";
 const DEFAULT_INLINE_IMAGE_MAX_BYTES = 5 * 1_024 * 1_024;
 const NAVIGATION_SUMMARY_COMMANDS = new Set(["back", "click", "dblclick", "forward", "reload"]);
 const NAVIGATION_SUMMARY_FIELD = "navigationSummary";
+// Inline limits: shouldCompactLargeOutput compacts any presentation text that exceeds either one.
+const LARGE_OUTPUT_INLINE_MAX_CHARS = 8_000;
+const LARGE_OUTPUT_INLINE_MAX_LINES = 120;
+// Preview budgets consumed by buildLargeOutputPreview while it assembles the compacted preview.
+const LARGE_OUTPUT_PREVIEW_MAX_CHARS = 2_500;
+const LARGE_OUTPUT_PREVIEW_MAX_LINES = 40;
+// File-name prefix handed to the spill-file writers by writeLargeOutputSpillFile.
+const LARGE_OUTPUT_FILE_PREFIX = "pi-agent-browser-output";
 interface NavigationSummary {
 	title?: string;
@@ -114,6 +125,19 @@ function getScreenshotSummary(data: Record<string, unknown>): string | undefined
 	return typeof data.path === "string" ? `Saved image: ${data.path}` : undefined;
 }
+/**
+ * Builds the one-line saved-file message for commands that write a file to disk.
+ * Returns undefined when `data.path` is not a string, or when the command is neither
+ * `download` nor `pdf`.
+ */
+function getSavedFileSummary(commandInfo: CommandInfo, data: Record<string, unknown>): string | undefined {
+	const { path } = data;
+	if (typeof path !== "string") {
+		return undefined;
+	}
+	switch (commandInfo.command) {
+		case "download":
+			return `Downloaded file: ${path}`;
+		case "pdf":
+			return `Saved PDF: ${path}`;
+		default:
+			return undefined;
+	}
+}
 function getScalarExtractionResult(data: Record<string, unknown>): string | undefined {
 	const { result } = data;
 	if (typeof result === "string") {
@@ -437,6 +461,10 @@ function formatSummary(commandInfo: CommandInfo, data: unknown): string {
 		if (commandInfo.command === "screenshot" && typeof data.path === "string") {
 			return `Screenshot saved: ${data.path}`;
 		}
+		const savedFileSummary = getSavedFileSummary(commandInfo, data);
+		if (savedFileSummary) {
+			return savedFileSummary;
+		}
 		const extractionSummary = formatExtractionSummary(commandInfo, data);
 		if (extractionSummary) {
 			return extractionSummary;
@@ -490,6 +518,10 @@ function formatContentText(commandInfo: CommandInfo, data: unknown): string {
 		const screenshotSummary = getScreenshotSummary(data);
 		if (screenshotSummary) return screenshotSummary;
 	}
+	const savedFileSummary = getSavedFileSummary(commandInfo, data);
+	if (savedFileSummary) {
+		return savedFileSummary;
+	}
 	const extractionText = formatExtractionText(commandInfo, data);
 	if (extractionText) {
@@ -546,6 +578,115 @@ async function attachInlineImage(presentation: ToolPresentation, imagePath: stri
 	}
 }
+/** Returns true once `text` exceeds either the inline character limit or the inline line limit. */
+function shouldCompactLargeOutput(text: string): boolean {
+	if (text.length > LARGE_OUTPUT_INLINE_MAX_CHARS) {
+		return true;
+	}
+	return countLines(text) > LARGE_OUTPUT_INLINE_MAX_LINES;
+}
+/**
+ * Assembles a bounded preview from the leading lines of `text`.
+ *
+ * Lines are taken in order until either the preview line budget or the preview character
+ * budget is used up. Each taken line goes through truncateText with a limit equal to the
+ * remaining character budget, floored at 40.
+ *
+ * @returns the joined preview plus the number of source lines that were not included at all
+ *          (a line that was only shortened is not counted as omitted).
+ */
+function buildLargeOutputPreview(text: string): { omittedLineCount: number; previewText: string } {
+	const sourceLines = text.split("\n");
+	const keptLines: string[] = [];
+	let usedChars = 0;
+	for (const sourceLine of sourceLines) {
+		const hasRoom = keptLines.length < LARGE_OUTPUT_PREVIEW_MAX_LINES && usedChars < LARGE_OUTPUT_PREVIEW_MAX_CHARS;
+		if (!hasRoom) {
+			break;
+		}
+		const lineLimit = Math.max(40, LARGE_OUTPUT_PREVIEW_MAX_CHARS - usedChars);
+		const clippedLine = truncateText(sourceLine, lineLimit);
+		keptLines.push(clippedLine);
+		// The extra 1 accounts for the newline that joins this line to the next.
+		usedChars += clippedLine.length + 1;
+	}
+	const omittedLineCount = Math.max(0, sourceLines.length - keptLines.length);
+	const previewText = keptLines.join("\n");
+	return { omittedLineCount, previewText };
+}
+/**
+ * Writes the full, uncompacted output to a private spill file and resolves to that file's path.
+ *
+ * Payload selection:
+ * - string `data` is written verbatim
+ * - number / boolean `data` is written via String()
+ * - undefined `data` falls back to the already-rendered presentation `text`
+ * - anything else is serialized with stringifyUnknown
+ *
+ * Only the stringifyUnknown branch is given a `.json` suffix; every other branch writes plain
+ * text and gets `.txt`. Previously the undefined-data fallback was also labeled `.json` even
+ * though its content is rendered text.
+ * NOTE(review): assumes stringifyUnknown emits JSON for non-primitive values — confirm.
+ *
+ * The file goes to the persistent session artifact store when one is supplied, otherwise to a
+ * secure temp file. Errors from either writer propagate to the caller.
+ */
+async function writeLargeOutputSpillFile(options: {
+	data: unknown;
+	persistentArtifactStore?: PersistentSessionArtifactStore;
+	text: string;
+}): Promise<string> {
+	const { data, persistentArtifactStore, text } = options;
+	let payload: string;
+	let suffix = ".txt";
+	if (typeof data === "string") {
+		payload = data;
+	} else if (typeof data === "number" || typeof data === "boolean") {
+		payload = String(data);
+	} else if (data === undefined) {
+		// No upstream data: the spilled content is the rendered text, so it must not be labeled JSON.
+		payload = text;
+	} else {
+		payload = stringifyUnknown(data);
+		suffix = ".json";
+	}
+	const fileOptions = {
+		content: payload,
+		prefix: LARGE_OUTPUT_FILE_PREFIX,
+		suffix,
+	};
+	return persistentArtifactStore
+		? await writePersistentSessionArtifactFile({ ...fileOptions, store: persistentArtifactStore })
+		: await writeSecureTempFile(fileOptions);
+}
+/**
+ * Replaces oversized presentation text with a compact preview and spills the full output to a file.
+ *
+ * When the presentation text is empty or within the inline limits, the presentation is returned
+ * untouched. Otherwise the presentation is mutated in place:
+ * - its first text part (or a new leading text part if there is none) becomes the compacted text,
+ *   which ends with either the spill-file path or the reason the spill failed
+ * - `data` becomes a metadata object describing the original and preview sizes
+ * - `fullOutputPath` is set (undefined when the spill failed)
+ * - `summary` gets a " (compact)" suffix
+ *
+ * A failed spill write is tolerated: the error message is recorded and compaction still happens.
+ */
+async function compactLargePresentationOutput(options: {
+	commandInfo: CommandInfo;
+	data: unknown;
+	persistentArtifactStore?: PersistentSessionArtifactStore;
+	presentation: ToolPresentation;
+}): Promise<ToolPresentation> {
+	const { commandInfo, data, persistentArtifactStore, presentation } = options;
+	const text = getPresentationText(presentation);
+	const needsCompaction = text.length !== 0 && shouldCompactLargeOutput(text);
+	if (!needsCompaction) {
+		return presentation;
+	}
+	let fullOutputPath: string | undefined;
+	let spillErrorText: string | undefined;
+	try {
+		fullOutputPath = await writeLargeOutputSpillFile({ data, persistentArtifactStore, text });
+	} catch (error) {
+		// Best effort: keep compacting even when the spill file cannot be written.
+		spillErrorText = error instanceof Error ? error.message : String(error);
+	}
+	const preview = buildLargeOutputPreview(text);
+	const lines = [`Large ${commandInfo.command ?? "agent-browser"} output compacted.`, "", "Preview:", preview.previewText];
+	if (preview.omittedLineCount > 0) {
+		lines.push(`- ... (${preview.omittedLineCount} additional lines omitted)`);
+	}
+	const locationLine = fullOutputPath
+		? `Full output path: ${fullOutputPath}`
+		: `Full output unavailable: ${spillErrorText ?? "spill file could not be created."}`;
+	lines.push("", locationLine);
+	const compactedText = lines.join("\n");
+	const firstTextIndex = presentation.content.findIndex((part) => part.type === "text");
+	if (firstTextIndex < 0) {
+		presentation.content.unshift({ type: "text", text: compactedText });
+	} else {
+		presentation.content[firstTextIndex] = { type: "text", text: compactedText };
+	}
+	presentation.data = {
+		compacted: true,
+		fullOutputPath,
+		outputCharCount: text.length,
+		outputLineCount: countLines(text),
+		previewCharCount: preview.previewText.length,
+		previewLineCount: countLines(preview.previewText),
+		spillError: spillErrorText,
+	};
+	presentation.fullOutputPath = fullOutputPath;
+	presentation.summary = `${presentation.summary} (compact)`;
+	return presentation;
+}
 export async function buildToolPresentation(options: {
 	commandInfo: CommandInfo;
 	cwd: string;
@@ -575,9 +716,11 @@ export async function buildToolPresentation(options: {
 				  };
 	const imagePath = extractImagePath(cwd, data);
-	if (!imagePath) {
-		return presentation;
-	}
-	return await attachInlineImage(presentation, imagePath);
+	const presentationWithImage = imagePath ? await attachInlineImage(presentation, imagePath) : presentation;
+	return await compactLargePresentationOutput({
+		commandInfo,
+		data,
+		persistentArtifactStore,
+		presentation: presentationWithImage,
+	});
 }

package/extensions/agent-browser/lib/results/snapshot.ts CHANGED Viewed

@@ -579,7 +579,7 @@ export async function buildSnapshotPresentation(
 		);
 		if (fallbackPreview.omittedCount > 0) {
 			lines.push(
-				`- ... (${fallbackPreview.omittedCount} additional snapshot lines omitted; ${fullOutputPath ? "use the spill file for everything" : "the full raw snapshot was omitted"})`,
+				`- ... (${fallbackPreview.omittedCount} additional snapshot lines omitted; ${fullOutputPath ? `full output path: ${fullOutputPath}` : "the full raw snapshot was omitted"})`,
 			);
 		}
 	} else {
@@ -614,7 +614,7 @@ export async function buildSnapshotPresentation(
 	lines.push(
 		"",
 		fullOutputPath
-			? "Full raw snapshot path is available in details.fullOutputPath."
+			? `Full raw snapshot path: ${fullOutputPath}`
 			: `Full raw snapshot unavailable: ${spillErrorText ?? "temp spill file could not be created."}`,
 	);

package/extensions/agent-browser/lib/temp.ts CHANGED Viewed

@@ -260,7 +260,7 @@ export async function writeSecureTempChunk(options: {
 	const { content, fileHandle, path } = options;
 	await enqueueTempMutation(async () => {
 		await assertSecureTempRootBudget(dirname(path), getTempArtifactByteLength(content));
-		await fileHandle.writeFile(content);
+		await fileHandle.appendFile(content);
 	});
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pi-agent-browser-native",
-  "version": "0.2.8",
+  "version": "0.2.10",
   "description": "pi extension that exposes agent-browser as a native tool for browser automation",
   "type": "module",
   "author": "Mitch Fultz (https://github.com/fitchmultz)",
@@ -32,6 +32,7 @@
     "CHANGELOG.md",
     "LICENSE",
     "docs/ARCHITECTURE.md",
+    "docs/COMMAND_REFERENCE.md",
     "docs/RELEASE.md",
     "docs/REQUIREMENTS.md",
     "docs/TOOL_CONTRACT.md"
@@ -45,14 +46,14 @@
     "@sinclair/typebox": "*"
   },
   "devDependencies": {
-    "@mariozechner/pi-coding-agent": "^0.67.4",
+    "@mariozechner/pi-coding-agent": "^0.67.68",
     "@sinclair/typebox": "^0.34.49",
     "@types/node": "^25.6.0",
     "tsx": "^4.21.0",
-    "typescript": "^6.0.2"
+    "typescript": "^6.0.3"
   },
   "overrides": {
-    "basic-ftp": "5.2.2"
+    "basic-ftp": "5.3.0"
   },
   "scripts": {
     "typecheck": "tsc --noEmit",