pi-agent-browser-native 0.2.8 → 0.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,28 @@
2
2
 
3
3
  ## Unreleased
4
4
 
5
+ ## 0.2.10 - 2026-04-18
6
+
7
+ ### Changed
8
+ - bumped the local pi development baseline to `@mariozechner/pi-coding-agent` `0.67.68` and `typescript` `6.0.3`
9
+ - refreshed the release lockfile against the current stable pi patch line
10
+
11
+ ### Fixed
12
+ - pinned the transitive `basic-ftp` dependency to `5.3.0` to clear the current audit finding during local verification and publish checks
13
+
14
+ ## 0.2.9 - 2026-04-17
15
+
16
+ ### Fixed
17
+ - for large non-snapshot outputs such as oversized `eval --stdin` payloads, the wrapper now compacts inline content, spills the full payload to a private file, and prints the actual spill path directly in tool content instead of dumping huge raw output into model context
18
+ - file-save flows now render `download` results as explicit saved-file summaries so agents can see the downloaded path directly
19
+ - when a known target tab stays correct at command start but a restored/background tab steals focus after the command completes, the wrapper now best-effort restores the intended tab before returning control
20
+ - compact snapshot text now prints the actual raw-spill file path directly instead of only referring agents to `details.fullOutputPath`
21
+
22
+ ### Changed
23
+ - added a published `docs/COMMAND_REFERENCE.md` so agents have a repo-readable local command/help surface even when direct `agent-browser` binary usage is blocked
24
+ - expanded tool guidance, README, release notes, and repo guidance with download workflows, better `wait` usage, oversized-output handling, and the documentation-sync rule for upstream `agent-browser` updates
25
+ - clarified the checkout-versus-installed-package workflow in README, release notes, and repo agent guidance so local development keeps one active Pi package source for this extension at a time instead of treating the published entrypoint file as optional
26
+
5
27
  ## 0.2.8 - 2026-04-16
6
28
 
7
29
  ### Fixed
package/README.md CHANGED
@@ -85,7 +85,9 @@ Until you are using a published package release, prefer an explicit checkout-onl
85
85
  pi --no-extensions -e /absolute/path/to/pi-agent-browser-native
86
86
  ```
87
87
 
88
- This avoids duplicate `agent_browser` registrations if you also have the published package installed globally.
88
+ This keeps the checkout isolated from any other active package source for the same extension.
89
+
90
+ This repository's `package.json` is itself a publishable Pi package manifest that points at `extensions/agent-browser/index.ts`. That file is the real extension entrypoint for both the checkout and the published package. Keep exactly one active source for this extension in Pi settings at a time: either this checkout path or the published npm package.
89
91
 
90
92
  The native tool exposed to the agent is named `agent_browser`.
91
93
 
@@ -132,6 +134,12 @@ Evaluate page JavaScript via stdin:
132
134
  { "args": ["eval", "--stdin"], "stdin": "document.title" }
133
135
  ```
134
136
 
137
+ Download a file to an explicit path instead of relying on `click` alone:
138
+
139
+ ```json
140
+ { "args": ["download", "@e5", "/tmp/report.pdf"] }
141
+ ```
142
+
135
143
  Start a fresh profiled launch after you already used the implicit session:
136
144
 
137
145
  ```json
@@ -154,17 +162,21 @@ Use the agent_browser tool to open https://react.dev and then take an interactiv
154
162
 
155
163
  ## Local development
156
164
 
157
- Do not track or rely on a repo-local `.pi/extensions/agent-browser.ts` autoload shim for this package. When the package is also installed globally, that creates a duplicate `agent_browser` registration and blocks `pi` startup from this working directory.
165
+ Do not track or rely on a repo-local `.pi/extensions/agent-browser.ts` autoload shim for this package. Such a shim creates an unnecessary second registration path for the extension.
158
166
 
167
+ The published entrypoint lives at `extensions/agent-browser/index.ts` and is referenced directly from this repo's `package.json`. While developing this repo, keep the checkout path enabled in Pi settings and disable or uninstall `npm:pi-agent-browser-native` so Pi has only one active source for this extension.
168
+
169
+ Recommended local development setup:
159
170
  1. Install `agent-browser` separately via the upstream project.
160
171
  2. Run `npm install`.
161
- 3. Launch `pi` from this repository root with only the checkout extension loaded:
172
+ 3. Keep the checkout path enabled in Pi settings and disable or uninstall `npm:pi-agent-browser-native` while developing this repo.
173
+ 4. Launch `pi` from this repository root with only the checkout extension loaded:
162
174
 
163
175
  ```bash
164
176
  pi --no-extensions -e .
165
177
  ```
166
178
 
167
- 4. Prompt the agent to use `agent_browser`.
179
+ 5. Prompt the agent to use `agent_browser`.
168
180
 
169
181
  Example prompt:
170
182
 
@@ -172,6 +184,8 @@ Example prompt:
172
184
  Use the agent_browser tool to open https://react.dev and then take an interactive snapshot.
173
185
  ```
174
186
 
187
+ For installed-package validation after a release, temporarily do the reverse: disable/remove the checkout path from Pi settings and validate the published npm package, or use an isolated ephemeral run such as `pi --no-extensions -e npm:pi-agent-browser-native@<version>`.
188
+
175
189
  Validated workflow examples:
176
190
 
177
191
  - open a page and snapshot it
@@ -184,6 +198,8 @@ Validated workflow examples:
184
198
  - run `eval --stdin`
185
199
  - take a screenshot with inline attachment support
186
200
  - inspect `agent_browser --help` and `--version` via the tool's stateless plain-text inspection fallback
201
+ - use `download <selector> <path>` for attachment/file-save workflows instead of trying to infer downloads from generic clicks or large eval dumps
202
+ - confirm oversized outputs show the actual spill file path directly in tool content, not just a details key name
187
203
 
188
204
  Inspection commands like `agent_browser --help` and `--version` are always supported. They return plain text, are useful for debugging or capability checks, and stay stateless: the extension does not inject its implicit session for them and they do not consume the managed-session slot needed for a later `--profile`, `--session-name`, or `--cdp` launch.
189
205
 
@@ -195,7 +211,9 @@ Current cautions:
195
211
  - for local Unix launches, the wrapper uses a short private socket directory under `/tmp` so extension-generated session names do not trip upstream Unix socket-path limits in longer cwd/session-name combinations
196
212
  - for direct headless local Chrome launches to `chat.com`, `chatgpt.com`, and `chat.openai.com`, the extension injects a normal Chrome user agent when the caller did not explicitly provide `--user-agent`; this keeps the default headless workflow usable without forcing `--headed` or `--auto-connect`
197
213
  - after profiled `open` calls, the extension best-effort re-selects the tab that matches the returned page URL when restored profile tabs steal focus during launch
198
- - after a target tab is known, later active-tab commands like `click` and `snapshot -i` best-effort pin that same tab inside the same upstream invocation when a reconnect would otherwise drift to a restored tab
214
+ - after a target tab is known, later active-tab commands best-effort pin that same tab inside the same upstream invocation when a reconnect would otherwise drift to a restored tab
215
+ - after a successful command, the extension also best-effort restores that intended tab when a restored/background tab steals focus after the command completes
216
+ - for oversized snapshots and oversized generic outputs, the extension compacts inline content and prints the actual spill file path directly in the tool result when a spill file exists
199
217
  - explicit caller-provided `--session` values are treated as user-managed and are not auto-closed by the extension
200
218
  - explicit caller-provided `--user-agent` values win over the ChatGPT/OpenAI compatibility workaround
201
219
  - tool progress/details redact sensitive invocation values such as `--headers`, proxy credentials, and auth-bearing URL parameters before echoing them back into Pi
@@ -231,6 +249,7 @@ If you want to name the new upstream session yourself, pass an explicit session
231
249
  - [`docs/REQUIREMENTS.md`](docs/REQUIREMENTS.md) — product requirements and constraints
232
250
  - [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md) — current architecture decision
233
251
  - [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md) — proposed v1 tool shape
252
+ - [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md) — local repo-readable command reference for agents when direct `agent-browser` binary usage is blocked
234
253
  - [`docs/RELEASE.md`](docs/RELEASE.md) — maintainer release and package verification workflow
235
254
 
236
255
  ## Documentation rule
@@ -240,3 +259,10 @@ When requirements change in chat:
240
259
  1. update `docs/REQUIREMENTS.md`
241
260
  2. update the affected design docs
242
261
  3. update this README if user-facing expectations changed
262
+
263
+ When the upstream `agent-browser` binary changes:
264
+
265
+ 1. re-check the upstream command/help surface
266
+ 2. update `docs/COMMAND_REFERENCE.md`
267
+ 3. update tool guidance, README, and release docs if behavior or recommended usage changed
268
+ 4. verify the local extension-side documentation is still at least as usable as the blocked direct-binary help path
@@ -89,6 +89,7 @@ Practical policy:
89
89
  - leave explicit caller-provided `--session` choices alone unless the caller closes them explicitly
90
90
  - after profiled `open` / `goto` / `navigate` calls, verify the active tab still matches the returned page URL and best-effort switch back when restored profile tabs steal focus
91
91
  - once the wrapper knows which tab the agent is operating on, later active-tab commands may synthesize a tiny upstream `batch` that re-selects that tab and then runs the requested command in the same upstream invocation; this stays thin while avoiding reconnect-time drift on profile-restored sessions
92
+ - after a successful command on a known tab target, the wrapper may best-effort restore that same target again if restored/background tabs steal focus after the command returns
92
93
  - for local Unix launches, set a short private socket directory so extension-generated session names do not fail on the upstream Unix socket-path length limit
93
94
 
94
95
  This is primarily about ownership clarity and avoiding surprise, not adding a heavy safety wrapper. If the extension invented the session, the extension should own its lifecycle without breaking reload/resume semantics. If the caller explicitly chose the upstream session model, the extension should stay out of the way.
@@ -126,7 +127,7 @@ This keeps the product centered on native tool usage instead of auxiliary skill
126
127
  - compact result summaries
127
128
  - inline screenshots/images
128
129
  - lightweight session convenience
129
- - docs
130
+ - docs, including a repo-readable command reference that mirrors the blocked direct-binary help path closely enough for normal agent work
130
131
 
131
132
  ### Upstream `agent-browser` owns
132
133
 
@@ -0,0 +1,264 @@
1
+ # Agent Browser command reference
2
+
3
+ Related docs:
4
+ - [`../README.md`](../README.md)
5
+ - [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md)
6
+ - [`ARCHITECTURE.md`](ARCHITECTURE.md)
7
+ - [`RELEASE.md`](RELEASE.md)
8
+
9
+ ## Purpose
10
+
11
+ Provide a local, repo-readable command reference for the native `agent_browser` tool.
12
+
13
+ This project intentionally blocks normal `agent-browser` bash usage in most agent sessions, so the agent still needs an accessible local equivalent of the upstream command surface. This document is the durable reference the agent can read inside the repository without calling the binary directly.
14
+
15
+ ## Core mental model
16
+
17
+ Tool parameters:
18
+
19
+ ```json
20
+ {
21
+ "args": ["open", "https://example.com"],
22
+ "stdin": "optional raw stdin content",
23
+ "sessionMode": "auto"
24
+ }
25
+ ```
26
+
27
+ - `args`: exact `agent-browser` CLI tokens after the binary name
28
+ - `stdin`: only for commands like `batch` and `eval --stdin`
29
+ - `sessionMode`:
30
+ - `"auto"`: reuse the extension-managed session when possible
30
+ - `"fresh"`: rotate that managed session to a fresh upstream launch so startup-scoped flags like `--profile`, `--session-name`, or `--cdp` apply
32
+
33
+ ## Recommended workflow
34
+
35
+ ### Normal browse flow
36
+
37
+ ```json
38
+ { "args": ["open", "https://example.com"] }
39
+ { "args": ["snapshot", "-i"] }
40
+ { "args": ["click", "@e2"] }
41
+ { "args": ["snapshot", "-i"] }
42
+ ```
43
+
44
+ ### Extract page data
45
+
46
+ ```json
47
+ { "args": ["get", "title"] }
48
+ { "args": ["get", "url"] }
49
+ { "args": ["eval", "--stdin"], "stdin": "document.title" }
50
+ ```
51
+
52
+ ### Run a multi-step flow in one browser invocation
53
+
54
+ ```json
55
+ { "args": ["batch"], "stdin": "[[\"open\",\"https://example.com\"],[\"snapshot\",\"-i\"]]" }
56
+ ```
57
+
58
+ ### Switch from an already-active implicit session to a fresh profiled launch
59
+
60
+ ```json
61
+ {
62
+ "args": ["--profile", "Default", "open", "https://mail.google.com"],
63
+ "sessionMode": "fresh"
64
+ }
65
+ ```
66
+
67
+ ## High-value commands
68
+
69
+ ### Open and navigation
70
+
71
+ - `open <url>`
72
+ - `goto <url>`
73
+ - `navigate <url>`
74
+ - `back`
75
+ - `forward`
76
+ - `reload`
77
+
78
+ Examples:
79
+
80
+ ```json
81
+ { "args": ["open", "https://react.dev"] }
82
+ { "args": ["reload"] }
83
+ ```
84
+
85
+ ### Snapshot and page inspection
86
+
87
+ - `snapshot`
88
+ - `snapshot -i` interactive elements only
89
+ - `snapshot -c` compact tree
90
+ - `snapshot -d <n>` limit depth
91
+ - `snapshot -s <selector>` scope to one subtree
92
+
93
+ Examples:
94
+
95
+ ```json
96
+ { "args": ["snapshot", "-i"] }
97
+ { "args": ["snapshot", "-i", "-s", "main"] }
98
+ ```
99
+
100
+ ### Element interaction
101
+
102
+ - `click <selector-or-@ref>`
103
+ - `dblclick <selector-or-@ref>`
104
+ - `hover <selector-or-@ref>`
105
+ - `focus <selector-or-@ref>`
106
+ - `type <selector-or-@ref> <text>`
107
+ - `fill <selector-or-@ref> <text>`
108
+ - `press <key>`
109
+ - `check <selector-or-@ref>`
110
+ - `uncheck <selector-or-@ref>`
111
+ - `select <selector-or-@ref> <value...>`
112
+ - `drag <src> <dst>`
113
+ - `upload <selector-or-@ref> <files...>`
114
+
115
+ Examples:
116
+
117
+ ```json
118
+ { "args": ["click", "@e12"] }
119
+ { "args": ["fill", "#email", "user@example.com"] }
120
+ { "args": ["press", "Enter"] }
121
+ ```
122
+
123
+ ### Downloads and saved files
124
+
125
+ Use the purpose-built command when a click should save a file.
126
+
127
+ - `download <selector-or-@ref> <path>`
128
+ - `pdf <path>`
129
+ - `screenshot [path]`
130
+
131
+ Examples:
132
+
133
+ ```json
134
+ { "args": ["download", "@e5", "/tmp/report.pdf"] }
135
+ { "args": ["pdf", "/tmp/page.pdf"] }
136
+ { "args": ["screenshot", "/tmp/page.png"] }
137
+ ```
138
+
139
+ Rules:
140
+
141
+ - Prefer `download <selector> <path>` over `click` when the goal is a downloaded file on disk.
142
+ - Prefer explicit output paths when the calling task needs to read, move, or attach the saved file later.
143
+ - Use `--download-path <dir>` on the first launch when many downloads should land in one directory.
144
+
145
+ ### Read page state
146
+
147
+ `get <subcommand>` supports:
148
+
149
+ - `title`
150
+ - `url`
151
+ - `text <selector>`
152
+ - `html <selector>`
153
+ - `value <selector>`
154
+ - `attr <selector> <name>`
155
+ - `count <selector>`
156
+ - `box <selector>`
157
+ - `styles <selector>`
158
+ - `cdp-url`
159
+
160
+ Examples:
161
+
162
+ ```json
163
+ { "args": ["get", "title"] }
164
+ { "args": ["get", "text", "main"] }
165
+ { "args": ["get", "attr", "a.primary", "href"] }
166
+ ```
167
+
168
+ ### JavaScript evaluation
169
+
170
+ - `eval <js>`
171
+ - `eval --stdin` with JavaScript in `stdin`
172
+
173
+ Example:
174
+
175
+ ```json
176
+ { "args": ["eval", "--stdin"], "stdin": "Array.from(document.querySelectorAll('a')).map((a) => a.href)" }
177
+ ```
178
+
179
+ Rules:
180
+
181
+ - Return the intended value instead of relying on `console.log`.
182
+ - Scope DOM queries to the relevant route, component, or element.
183
+ - Prefer `snapshot -i` refs first when the task is interaction-heavy.
184
+
185
+ ### Wait
186
+
187
+ - `wait <ms>`
188
+ - `wait <selector>`
189
+ - use explicit variants like `--load <state>`, `--url <matcher>`, `--fn <js>`, or `--text <matcher>` when needed
190
+
191
+ Important:
192
+
193
+ - bare `wait --load` is incomplete; `--load` needs a state value
194
+
195
+ ### Tabs
196
+
197
+ - `tab list`
198
+ - `tab <tab-id-or-label>`
199
+ - `tab new`
200
+ - `tab close`
201
+
202
+ Examples:
203
+
204
+ ```json
205
+ { "args": ["tab", "list"] }
206
+ { "args": ["tab", "t3"] }
207
+ ```
208
+
209
+ Use this when:
210
+
211
+ - a restored profile tab steals focus
212
+ - an interaction opens a new tab
213
+ - the browser lands on the wrong page unexpectedly
214
+
215
+ ### Batch
216
+
217
+ - `batch`
218
+ - `batch --bail`
219
+
220
+ Example:
221
+
222
+ ```json
223
+ { "args": ["batch", "--bail"], "stdin": "[[\"open\",\"https://example.com\"],[\"snapshot\",\"-i\"],[\"click\",\"@e2\"]]" }
224
+ ```
225
+
226
+ ### Session and inspection commands
227
+
228
+ - `session`
229
+ - `session list`
230
+ - `close`
231
+ - `close --all`
232
+ - `--help`
233
+ - `--version`
234
+
235
+ The wrapper keeps `--help` and `--version` stateless so they do not consume the implicit managed-session slot.
236
+
237
+ ## Important global flags
238
+
239
+ - `--profile <name|path>` reuse Chrome profile state
240
+ - `--session <name>` explicit upstream session name
241
+ - `--session-name <name>` upstream saved auth/session state name
242
+ - `--cdp <port-or-url>` connect to an existing browser
243
+ - `--headed` show the browser window
244
+ - `--download-path <dir>` default download directory
245
+ - `--user-agent <ua>` custom user agent
246
+ - `--json` injected by the wrapper automatically for normal tool execution
247
+
248
+ ## Wrapper-specific behavior worth knowing
249
+
250
+ - The extension may keep following one implicit managed session across later tool calls.
251
+ - If startup-scoped flags like `--profile`, `--session-name`, or `--cdp` would be ignored because that implicit session is already active, retry with `sessionMode: "fresh"`.
252
+ - After profiled opens, the wrapper best-effort restores the intended target tab when restored tabs steal focus.
253
+ - After the wrapper knows the intended tab for a session, later commands best-effort keep that tab active so reconnect drift does not silently move the browser to a restored/background tab.
254
+ - Oversized snapshots and oversized generic outputs may be compacted in tool content, with the full raw output written to a spill file path shown directly in the tool result.
255
+
256
+ ## Maintenance rule
257
+
258
+ Whenever the upstream `agent-browser` binary version changes in this project:
259
+
260
+ 1. re-check the upstream command/help surface
261
+ 2. update this local command reference if anything changed
262
+ 3. update tool prompt guidance if the recommended agent workflow changed
263
+ 4. update README and release docs if the user-visible behavior changed
264
+ 5. validate the extension still exposes local documentation that is at least as usable as the blocked direct-binary path for normal agent work
package/docs/RELEASE.md CHANGED
@@ -54,10 +54,12 @@ node scripts/verify-package.mjs --list-files
54
54
  Before publishing, also validate the explicit local-checkout path:
55
55
 
56
56
  1. Install `agent-browser` separately.
57
- 2. Launch `pi --no-extensions -e .` from this repository root.
58
- 3. Confirm the checkout extension loads from `extensions/agent-browser/index.ts`.
59
- 4. Run a smoke prompt that exercises `agent_browser`.
60
- 5. Validate managed-session continuity with both `/reload` and a full restart + `/resume`.
57
+ 2. Make sure Pi has only one active source for this extension during checkout validation.
58
+ 3. Launch `pi --no-extensions -e .` from this repository root.
59
+ 4. Confirm the checkout extension loads from `extensions/agent-browser/index.ts`.
60
+ 5. Run a smoke prompt that exercises `agent_browser`.
61
+ 6. Validate managed-session continuity with both `/reload` and a full restart + `/resume`.
62
+ 7. Re-check local extension-side docs (`README.md`, `docs/COMMAND_REFERENCE.md`, and prompt guidance) if the upstream `agent-browser` version/help surface changed.
61
63
 
62
64
  Example smoke prompt:
63
65
 
@@ -71,6 +73,8 @@ Recommended lifecycle follow-up:
71
73
  2. Run `/reload`, then ask for `snapshot -i` and confirm the same page is still active.
72
74
  3. Exit `pi`, relaunch it against the same session file or use `/resume`, then ask for `snapshot -i` again and confirm the same page is still active.
73
75
  4. Open a large page that compacts its snapshot output and confirm `details.fullOutputPath` still exists after the restart/resume flow.
76
+ 5. Trigger an oversized non-snapshot output (for example a deliberately large `eval --stdin` result) and confirm the tool prints the actual spill file path directly in content instead of only referencing a details key.
77
+ 6. Validate at least one file-download flow with `download <selector> <path>`.
74
78
 
75
79
  ## Post-publish install validation
76
80
 
@@ -81,6 +85,11 @@ pi install npm:pi-agent-browser-native@<version>
81
85
  pi -e npm:pi-agent-browser-native@<version>
82
86
  ```
83
87
 
88
+ For installed-package validation, make sure Pi has only one active source for this extension. The simplest safe paths are either:
89
+
90
+ - temporarily disable/remove the checkout path and then run plain `pi`, or
91
+ - use an isolated ephemeral run such as `pi --no-extensions -e npm:pi-agent-browser-native@<version>`
92
+
84
93
  Then confirm `pi` exposes the native `agent_browser` tool, that a basic `open` + `snapshot -i` flow works, and that `/reload` plus restart/`/resume` keep following the same implicit managed browser session.
85
94
 
86
95
  ## Release notes checklist
@@ -89,6 +98,7 @@ Before publishing:
89
98
 
90
99
  - update `CHANGELOG.md`
91
100
  - confirm README install guidance still leads with the package-first flow
101
+ - confirm `docs/COMMAND_REFERENCE.md` still matches the effective upstream command/help surface used by the wrapper
92
102
  - confirm the explicit local-checkout instructions still work for pre-release validation
93
103
  - rerun `npm run verify:release`
94
104
  - manually exercise `/reload` and full restart + `/resume` continuity in local checkout validation
@@ -64,6 +64,7 @@ Define the product requirements and constraints for `pi-agent-browser-native`.
64
64
  - Someone opening the repo should quickly understand the goal, purpose, install model, and usage.
65
65
  - Documents should read as complete documents, not iterative logs, unless they are explicitly meant to be iterative, such as a changelog.
66
66
  - Requirements, expectations, and durable rules from user conversations should be reflected in the appropriate docs.
67
+ - Because direct-binary usage is commonly blocked in normal agent sessions, the repo must carry a local command reference for the effective `agent_browser` surface and keep it in sync with upstream changes.
67
68
  - Published package contents should include the canonical user-facing docs plus `LICENSE`.
68
69
  - Published package contents should exclude agent-only and superseded docs such as `AGENTS.md`, `docs/v1-tool-contract.md`, and `docs/native-integration-design.md`.
69
70
 
@@ -95,10 +96,12 @@ The design should comfortably support workflows such as:
95
96
  - The wrapper should stay thin, with upstream `agent-browser` remaining the source of truth for command semantics.
96
97
  - User-facing docs belong in `README.md` and the canonical published files under `docs/`.
97
98
  - Agent workflow and deeper testing procedures can stay in `AGENTS.md`, but published docs must not depend on that file being present.
99
+ - When upstream `agent-browser` changes, refresh the local command reference, prompt guidance, and other extension-side docs so agents still have a repo-readable equivalent of the blocked direct-binary help path.
98
100
  - Keep mitigations for legacy-skill coexistence simple; do not add extra moving parts unless observed behavior justifies them.
99
101
  - Prefer narrow, evidence-backed compatibility mitigations over broad stealth layers when a specific upstream site starts rejecting the default headless launch fingerprint.
100
102
  - Preserve the page that a profiled `open` just navigated to; if restored profile tabs steal focus during launch, the wrapper should best-effort switch back to the returned page URL before handing control back to the agent.
101
103
  - Once a tab target is known for a session, later active-tab commands should best-effort pin that same tab inside the same upstream invocation when reconnect drift would otherwise land on a restored/background tab.
104
+ - If a restored/background tab steals focus after a successful command, the wrapper should best-effort restore the intended target tab again before handing control back.
102
105
  - On local Unix launches, extension-generated session names should not fail just because the upstream default socket path is too long; the wrapper should choose a shorter socket directory when needed.
103
106
 
104
107
  ## Open design questions
@@ -4,6 +4,7 @@ Related docs:
4
4
  - [`../README.md`](../README.md)
5
5
  - [`REQUIREMENTS.md`](REQUIREMENTS.md)
6
6
  - [`ARCHITECTURE.md`](ARCHITECTURE.md)
7
+ - [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md)
7
8
 
8
9
  ## V1 tool
9
10
 
@@ -24,7 +25,7 @@ It also keeps the main UX where it belongs: the agent invokes the tool directly
24
25
 
25
26
  The tool guidance should be written for task discovery first, not wrapper implementation first. That means the description should emphasize browser use cases like web research, reading live docs, clicking, filling, screenshots, extraction, and authenticated/profile-based workflows. Low-level wrapper details like `stdin` and exact CLI args belong in the schema and guidelines, not the lead description.
26
27
 
27
- The tool also needs an operating playbook, not just a capability list. The model should not have to rediscover basics each session. Guidance should explicitly encode the normal browser workflow (`open` -> `snapshot -i` -> interact -> re-snapshot), the authenticated-content workflow (prefer `--profile Default` on the first browser call and let the implicit session carry continuity; use `--auto-connect` as a fallback when profile reuse is unavailable), and the preferred recovery path when a session opens on the wrong tab, an action changes origin unexpectedly, or an `open` call returns blocked/blank/unexpected results (`tab list` / `tab <tab-id-or-label>` / `snapshot -i` before retrying different URLs or fallback strategies). It should also discourage inventing fixed explicit session names for routine tasks, because those names leak stale browser state across otherwise unrelated `pi` sessions. For read-only browsing tasks, guidance should prefer answering from the current page state first: use the current snapshot, structured ref labels, or `eval --stdin` on the current page before navigating into media viewers, detail routes, or other new pages unless the current view lacks the needed information. When using `eval --stdin`, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics. When using `eval --stdin` for extraction, return the intended value instead of relying on `console.log` as the primary result channel.
28
+ The tool also needs an operating playbook, not just a capability list. The model should not have to rediscover basics each session. Guidance should explicitly encode the normal browser workflow (`open` -> `snapshot -i` -> interact -> re-snapshot), the authenticated-content workflow (prefer `--profile Default` on the first browser call and let the implicit session carry continuity; use `--auto-connect` as a fallback when profile reuse is unavailable), and the preferred recovery path when a session opens on the wrong tab, an action changes origin unexpectedly, or an `open` call returns blocked/blank/unexpected results (`tab list` / `tab <tab-id-or-label>` / `snapshot -i` before retrying different URLs or fallback strategies). It should also discourage inventing fixed explicit session names for routine tasks, because those names leak stale browser state across otherwise unrelated `pi` sessions. For read-only browsing tasks, guidance should prefer answering from the current page state first: use the current snapshot, structured ref labels, or `eval --stdin` on the current page before navigating into media viewers, detail routes, or other new pages unless the current view lacks the needed information. For downloads, guidance should explicitly prefer `download <selector> <path>` over `click` when the goal is a file on disk. When using `eval --stdin`, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics. When using `eval --stdin` for extraction, return the intended value instead of relying on `console.log` as the primary result channel. Because the extension blocks normal direct-binary usage in most agent sessions, the repository must also carry a local command reference that stays in sync with the effective tool surface.
28
29
 
29
30
  ## Parameters
30
31
 
@@ -109,7 +110,7 @@ Primary content should be:
109
110
 
110
111
  Examples:
111
112
  - small `snapshot` results should include the actual snapshot text
112
- - oversized `snapshot` results should switch to a compact view that preserves the primary content, nearby sections, and a trimmed set of high-value refs, while exposing the full raw snapshot path via `details.fullOutputPath`
113
+ - oversized `snapshot` results should switch to a compact view that preserves the primary content, nearby sections, and a trimmed set of high-value refs, while exposing the full raw snapshot path directly in the rendered tool text and via `details.fullOutputPath`
113
114
  - successful navigation actions like `click`, `back`, `forward`, and `reload` should include a lightweight post-action title/url summary when the wrapper can address the active session
114
115
  - `tab list` should include a readable tab summary
115
116
  - `screenshot` should include the saved-path summary plus the inline image attachment when available
@@ -141,13 +142,13 @@ Additional structured fields can appear when relevant:
141
142
  - `batchFailure` and `batchSteps` for `batch` rendering, including mixed-success runs
142
143
  - `navigationSummary` for navigation-style commands like `click`, `back`, `forward`, and `reload`
143
144
  - `imagePath` / `imagePaths` for screenshots and batched image outputs
144
- - `fullOutputPath` / `fullOutputPaths` when large snapshot output is compacted and spilled to a private file; persisted sessions keep that path under a private session-scoped artifact directory with a bounded per-session budget so it survives reload/resume without unbounded growth
145
+ - `fullOutputPath` / `fullOutputPaths` when large snapshot output or other oversized tool output is compacted and spilled to a private file; persisted sessions keep that path under a private session-scoped artifact directory with a bounded per-session budget so it survives reload/resume without unbounded growth
145
146
  - `sessionRecoveryHint` when startup-scoped flags need `sessionMode: "fresh"`
146
147
  - `inspection: true` plus `stdout` for successful plain-text inspection commands like `--help` and `--version`
147
148
 
148
149
  When the tool echoes `args` or `effectiveArgs` back into Pi, sensitive values such as `--headers`, proxy credentials, and auth-bearing URL parameters should be redacted first.
149
150
 
150
- For oversized snapshots, details should switch to a compact metadata object and include `fullOutputPath` pointing at a private JSON spill file with the full upstream snapshot payload. Persisted sessions should keep that spill file under a private session-scoped artifact directory so the path remains usable after reload/restart, with the oldest persisted spill files evicted as needed to stay within the per-session budget.
151
+ For oversized snapshots and other oversized tool outputs, details should switch to a compact metadata object and include `fullOutputPath` pointing at a private spill file with the full upstream payload. The model-facing tool text should print the actual spill-file path when one exists instead of only saying to inspect a details key. Persisted sessions should keep that spill file under a private session-scoped artifact directory so the path remains usable after reload/restart, with the oldest persisted spill files evicted as needed to stay within the per-session budget.
151
152
 
152
153
  ## High-value result rendering
153
154
 
@@ -155,7 +156,8 @@ For oversized snapshots, details should switch to a compact metadata object and
155
156
 
156
157
  Worth doing in v1:
157
158
  - screenshots → inline image attachment
158
- - snapshots → origin + ref count + main-content-first compact preview, with the raw snapshot spill path kept in `details.fullOutputPath` when the inline result would otherwise be too large
159
+ - snapshots → origin + ref count + main-content-first compact preview, with the raw snapshot spill path printed directly in content and kept in `details.fullOutputPath` when the inline result would otherwise be too large
160
+ - oversized generic outputs such as large `eval --stdin` payloads → compact preview plus the actual spill file path instead of dumping the whole payload into model context
159
161
  - extraction-style commands like `eval --stdin` and `get title` → scalar-first text with lightweight origin context when available
160
162
  - navigation actions like `click`, `back`, `forward`, and `reload` → lightweight post-action title/url summary when available
161
163
  - tab lists → compact summary/table
@@ -184,6 +186,7 @@ If `agent-browser` is not on `PATH`, fail with a message that:
184
186
  - pass explicit `--profile` straight through to upstream `agent-browser`; no profile-cloning or isolation layer is added in v1
185
187
  - after profiled `open` / `goto` / `navigate`, if upstream leaves a restored profile tab active instead of the page that was just opened, best-effort switch back to the tab whose URL matches the returned open result before returning control to the agent
186
188
  - once the wrapper has a known tab target for a session, later active-tab commands may best-effort pin that tab inside the same upstream invocation so reconnect drift does not send a `click`, `snapshot`, or similar action to a restored/background tab instead
189
+ - after a successful command on a known tab target, the wrapper may best-effort restore that same target again if a restored/background tab steals focus after the command completes
187
190
  - on local Unix launches, set a short private socket directory for wrapper-spawned `agent-browser` processes so extension-generated session names do not fail the upstream Unix socket-path length limit in longer cwd/session-name combinations
188
191
  - treat successful plain-text inspection commands like `--help` and `--version` as stateless: do not inject the implicit managed session and do not let those calls claim the managed-session slot
189
192
  - if startup-scoped flags like `--profile`, `--session-name`, or `--cdp` are supplied after the implicit session is already active while `sessionMode` is `"auto"`, return a validation error with a structured recovery hint that recommends `sessionMode: "fresh"`
@@ -65,6 +65,7 @@ const QUICK_START_GUIDELINES = [
65
65
  "Quick start mental model: args are the exact agent-browser CLI args after the binary; stdin is only for batch and eval --stdin; sessionMode=fresh switches the extension-managed session to a fresh upstream launch when you need new --profile, --session-name, or --cdp state.",
66
66
  "Common first calls: { args: [\"open\", \"https://example.com\"] } then { args: [\"snapshot\", \"-i\"] }; after navigation, use { args: [\"click\", \"@e2\"] } then { args: [\"snapshot\", \"-i\"] }.",
67
67
  "Common advanced calls: { args: [\"batch\"], stdin: \"[[\\\"open\\\",\\\"https://example.com\\\"],[\\\"snapshot\\\",\\\"-i\\\"]]\" }, { args: [\"eval\", \"--stdin\"], stdin: \"document.title\" }, and { args: [\"--profile\", \"Default\", \"open\", \"https://example.com/account\"], sessionMode: \"fresh\" }.",
68
+ "High-value command reference: download <selector> <path> saves a file triggered by a click; get title/url/text/html/value/attr/count reads page state; screenshot [path] captures an image; pdf <path> saves a PDF; tab list and tab <tab-id-or-label> inspect or recover the active tab.",
68
69
  ] as const;
69
70
  const BRAVE_SEARCH_PROMPT_GUIDELINE =
70
71
  "When a non-empty BRAVE_API_KEY is available in the current environment, prefer the Brave Search API via bash/curl to discover specific destination URLs, then open the chosen URL with agent_browser instead of browsing a search engine results page just to find the target.";
@@ -74,9 +75,10 @@ const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
74
75
  "Do not invent fixed explicit session names for routine tasks. Use the implicit session unless you truly need multiple isolated browser sessions in the same conversation.",
75
76
  "When using --profile, --session-name, or --cdp, put them on the first command for that session. If you intentionally use an explicit --session, keep using that same explicit session for follow-ups.",
76
77
  "If you already used the implicit session and now need startup-scoped flags like --profile, --session-name, or --cdp, retry with sessionMode set to fresh or pass an explicit --session for the new launch. After a successful unnamed fresh launch, later auto calls follow that new session.",
77
- "If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. Only use wait with an explicit argument like milliseconds, --load, --url, --fn, or --text.",
78
+ "If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. Only use wait with an explicit argument like milliseconds, --load <state>, --url <matcher>, --fn <js>, or --text <matcher>.",
78
79
  "For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.",
79
80
  "For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.",
81
+ "For downloads, prefer download <selector> <path> when an element click should save a file. Do not rely on click alone when you need the downloaded file on disk.",
80
82
  "When using eval --stdin, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics.",
81
83
  "When using eval --stdin for extraction, return the value you want instead of relying on console.log as the primary result channel.",
82
84
  "Do not call --help or other exploratory inspection commands unless the user explicitly asks for them or debugging the browser integration is necessary.",
@@ -322,6 +324,7 @@ function extractStringResultField(data: unknown, fieldName: "title" | "url"): st
322
324
  }
323
325
 
324
326
  const SESSION_TAB_PINNING_EXCLUDED_COMMANDS = new Set(["batch", "close", "goto", "navigate", "open", "session", "tab"]);
327
/**
 * Commands after which the wrapper never attempts a post-command tab restore.
 * Unlike the pinning exclusion set, `goto`, `navigate`, and `open` are not listed here,
 * so those commands remain eligible for post-command correction.
 */
const SESSION_TAB_POST_COMMAND_CORRECTION_EXCLUDED_COMMANDS = new Set(["batch", "close", "session", "tab"]);
325
328
 
326
329
  interface SessionTabTarget {
327
330
  title?: string;
@@ -413,6 +416,14 @@ function shouldPinSessionTabForCommand(options: { command?: string; sessionName?
413
416
  );
414
417
  }
415
418
 
419
/**
 * Decides whether the wrapper should try to restore the session's known tab
 * once a command has finished.
 *
 * @param options.command - Parsed agent-browser command name, if one was recognised.
 * @param options.sessionName - Session the command ran against, if any.
 * @returns `true` only when both a session name and a command are present and the
 *   command is not listed in `SESSION_TAB_POST_COMMAND_CORRECTION_EXCLUDED_COMMANDS`.
 */
function shouldCorrectSessionTabAfterCommand(options: { command?: string; sessionName?: string }): boolean {
	const { command, sessionName } = options;
	// Without a session or a recognised command there is nothing to restore.
	if (sessionName === undefined || command === undefined) {
		return false;
	}
	return !SESSION_TAB_POST_COMMAND_CORRECTION_EXCLUDED_COMMANDS.has(command);
}
426
+
416
427
  function selectSessionTargetTab(options: {
417
428
  tabs: Array<{ active?: boolean; index?: number; label?: string; tabId?: string; title?: string; url?: string }>;
418
429
  target: SessionTabTarget;
@@ -918,12 +929,42 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
918
929
  }
919
930
  }
920
931
 
932
+ const observedSessionTabTarget =
933
+ normalizeSessionTabTarget(navigationSummary) ?? extractSessionTabTargetFromData(presentationEnvelope?.data);
921
934
  const currentSessionTabTarget = deriveSessionTabTarget({
922
935
  command: executionPlan.commandInfo.command,
923
936
  data: presentationEnvelope?.data,
924
937
  navigationSummary,
925
938
  previousTarget: priorSessionTabTarget,
926
939
  });
940
+ if (
941
+ succeeded &&
942
+ priorSessionTabTarget &&
943
+ !sessionTabCorrection &&
944
+ observedSessionTabTarget &&
945
+ shouldCorrectSessionTabAfterCommand({
946
+ command: executionPlan.commandInfo.command,
947
+ sessionName: executionPlan.sessionName,
948
+ })
949
+ ) {
950
+ const postCommandTabCorrection = await collectSessionTabSelection({
951
+ cwd: ctx.cwd,
952
+ sessionName: executionPlan.sessionName,
953
+ signal,
954
+ target: observedSessionTabTarget,
955
+ });
956
+ if (postCommandTabCorrection) {
957
+ const appliedPostCommandCorrection = await applyOpenResultTabCorrection({
958
+ correction: postCommandTabCorrection,
959
+ cwd: ctx.cwd,
960
+ sessionName: executionPlan.sessionName,
961
+ signal,
962
+ });
963
+ if (appliedPostCommandCorrection && !sessionTabCorrection) {
964
+ sessionTabCorrection = appliedPostCommandCorrection;
965
+ }
966
+ }
967
+ }
927
968
  if (executionPlan.sessionName) {
928
969
  if (executionPlan.commandInfo.command === "close" && succeeded) {
929
970
  sessionTabTargets.delete(executionPlan.sessionName);
@@ -10,7 +10,11 @@ import { readFile, stat } from "node:fs/promises";
10
10
  import { resolve } from "node:path";
11
11
 
12
12
  import { parseCommandInfo, type CommandInfo } from "../runtime.js";
13
- import { type PersistentSessionArtifactStore } from "../temp.js";
13
+ import {
14
+ type PersistentSessionArtifactStore,
15
+ writePersistentSessionArtifactFile,
16
+ writeSecureTempFile,
17
+ } from "../temp.js";
14
18
  import { buildSnapshotPresentation, formatRawSnapshotText, formatSnapshotSummary } from "./snapshot.js";
15
19
  import {
16
20
  type AgentBrowserBatchResult,
@@ -19,8 +23,10 @@ import {
19
23
  type BatchStepPresentationDetails,
20
24
  type ToolPresentation,
21
25
  isRecord,
26
+ countLines,
22
27
  parsePositiveInteger,
23
28
  stringifyUnknown,
29
+ truncateText,
24
30
  } from "./shared.js";
25
31
 
26
32
  const IMAGE_EXTENSION_TO_MIME_TYPE: Record<string, string> = {
@@ -35,6 +41,11 @@ const INLINE_IMAGE_MAX_BYTES_ENV = "PI_AGENT_BROWSER_INLINE_IMAGE_MAX_BYTES";
35
41
  const DEFAULT_INLINE_IMAGE_MAX_BYTES = 5 * 1_024 * 1_024;
36
42
  const NAVIGATION_SUMMARY_COMMANDS = new Set(["back", "click", "dblclick", "forward", "reload"]);
37
43
  const NAVIGATION_SUMMARY_FIELD = "navigationSummary";
44
/** Rendered text longer than this many characters is compacted instead of shown inline. */
const LARGE_OUTPUT_INLINE_MAX_CHARS = 8_000;
/** Rendered text with more than this many lines is compacted instead of shown inline. */
const LARGE_OUTPUT_INLINE_MAX_LINES = 120;
/** Character budget for the preview that replaces compacted output. */
const LARGE_OUTPUT_PREVIEW_MAX_CHARS = 2_500;
/** Line budget for the preview that replaces compacted output. */
const LARGE_OUTPUT_PREVIEW_MAX_LINES = 40;
/** File-name prefix passed to the spill-file writers for oversized generic output. */
const LARGE_OUTPUT_FILE_PREFIX = "pi-agent-browser-output";
38
49
 
39
50
  interface NavigationSummary {
40
51
  title?: string;
@@ -114,6 +125,19 @@ function getScreenshotSummary(data: Record<string, unknown>): string | undefined
114
125
  return typeof data.path === "string" ? `Saved image: ${data.path}` : undefined;
115
126
  }
116
127
 
128
/**
 * Builds the one-line summary for commands whose result is a file written to disk.
 *
 * @param commandInfo - Parsed command; only its `command` name is inspected.
 * @param data - Result payload; a summary is produced only when `data.path` is a string.
 * @returns `Downloaded file: <path>` for `download`, `Saved PDF: <path>` for `pdf`,
 *   and `undefined` for any other command or when no string path is present.
 */
function getSavedFileSummary(commandInfo: CommandInfo, data: Record<string, unknown>): string | undefined {
	const { path } = data;
	if (typeof path !== "string") {
		return undefined;
	}
	switch (commandInfo.command) {
		case "download":
			return `Downloaded file: ${path}`;
		case "pdf":
			return `Saved PDF: ${path}`;
		default:
			return undefined;
	}
}
140
+
117
141
  function getScalarExtractionResult(data: Record<string, unknown>): string | undefined {
118
142
  const { result } = data;
119
143
  if (typeof result === "string") {
@@ -437,6 +461,10 @@ function formatSummary(commandInfo: CommandInfo, data: unknown): string {
437
461
  if (commandInfo.command === "screenshot" && typeof data.path === "string") {
438
462
  return `Screenshot saved: ${data.path}`;
439
463
  }
464
+ const savedFileSummary = getSavedFileSummary(commandInfo, data);
465
+ if (savedFileSummary) {
466
+ return savedFileSummary;
467
+ }
440
468
  const extractionSummary = formatExtractionSummary(commandInfo, data);
441
469
  if (extractionSummary) {
442
470
  return extractionSummary;
@@ -490,6 +518,10 @@ function formatContentText(commandInfo: CommandInfo, data: unknown): string {
490
518
  const screenshotSummary = getScreenshotSummary(data);
491
519
  if (screenshotSummary) return screenshotSummary;
492
520
  }
521
+ const savedFileSummary = getSavedFileSummary(commandInfo, data);
522
+ if (savedFileSummary) {
523
+ return savedFileSummary;
524
+ }
493
525
 
494
526
  const extractionText = formatExtractionText(commandInfo, data);
495
527
  if (extractionText) {
@@ -546,6 +578,115 @@ async function attachInlineImage(presentation: ToolPresentation, imagePath: stri
546
578
  }
547
579
  }
548
580
 
581
/**
 * Reports whether rendered tool text is too large to show inline.
 *
 * @param text - The rendered tool text.
 * @returns `true` when the text exceeds `LARGE_OUTPUT_INLINE_MAX_CHARS` characters
 *   or `LARGE_OUTPUT_INLINE_MAX_LINES` lines (as counted by `countLines`).
 */
function shouldCompactLargeOutput(text: string): boolean {
	// Cheap length check first; the line count is only computed when needed.
	if (text.length > LARGE_OUTPUT_INLINE_MAX_CHARS) {
		return true;
	}
	return countLines(text) > LARGE_OUTPUT_INLINE_MAX_LINES;
}
584
+
585
/**
 * Builds a bounded preview from the leading lines of `text`.
 *
 * Lines are consumed in order until either `LARGE_OUTPUT_PREVIEW_MAX_LINES` lines have
 * been kept or the running character count reaches `LARGE_OUTPUT_PREVIEW_MAX_CHARS`.
 *
 * @param text - Full rendered output, split on `"\n"`.
 * @returns `previewText` (kept lines joined with `"\n"`) and `omittedLineCount`, the
 *   number of source lines that did not make it into the preview. Lines shortened by
 *   `truncateText` still count as kept.
 */
function buildLargeOutputPreview(text: string): { omittedLineCount: number; previewText: string } {
	const sourceLines = text.split("\n");
	const keptLines: string[] = [];
	let usedChars = 0;

	for (const sourceLine of sourceLines) {
		const lineBudgetSpent = keptLines.length >= LARGE_OUTPUT_PREVIEW_MAX_LINES;
		const charBudgetSpent = usedChars >= LARGE_OUTPUT_PREVIEW_MAX_CHARS;
		if (lineBudgetSpent || charBudgetSpent) {
			break;
		}
		// Each kept line is allowed at least 40 characters, so the last line may push the
		// preview slightly past the character budget.
		// NOTE(review): assumes truncateText caps a string at the given length — confirm in shared.ts.
		const lineLimit = Math.max(40, LARGE_OUTPUT_PREVIEW_MAX_CHARS - usedChars);
		const keptLine = truncateText(sourceLine, lineLimit);
		keptLines.push(keptLine);
		// +1 accounts for the newline that joins this line to the next one.
		usedChars += keptLine.length + 1;
	}

	return {
		omittedLineCount: Math.max(0, sourceLines.length - keptLines.length),
		previewText: keptLines.join("\n"),
	};
}
603
+
604
/**
 * Writes the full, uncompacted output of a command to a private spill file.
 *
 * Payload selection:
 * - string `data` is written verbatim (`.txt`);
 * - number / boolean `data` is written via `String(...)` (`.txt`);
 * - `undefined` `data` falls back to the rendered `text` (`.txt`);
 * - anything else is serialised with `stringifyUnknown` (`.json`).
 *
 * The `.json` suffix is reserved for payloads that went through `stringifyUnknown`;
 * the rendered-text fallback is plain text and is therefore labelled `.txt`.
 *
 * @param options.data - Upstream result payload, if any.
 * @param options.persistentArtifactStore - When provided, the file is written through
 *   `writePersistentSessionArtifactFile`; otherwise `writeSecureTempFile` is used.
 * @param options.text - Rendered tool text, used only when `data` is `undefined`.
 * @returns The path returned by whichever writer was used.
 */
async function writeLargeOutputSpillFile(options: {
	data: unknown;
	persistentArtifactStore?: PersistentSessionArtifactStore;
	text: string;
}): Promise<string> {
	const { data, persistentArtifactStore, text } = options;

	let content: string;
	let suffix: string;
	if (typeof data === "string") {
		content = data;
		suffix = ".txt";
	} else if (typeof data === "number" || typeof data === "boolean") {
		content = String(data);
		suffix = ".txt";
	} else if (data === undefined) {
		// No upstream payload: spill the rendered text, which is not JSON.
		content = text;
		suffix = ".txt";
	} else {
		content = stringifyUnknown(data);
		suffix = ".json";
	}

	const fileOptions = {
		content,
		prefix: LARGE_OUTPUT_FILE_PREFIX,
		suffix,
	};
	return persistentArtifactStore
		? await writePersistentSessionArtifactFile({ ...fileOptions, store: persistentArtifactStore })
		: await writeSecureTempFile(fileOptions);
}
627
+
628
/**
 * Replaces oversized presentation text with a compact preview and spills the full
 * output to a private file.
 *
 * When the presentation text is empty or within the inline limits, the presentation is
 * returned untouched. Otherwise the presentation is mutated in place:
 * - the first text part (or a new leading part) becomes the compact preview, which ends
 *   with either the spill-file path or the reason the spill failed;
 * - `data` is replaced by compact metadata (sizes, `fullOutputPath`, `spillError`);
 * - `fullOutputPath` is set (possibly to `undefined` when spilling failed);
 * - `summary` gets a ` (compact)` suffix.
 *
 * Spill-file failures are deliberately non-fatal: the error message is surfaced in the
 * preview and in `data.spillError` instead of being thrown.
 *
 * @param options.commandInfo - Parsed command; `command` labels the preview header.
 * @param options.data - Upstream payload forwarded to the spill-file writer.
 * @param options.persistentArtifactStore - Optional store forwarded to the spill-file writer.
 * @param options.presentation - Presentation to inspect and, if oversized, compact in place.
 * @returns The same `presentation` object that was passed in.
 */
async function compactLargePresentationOutput(options: {
	commandInfo: CommandInfo;
	data: unknown;
	persistentArtifactStore?: PersistentSessionArtifactStore;
	presentation: ToolPresentation;
}): Promise<ToolPresentation> {
	const { commandInfo, data, persistentArtifactStore, presentation } = options;

	const text = getPresentationText(presentation);
	if (text.length === 0 || !shouldCompactLargeOutput(text)) {
		return presentation;
	}

	let fullOutputPath: string | undefined;
	let spillErrorText: string | undefined;
	try {
		fullOutputPath = await writeLargeOutputSpillFile({
			data,
			persistentArtifactStore,
			text,
		});
	} catch (error) {
		// Best effort: keep going with a preview even when the spill file cannot be written.
		spillErrorText = error instanceof Error ? error.message : String(error);
	}

	const { omittedLineCount, previewText } = buildLargeOutputPreview(text);
	const commandLabel = commandInfo.command ?? "agent-browser";
	const lines = [`Large ${commandLabel} output compacted.`, "", "Preview:", previewText];
	if (omittedLineCount > 0) {
		lines.push(`- ... (${omittedLineCount} additional lines omitted)`);
	}
	lines.push(
		"",
		fullOutputPath
			? `Full output path: ${fullOutputPath}`
			: `Full output unavailable: ${spillErrorText ?? "spill file could not be created."}`,
	);
	const compactedText = lines.join("\n");

	// NOTE(review): only the first text part is replaced. If getPresentationText joins
	// several text parts, the later parts stay in `content` unchanged — confirm.
	const firstTextIndex = presentation.content.findIndex((part) => part.type === "text");
	if (firstTextIndex >= 0) {
		presentation.content[firstTextIndex] = { type: "text", text: compactedText };
	} else {
		presentation.content.unshift({ type: "text", text: compactedText });
	}

	presentation.data = {
		compacted: true,
		fullOutputPath,
		outputCharCount: text.length,
		outputLineCount: countLines(text),
		previewCharCount: previewText.length,
		previewLineCount: countLines(previewText),
		spillError: spillErrorText,
	};
	presentation.fullOutputPath = fullOutputPath;
	presentation.summary = `${presentation.summary} (compact)`;
	return presentation;
}
689
+
549
690
  export async function buildToolPresentation(options: {
550
691
  commandInfo: CommandInfo;
551
692
  cwd: string;
@@ -575,9 +716,11 @@ export async function buildToolPresentation(options: {
575
716
  };
576
717
 
577
718
  const imagePath = extractImagePath(cwd, data);
578
- if (!imagePath) {
579
- return presentation;
580
- }
581
-
582
- return await attachInlineImage(presentation, imagePath);
719
+ const presentationWithImage = imagePath ? await attachInlineImage(presentation, imagePath) : presentation;
720
+ return await compactLargePresentationOutput({
721
+ commandInfo,
722
+ data,
723
+ persistentArtifactStore,
724
+ presentation: presentationWithImage,
725
+ });
583
726
  }
@@ -579,7 +579,7 @@ export async function buildSnapshotPresentation(
579
579
  );
580
580
  if (fallbackPreview.omittedCount > 0) {
581
581
  lines.push(
582
- `- ... (${fallbackPreview.omittedCount} additional snapshot lines omitted; ${fullOutputPath ? "use the spill file for everything" : "the full raw snapshot was omitted"})`,
582
+ `- ... (${fallbackPreview.omittedCount} additional snapshot lines omitted; ${fullOutputPath ? `full output path: ${fullOutputPath}` : "the full raw snapshot was omitted"})`,
583
583
  );
584
584
  }
585
585
  } else {
@@ -614,7 +614,7 @@ export async function buildSnapshotPresentation(
614
614
  lines.push(
615
615
  "",
616
616
  fullOutputPath
617
- ? "Full raw snapshot path is available in details.fullOutputPath."
617
+ ? `Full raw snapshot path: ${fullOutputPath}`
618
618
  : `Full raw snapshot unavailable: ${spillErrorText ?? "temp spill file could not be created."}`,
619
619
  );
620
620
 
@@ -260,7 +260,7 @@ export async function writeSecureTempChunk(options: {
260
260
  const { content, fileHandle, path } = options;
261
261
  await enqueueTempMutation(async () => {
262
262
  await assertSecureTempRootBudget(dirname(path), getTempArtifactByteLength(content));
263
- await fileHandle.writeFile(content);
263
+ await fileHandle.appendFile(content);
264
264
  });
265
265
  }
266
266
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-agent-browser-native",
3
- "version": "0.2.8",
3
+ "version": "0.2.10",
4
4
  "description": "pi extension that exposes agent-browser as a native tool for browser automation",
5
5
  "type": "module",
6
6
  "author": "Mitch Fultz (https://github.com/fitchmultz)",
@@ -32,6 +32,7 @@
32
32
  "CHANGELOG.md",
33
33
  "LICENSE",
34
34
  "docs/ARCHITECTURE.md",
35
+ "docs/COMMAND_REFERENCE.md",
35
36
  "docs/RELEASE.md",
36
37
  "docs/REQUIREMENTS.md",
37
38
  "docs/TOOL_CONTRACT.md"
@@ -45,14 +46,14 @@
45
46
  "@sinclair/typebox": "*"
46
47
  },
47
48
  "devDependencies": {
48
- "@mariozechner/pi-coding-agent": "^0.67.4",
49
+ "@mariozechner/pi-coding-agent": "^0.67.68",
49
50
  "@sinclair/typebox": "^0.34.49",
50
51
  "@types/node": "^25.6.0",
51
52
  "tsx": "^4.21.0",
52
- "typescript": "^6.0.2"
53
+ "typescript": "^6.0.3"
53
54
  },
54
55
  "overrides": {
55
- "basic-ftp": "5.2.2"
56
+ "basic-ftp": "5.3.0"
56
57
  },
57
58
  "scripts": {
58
59
  "typecheck": "tsc --noEmit",