pi-agent-browser-native 0.2.7 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,26 @@
2
2
 
3
3
  ## Unreleased
4
4
 
5
+ ## 0.2.9 - 2026-04-17
6
+
7
+ ### Fixed
8
+ - large non-snapshot outputs such as oversized `eval --stdin` payloads now compact inline content, spill the full payload to a private file, and print the actual spill path directly in tool content instead of dumping huge raw output into model context
9
+ - file-save flows now render `download` results as explicit saved-file summaries so agents can see the downloaded path directly
10
+ - when a known target tab stays correct at command start but a restored/background tab steals focus after the command completes, the wrapper now best-effort restores the intended tab before returning control
11
+ - compact snapshot text now prints the actual raw-spill file path directly instead of only referring agents to `details.fullOutputPath`
12
+
13
+ ### Changed
14
+ - added a published `docs/COMMAND_REFERENCE.md` so agents have a repo-readable local command/help surface even when direct `agent-browser` binary usage is blocked
15
+ - expanded tool guidance, README, release notes, and repo guidance with download workflows, better `wait` usage, oversized-output handling, and the documentation-sync rule for upstream `agent-browser` updates
16
+ - clarified the checkout-versus-installed-package workflow in README, release notes, and repo agent guidance so local development keeps one active Pi package source for this extension at a time instead of treating the published entrypoint file as optional
17
+
18
+ ## 0.2.8 - 2026-04-16
19
+
20
+ ### Fixed
21
+ - updated the tab-correction and tab-pinning wrapper paths for `agent-browser` `0.26.0` tab metadata, so profiled launches and follow-up commands now re-select tabs using stable upstream tab ids instead of the retired numeric index shape
22
+ - updated tab-list rendering and tool guidance to show `agent-browser`'s stable tab ids/labels instead of suggesting `tab <n>` commands that no longer work in `0.26.0`
23
+ - extended the narrow ChatGPT/OpenAI headless user-agent compatibility fallback to cover `chat.com`, so `chat.com` redirects reuse the same authenticated headless path as `chatgpt.com`
24
+
5
25
  ## 0.2.7 - 2026-04-16
6
26
 
7
27
  ### Changed
package/README.md CHANGED
@@ -85,7 +85,9 @@ Until you are using a published package release, prefer an explicit checkout-onl
85
85
  pi --no-extensions -e /absolute/path/to/pi-agent-browser-native
86
86
  ```
87
87
 
88
- This avoids duplicate `agent_browser` registrations if you also have the published package installed globally.
88
+ This keeps the checkout isolated from any other active package source for the same extension.
89
+
90
+ This repository's `package.json` is itself a publishable pi package manifest that points at `extensions/agent-browser/index.ts`. That file is the real extension entrypoint for both the checkout and the published package. Keep exactly one active source for this extension in Pi settings at a time: either this checkout path or the published npm package.
89
91
 
90
92
  The native tool exposed to the agent is named `agent_browser`.
91
93
 
@@ -132,6 +134,12 @@ Evaluate page JavaScript via stdin:
132
134
  { "args": ["eval", "--stdin"], "stdin": "document.title" }
133
135
  ```
134
136
 
137
+ Download a file to an explicit path instead of relying on `click` alone:
138
+
139
+ ```json
140
+ { "args": ["download", "@e5", "/tmp/report.pdf"] }
141
+ ```
142
+
135
143
  Start a fresh profiled launch after you already used the implicit session:
136
144
 
137
145
  ```json
@@ -154,17 +162,21 @@ Use the agent_browser tool to open https://react.dev and then take an interactiv
154
162
 
155
163
  ## Local development
156
164
 
157
- Do not track or rely on a repo-local `.pi/extensions/agent-browser.ts` autoload shim for this package. When the package is also installed globally, that creates a duplicate `agent_browser` registration and blocks `pi` startup from this working directory.
165
+ Do not track or rely on a repo-local `.pi/extensions/agent-browser.ts` autoload shim for this package. That creates an unnecessary second registration path.
158
166
 
167
+ The published entrypoint lives at `extensions/agent-browser/index.ts` and is referenced directly from this repo's `package.json`. While developing this repo, keep the checkout path enabled in Pi settings and disable or uninstall `npm:pi-agent-browser-native` so Pi has only one active source for this extension.
168
+
169
+ Recommended local development setup:
159
170
  1. Install `agent-browser` separately via the upstream project.
160
171
  2. Run `npm install`.
161
- 3. Launch `pi` from this repository root with only the checkout extension loaded:
172
+ 3. Keep the checkout path enabled in Pi settings and disable or uninstall `npm:pi-agent-browser-native` while developing this repo.
173
+ 4. Launch `pi` from this repository root with only the checkout extension loaded:
162
174
 
163
175
  ```bash
164
176
  pi --no-extensions -e .
165
177
  ```
166
178
 
167
- 4. Prompt the agent to use `agent_browser`.
179
+ 5. Prompt the agent to use `agent_browser`.
168
180
 
169
181
  Example prompt:
170
182
 
@@ -172,18 +184,22 @@ Example prompt:
172
184
  Use the agent_browser tool to open https://react.dev and then take an interactive snapshot.
173
185
  ```
174
186
 
187
+ For installed-package validation after a release, temporarily do the reverse: disable/remove the checkout path from Pi settings and validate the published npm package, or use an isolated ephemeral run such as `pi --no-extensions -e npm:pi-agent-browser-native@<version>`.
188
+
175
189
  Validated workflow examples:
176
190
 
177
191
  - open a page and snapshot it
178
192
  - click a link and confirm the destination title
179
193
  - use an explicit `--session` across multiple tool calls
180
194
  - use an explicit `--profile` and verify persisted browser storage across restarts
181
- - open `chatgpt.com` headlessly with `--profile Default` without forcing `--headed` or `--auto-connect`
195
+ - open `chat.com` or `chatgpt.com` headlessly with `--profile Default` without forcing `--headed` or `--auto-connect`
182
196
  - verify `/reload` and full restart + `/resume` keep following the same implicit managed browser session
183
197
  - run `batch` with JSON via `stdin`
184
198
  - run `eval --stdin`
185
199
  - take a screenshot with inline attachment support
186
200
  - inspect `agent_browser --help` and `--version` via the tool's stateless plain-text inspection fallback
201
+ - use `download <selector> <path>` for attachment/file-save workflows instead of trying to infer downloads from generic clicks or large eval dumps
202
+ - confirm oversized outputs show the actual spill file path directly in tool content, not just a details key name
187
203
 
188
204
  Inspection commands like `agent_browser --help` and `--version` are always supported. They return plain text, are useful for debugging or capability checks, and stay stateless: the extension does not inject its implicit session for them and they do not consume the managed-session slot needed for a later `--profile`, `--session-name`, or `--cdp` launch.
189
205
 
@@ -193,9 +209,11 @@ Current cautions:
193
209
  - implicit `piab-*` sessions are extension-managed convenience sessions: they stay alive across `pi` shutdown/reload so later default calls can keep following the active managed browser on `/reload` or `/resume`; they rely on the configured idle timeout to reduce stale background daemons; they store persisted-session large-snapshot spill files under a private session-scoped artifact directory with a bounded per-session budget, so `details.fullOutputPath` survives reload/resume without unbounded growth; and they still clean up process-private temp spill artifacts on shutdown
194
210
  - `sessionMode: "fresh"` without an explicit `--session` rotates that extension-managed session to the new browser so later auto calls keep using it
195
211
  - for local Unix launches, the wrapper uses a short private socket directory under `/tmp` so extension-generated session names do not trip upstream Unix socket-path limits in longer cwd/session-name combinations
196
- - for direct headless local Chrome launches to `chatgpt.com` and `chat.openai.com`, the extension injects a normal Chrome user agent when the caller did not explicitly provide `--user-agent`; this keeps the default headless workflow usable without forcing `--headed` or `--auto-connect`
212
+ - for direct headless local Chrome launches to `chat.com`, `chatgpt.com`, and `chat.openai.com`, the extension injects a normal Chrome user agent when the caller did not explicitly provide `--user-agent`; this keeps the default headless workflow usable without forcing `--headed` or `--auto-connect`
197
213
  - after profiled `open` calls, the extension best-effort re-selects the tab that matches the returned page URL when restored profile tabs steal focus during launch
198
- - after a target tab is known, later active-tab commands like `click` and `snapshot -i` best-effort pin that same tab inside the same upstream invocation when a reconnect would otherwise drift to a restored tab
214
+ - after a target tab is known, later active-tab commands best-effort pin that same tab inside the same upstream invocation when a reconnect would otherwise drift to a restored tab
215
+ - after a successful command, the extension also best-effort restores that intended tab when a restored/background tab steals focus after the command completes
216
+ - oversized snapshots and oversized generic outputs compact inline content and print the actual spill file path directly in the tool result when a spill file exists
199
217
  - explicit caller-provided `--session` values are treated as user-managed and are not auto-closed by the extension
200
218
  - explicit caller-provided `--user-agent` values win over the ChatGPT/OpenAI compatibility workaround
201
219
  - tool progress/details redact sensitive invocation values such as `--headers`, proxy credentials, and auth-bearing URL parameters before echoing them back into Pi
@@ -231,6 +249,7 @@ If you want to name the new upstream session yourself, pass an explicit session
231
249
  - [`docs/REQUIREMENTS.md`](docs/REQUIREMENTS.md) — product requirements and constraints
232
250
  - [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md) — current architecture decision
233
251
  - [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md) — proposed v1 tool shape
252
+ - [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md) — local, repo-readable command reference for agent sessions where direct `agent-browser` binary usage is blocked
234
253
  - [`docs/RELEASE.md`](docs/RELEASE.md) — maintainer release and package verification workflow
235
254
 
236
255
  ## Documentation rule
@@ -240,3 +259,10 @@ When requirements change in chat:
240
259
  1. update `docs/REQUIREMENTS.md`
241
260
  2. update the affected design docs
242
261
  3. update this README if user-facing expectations changed
262
+
263
+ When the upstream `agent-browser` binary changes:
264
+
265
+ 1. re-check the upstream command/help surface
266
+ 2. update `docs/COMMAND_REFERENCE.md`
267
+ 3. update tool guidance, README, and release docs if behavior or recommended usage changed
268
+ 4. verify that agents blocked from using the binary directly still have equally usable local, extension-side documentation
@@ -89,6 +89,7 @@ Practical policy:
89
89
  - leave explicit caller-provided `--session` choices alone unless the caller closes them explicitly
90
90
  - after profiled `open` / `goto` / `navigate` calls, verify the active tab still matches the returned page URL and best-effort switch back when restored profile tabs steal focus
91
91
  - once the wrapper knows which tab the agent is operating on, later active-tab commands may synthesize a tiny upstream `batch` that re-selects that tab and then runs the requested command in the same upstream invocation; this stays thin while avoiding reconnect-time drift on profile-restored sessions
92
+ - after a successful command on a known tab target, the wrapper may best-effort restore that same target again if restored/background tabs steal focus after the command returns
92
93
  - for local Unix launches, set a short private socket directory so extension-generated session names do not fail on the upstream Unix socket-path length limit
93
94
 
94
95
  This is primarily about ownership clarity and avoiding surprise, not adding a heavy safety wrapper. If the extension invented the session, the extension should own its lifecycle without breaking reload/resume semantics. If the caller explicitly chose the upstream session model, the extension should stay out of the way.
@@ -126,7 +127,7 @@ This keeps the product centered on native tool usage instead of auxiliary skill
126
127
  - compact result summaries
127
128
  - inline screenshots/images
128
129
  - lightweight session convenience
129
- - docs
130
+ - docs, including a repo-readable command reference that mirrors the blocked direct-binary help path closely enough for normal agent work
130
131
 
131
132
  ### Upstream `agent-browser` owns
132
133
 
@@ -0,0 +1,264 @@
1
+ # Agent Browser command reference
2
+
3
+ Related docs:
4
+ - [`../README.md`](../README.md)
5
+ - [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md)
6
+ - [`ARCHITECTURE.md`](ARCHITECTURE.md)
7
+ - [`RELEASE.md`](RELEASE.md)
8
+
9
+ ## Purpose
10
+
11
+ Provide a local, repo-readable command reference for the native `agent_browser` tool.
12
+
13
+ This project intentionally blocks normal `agent-browser` bash usage in most agent sessions, so the agent still needs an accessible local equivalent of the upstream command surface. This document is the durable reference the agent can read inside the repository without calling the binary directly.
14
+
15
+ ## Core mental model
16
+
17
+ Tool parameters:
18
+
19
+ ```json
20
+ {
21
+ "args": ["open", "https://example.com"],
22
+ "stdin": "optional raw stdin content",
23
+ "sessionMode": "auto"
24
+ }
25
+ ```
26
+
27
+ - `args`: exact `agent-browser` CLI tokens after the binary name
28
+ - `stdin`: only for commands like `batch` and `eval --stdin`
29
+ - `sessionMode`:
30
+ - `"auto"` reuse the extension-managed session when possible
31
+ - `"fresh"` rotate that managed session to a fresh upstream launch so startup-scoped flags like `--profile`, `--session-name`, or `--cdp` apply
32
+
33
+ ## Recommended workflow
34
+
35
+ ### Normal browse flow
36
+
37
+ ```json
38
+ { "args": ["open", "https://example.com"] }
39
+ { "args": ["snapshot", "-i"] }
40
+ { "args": ["click", "@e2"] }
41
+ { "args": ["snapshot", "-i"] }
42
+ ```
43
+
44
+ ### Extract page data
45
+
46
+ ```json
47
+ { "args": ["get", "title"] }
48
+ { "args": ["get", "url"] }
49
+ { "args": ["eval", "--stdin"], "stdin": "document.title" }
50
+ ```
51
+
52
+ ### Run a multi-step flow in one browser invocation
53
+
54
+ ```json
55
+ { "args": ["batch"], "stdin": "[[\"open\",\"https://example.com\"],[\"snapshot\",\"-i\"]]" }
56
+ ```
57
+
58
+ ### Switch from an already-active implicit session to a fresh profiled launch
59
+
60
+ ```json
61
+ {
62
+ "args": ["--profile", "Default", "open", "https://mail.google.com"],
63
+ "sessionMode": "fresh"
64
+ }
65
+ ```
66
+
67
+ ## High-value commands
68
+
69
+ ### Open and navigation
70
+
71
+ - `open <url>`
72
+ - `goto <url>`
73
+ - `navigate <url>`
74
+ - `back`
75
+ - `forward`
76
+ - `reload`
77
+
78
+ Examples:
79
+
80
+ ```json
81
+ { "args": ["open", "https://react.dev"] }
82
+ { "args": ["reload"] }
83
+ ```
84
+
85
+ ### Snapshot and page inspection
86
+
87
+ - `snapshot`
88
+ - `snapshot -i` interactive elements only
89
+ - `snapshot -c` compact tree
90
+ - `snapshot -d <n>` limit depth
91
+ - `snapshot -s <selector>` scope to one subtree
92
+
93
+ Examples:
94
+
95
+ ```json
96
+ { "args": ["snapshot", "-i"] }
97
+ { "args": ["snapshot", "-i", "-s", "main"] }
98
+ ```
99
+
100
+ ### Element interaction
101
+
102
+ - `click <selector-or-@ref>`
103
+ - `dblclick <selector-or-@ref>`
104
+ - `hover <selector-or-@ref>`
105
+ - `focus <selector-or-@ref>`
106
+ - `type <selector-or-@ref> <text>`
107
+ - `fill <selector-or-@ref> <text>`
108
+ - `press <key>`
109
+ - `check <selector-or-@ref>`
110
+ - `uncheck <selector-or-@ref>`
111
+ - `select <selector-or-@ref> <value...>`
112
+ - `drag <src> <dst>`
113
+ - `upload <selector-or-@ref> <files...>`
114
+
115
+ Examples:
116
+
117
+ ```json
118
+ { "args": ["click", "@e12"] }
119
+ { "args": ["fill", "#email", "user@example.com"] }
120
+ { "args": ["press", "Enter"] }
121
+ ```
122
+
123
+ ### Downloads and saved files
124
+
125
+ Use a purpose-built command when the goal is a saved file on disk, rather than relying on a generic `click`.
126
+
127
+ - `download <selector-or-@ref> <path>`
128
+ - `pdf <path>`
129
+ - `screenshot [path]`
130
+
131
+ Examples:
132
+
133
+ ```json
134
+ { "args": ["download", "@e5", "/tmp/report.pdf"] }
135
+ { "args": ["pdf", "/tmp/page.pdf"] }
136
+ { "args": ["screenshot", "/tmp/page.png"] }
137
+ ```
138
+
139
+ Rules:
140
+
141
+ - Prefer `download <selector> <path>` over `click` when the goal is a downloaded file on disk.
142
+ - Prefer explicit output paths when the calling task needs to read, move, or attach the saved file later.
143
+ - Use `--download-path <dir>` on the first launch when many downloads should land in one directory.
144
+
145
+ ### Read page state
146
+
147
+ `get <subcommand>` supports:
148
+
149
+ - `title`
150
+ - `url`
151
+ - `text <selector>`
152
+ - `html <selector>`
153
+ - `value <selector>`
154
+ - `attr <selector> <name>`
155
+ - `count <selector>`
156
+ - `box <selector>`
157
+ - `styles <selector>`
158
+ - `cdp-url`
159
+
160
+ Examples:
161
+
162
+ ```json
163
+ { "args": ["get", "title"] }
164
+ { "args": ["get", "text", "main"] }
165
+ { "args": ["get", "attr", "a.primary", "href"] }
166
+ ```
167
+
168
+ ### JavaScript evaluation
169
+
170
+ - `eval <js>`
171
+ - `eval --stdin` with JavaScript in `stdin`
172
+
173
+ Example:
174
+
175
+ ```json
176
+ { "args": ["eval", "--stdin"], "stdin": "Array.from(document.querySelectorAll('a')).map((a) => a.href)" }
177
+ ```
178
+
179
+ Rules:
180
+
181
+ - Return the intended value instead of relying on `console.log`.
182
+ - Scope DOM queries to the relevant route, component, or element.
183
+ - Prefer `snapshot -i` refs first when the task is interaction-heavy.
184
+
185
+ ### Wait
186
+
187
+ - `wait <ms>`
188
+ - `wait <selector>`
189
+ - use explicit variants like `--load <state>`, `--url <matcher>`, `--fn <js>`, or `--text <matcher>` when needed
190
+
191
+ Important:
192
+
193
+ - bare `wait --load` is incomplete; `--load` needs a state value
194
+
195
+ ### Tabs
196
+
197
+ - `tab list`
198
+ - `tab <tab-id-or-label>`
199
+ - `tab new`
200
+ - `tab close`
201
+
202
+ Examples:
203
+
204
+ ```json
205
+ { "args": ["tab", "list"] }
206
+ { "args": ["tab", "t3"] }
207
+ ```
208
+
209
+ Use this when:
210
+
211
+ - a restored profile tab steals focus
212
+ - an interaction opens a new tab
213
+ - the browser lands on the wrong page unexpectedly
214
+
215
+ ### Batch
216
+
217
+ - `batch`
218
+ - `batch --bail`
219
+
220
+ Example:
221
+
222
+ ```json
223
+ { "args": ["batch", "--bail"], "stdin": "[[\"open\",\"https://example.com\"],[\"snapshot\",\"-i\"],[\"click\",\"@e2\"]]" }
224
+ ```
225
+
226
+ ### Session and inspection commands
227
+
228
+ - `session`
229
+ - `session list`
230
+ - `close`
231
+ - `close --all`
232
+ - `--help`
233
+ - `--version`
234
+
235
+ The wrapper keeps `--help` and `--version` stateless so they do not consume the implicit managed-session slot.
236
+
237
+ ## Important global flags
238
+
239
+ - `--profile <name|path>` reuse Chrome profile state
240
+ - `--session <name>` explicit upstream session name
241
+ - `--session-name <name>` upstream saved auth/session state name
242
+ - `--cdp <port-or-url>` connect to an existing browser
243
+ - `--headed` show the browser window
244
+ - `--download-path <dir>` default download directory
245
+ - `--user-agent <ua>` custom user agent
246
+ - `--json` injected by the wrapper automatically for normal tool execution
247
+
248
+ ## Wrapper-specific behavior worth knowing
249
+
250
+ - The extension may keep following one implicit managed session across later tool calls.
251
+ - If startup-scoped flags like `--profile`, `--session-name`, or `--cdp` would be ignored because that implicit session is already active, retry with `sessionMode: "fresh"`.
252
+ - After profiled opens, the wrapper best-effort restores the intended target tab when restored tabs steal focus.
253
+ - After the wrapper knows the intended tab for a session, later commands best-effort keep that tab active so reconnect drift does not silently move the browser to a restored/background tab.
254
+ - Oversized snapshots and oversized generic outputs may be compacted in tool content, with the full raw output written to a spill file path shown directly in the tool result.
255
+
256
+ ## Maintenance rule
257
+
258
+ Whenever the upstream `agent-browser` binary version changes in this project:
259
+
260
+ 1. re-check the upstream command/help surface
261
+ 2. update this local command reference if anything changed
262
+ 3. update tool prompt guidance if the recommended agent workflow changed
263
+ 4. update README and release docs if the user-visible behavior changed
264
+ 5. validate the extension still exposes local documentation that is at least as usable as the blocked direct-binary path for normal agent work
package/docs/RELEASE.md CHANGED
@@ -54,10 +54,12 @@ node scripts/verify-package.mjs --list-files
54
54
  Before publishing, also validate the explicit local-checkout path:
55
55
 
56
56
  1. Install `agent-browser` separately.
57
- 2. Launch `pi --no-extensions -e .` from this repository root.
58
- 3. Confirm the checkout extension loads from `extensions/agent-browser/index.ts`.
59
- 4. Run a smoke prompt that exercises `agent_browser`.
60
- 5. Validate managed-session continuity with both `/reload` and a full restart + `/resume`.
57
+ 2. Make sure Pi has only one active source for this extension during checkout validation.
58
+ 3. Launch `pi --no-extensions -e .` from this repository root.
59
+ 4. Confirm the checkout extension loads from `extensions/agent-browser/index.ts`.
60
+ 5. Run a smoke prompt that exercises `agent_browser`.
61
+ 6. Validate managed-session continuity with both `/reload` and a full restart + `/resume`.
62
+ 7. Re-check local extension-side docs (`README.md`, `docs/COMMAND_REFERENCE.md`, and prompt guidance) if the upstream `agent-browser` version/help surface changed.
61
63
 
62
64
  Example smoke prompt:
63
65
 
@@ -71,6 +73,8 @@ Recommended lifecycle follow-up:
71
73
  2. Run `/reload`, then ask for `snapshot -i` and confirm the same page is still active.
72
74
  3. Exit `pi`, relaunch it against the same session file or use `/resume`, then ask for `snapshot -i` again and confirm the same page is still active.
73
75
  4. Open a large page that compacts its snapshot output and confirm the spill file at `details.fullOutputPath` still exists after the restart/resume flow.
76
+ 5. Trigger an oversized non-snapshot output (for example a deliberately large `eval --stdin` result) and confirm the tool prints the actual spill file path directly in content instead of only referencing a details key.
77
+ 6. Validate at least one file-download flow with `download <selector> <path>`.
74
78
 
75
79
  ## Post-publish install validation
76
80
 
@@ -81,6 +85,11 @@ pi install npm:pi-agent-browser-native@<version>
81
85
  pi -e npm:pi-agent-browser-native@<version>
82
86
  ```
83
87
 
88
+ For installed-package validation, make sure Pi has only one active source for this extension. The simplest safe paths are either:
89
+
90
+ - temporarily disable/remove the checkout path and then run plain `pi`, or
91
+ - use an isolated ephemeral run such as `pi --no-extensions -e npm:pi-agent-browser-native@<version>`
92
+
84
93
  Then confirm `pi` exposes the native `agent_browser` tool, that a basic `open` + `snapshot -i` flow works, and that `/reload` plus restart/`/resume` keep following the same implicit managed browser session.
85
94
 
86
95
  ## Release notes checklist
@@ -89,6 +98,7 @@ Before publishing:
89
98
 
90
99
  - update `CHANGELOG.md`
91
100
  - confirm README install guidance still leads with the package-first flow
101
+ - confirm `docs/COMMAND_REFERENCE.md` still matches the effective upstream command/help surface used by the wrapper
92
102
  - confirm the explicit local-checkout instructions still work for pre-release validation
93
103
  - rerun `npm run verify:release`
94
104
  - manually exercise `/reload` and full restart + `/resume` continuity in local checkout validation
@@ -64,6 +64,7 @@ Define the product requirements and constraints for `pi-agent-browser-native`.
64
64
  - Someone opening the repo should quickly understand the goal, purpose, install model, and usage.
65
65
  - Documents should read as complete documents, not iterative logs, unless they are explicitly meant to be iterative, such as a changelog.
66
66
  - Requirements, expectations, and durable rules from user conversations should be reflected in the appropriate docs.
67
+ - Because direct-binary usage is commonly blocked in normal agent sessions, the repo must carry a local command reference for the effective `agent_browser` surface and keep it in sync with upstream changes.
67
68
  - Published package contents should include the canonical user-facing docs plus `LICENSE`.
68
69
  - Published package contents should exclude agent-only and superseded docs such as `AGENTS.md`, `docs/v1-tool-contract.md`, and `docs/native-integration-design.md`.
69
70
 
@@ -85,7 +86,7 @@ The design should comfortably support workflows such as:
85
86
  - web research
86
87
  - using browser UIs for other LLMs such as ChatGPT, Grok, Gemini, and Claude
87
88
  - isolated authenticated browser sessions
88
- - headless authenticated ChatGPT/OpenAI browsing without forcing `--headed` or `--auto-connect`
89
+ - headless authenticated `chat.com` / ChatGPT / OpenAI browsing without forcing `--headed` or `--auto-connect`
89
90
  - upstream profile/debug workflows without adding a local profile-cloning layer in this package
90
91
 
91
92
  ## Implications for the implementation
@@ -95,10 +96,12 @@ The design should comfortably support workflows such as:
95
96
  - The wrapper should stay thin, with upstream `agent-browser` remaining the source of truth for command semantics.
96
97
  - User-facing docs belong in `README.md` and the canonical published files under `docs/`.
97
98
  - Agent workflow and deeper testing procedures can stay in `AGENTS.md`, but published docs must not depend on that file being present.
99
+ - When upstream `agent-browser` changes, refresh the local command reference, prompt guidance, and other extension-side docs so agents still have a repo-readable equivalent of the blocked direct-binary help path.
98
100
  - Keep mitigations for legacy-skill coexistence simple; do not add extra moving parts unless observed behavior justifies them.
99
101
  - Prefer narrow, evidence-backed compatibility mitigations over broad stealth layers when a specific upstream site starts rejecting the default headless launch fingerprint.
100
102
  - Preserve the page that a profiled `open` just navigated to; if restored profile tabs steal focus during launch, the wrapper should best-effort switch back to the returned page URL before handing control back to the agent.
101
103
  - Once a tab target is known for a session, later active-tab commands should best-effort pin that same tab inside the same upstream invocation when reconnect drift would otherwise land on a restored/background tab.
104
+ - If a restored/background tab steals focus after a successful command, the wrapper should best-effort restore the intended target tab again before handing control back.
102
105
  - On local Unix launches, extension-generated session names should not fail just because the upstream default socket path is too long; the wrapper should choose a shorter socket directory when needed.
103
106
 
104
107
  ## Open design questions
@@ -4,6 +4,7 @@ Related docs:
4
4
  - [`../README.md`](../README.md)
5
5
  - [`REQUIREMENTS.md`](REQUIREMENTS.md)
6
6
  - [`ARCHITECTURE.md`](ARCHITECTURE.md)
7
+ - [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md)
7
8
 
8
9
  ## V1 tool
9
10
 
@@ -24,7 +25,7 @@ It also keeps the main UX where it belongs: the agent invokes the tool directly
24
25
 
25
26
  The tool guidance should be written for task discovery first, not wrapper implementation first. That means the description should emphasize browser use cases like web research, reading live docs, clicking, filling, screenshots, extraction, and authenticated/profile-based workflows. Low-level wrapper details like `stdin` and exact CLI args belong in the schema and guidelines, not the lead description.
26
27
 
27
- The tool also needs an operating playbook, not just a capability list. The model should not have to rediscover basics each session. Guidance should explicitly encode the normal browser workflow (`open` -> `snapshot -i` -> interact -> re-snapshot), the authenticated-content workflow (prefer `--profile Default` on the first browser call and let the implicit session carry continuity; use `--auto-connect` as a fallback when profile reuse is unavailable), and the preferred recovery path when a session opens on the wrong tab, an action changes origin unexpectedly, or an `open` call returns blocked/blank/unexpected results (`tab list` / `tab <n>` / `snapshot -i` before retrying different URLs or fallback strategies). It should also discourage inventing fixed explicit session names for routine tasks, because those names leak stale browser state across otherwise unrelated `pi` sessions. For read-only browsing tasks, guidance should prefer answering from the current page state first: use the current snapshot, structured ref labels, or `eval --stdin` on the current page before navigating into media viewers, detail routes, or other new pages unless the current view lacks the needed information. When using `eval --stdin`, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics. When using `eval --stdin` for extraction, return the intended value instead of relying on `console.log` as the primary result channel.
28
+ The tool also needs an operating playbook, not just a capability list. The model should not have to rediscover basics each session. Guidance should explicitly encode the normal browser workflow (`open` -> `snapshot -i` -> interact -> re-snapshot), the authenticated-content workflow (prefer `--profile Default` on the first browser call and let the implicit session carry continuity; use `--auto-connect` as a fallback when profile reuse is unavailable), and the preferred recovery path when a session opens on the wrong tab, an action changes origin unexpectedly, or an `open` call returns blocked/blank/unexpected results (`tab list` / `tab <tab-id-or-label>` / `snapshot -i` before retrying different URLs or fallback strategies). It should also discourage inventing fixed explicit session names for routine tasks, because those names leak stale browser state across otherwise unrelated `pi` sessions. For read-only browsing tasks, guidance should prefer answering from the current page state first: use the current snapshot, structured ref labels, or `eval --stdin` on the current page before navigating into media viewers, detail routes, or other new pages unless the current view lacks the needed information. For downloads, guidance should explicitly prefer `download <selector> <path>` over `click` when the goal is a file on disk. When using `eval --stdin`, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics. When using `eval --stdin` for extraction, return the intended value instead of relying on `console.log` as the primary result channel. Because the extension blocks normal direct-binary usage in most agent sessions, the repository must also carry a local command reference that stays in sync with the effective tool surface.
28
29
 
29
30
  ## Parameters
30
31
 
@@ -109,7 +110,7 @@ Primary content should be:
109
110
 
110
111
  Examples:
111
112
  - small `snapshot` results should include the actual snapshot text
112
- - oversized `snapshot` results should switch to a compact view that preserves the primary content, nearby sections, and a trimmed set of high-value refs, while exposing the full raw snapshot path via `details.fullOutputPath`
113
+ - oversized `snapshot` results should switch to a compact view that preserves the primary content, nearby sections, and a trimmed set of high-value refs, while exposing the full raw snapshot path directly in the rendered tool text and via `details.fullOutputPath`
113
114
  - successful navigation actions like `click`, `back`, `forward`, and `reload` should include a lightweight post-action title/url summary when the wrapper can address the active session
114
115
  - `tab list` should include a readable tab summary
115
116
  - `screenshot` should include the saved-path summary plus the inline image attachment when available
@@ -141,13 +142,13 @@ Additional structured fields can appear when relevant:
141
142
  - `batchFailure` and `batchSteps` for `batch` rendering, including mixed-success runs
142
143
  - `navigationSummary` for navigation-style commands like `click`, `back`, `forward`, and `reload`
143
144
  - `imagePath` / `imagePaths` for screenshots and batched image outputs
144
- - `fullOutputPath` / `fullOutputPaths` when large snapshot output is compacted and spilled to a private file; persisted sessions keep that path under a private session-scoped artifact directory with a bounded per-session budget so it survives reload/resume without unbounded growth
145
+ - `fullOutputPath` / `fullOutputPaths` when large snapshot output or other oversized tool output is compacted and spilled to a private file; persisted sessions keep that path under a private session-scoped artifact directory with a bounded per-session budget so it survives reload/resume without unbounded growth
145
146
  - `sessionRecoveryHint` when startup-scoped flags need `sessionMode: "fresh"`
146
147
  - `inspection: true` plus `stdout` for successful plain-text inspection commands like `--help` and `--version`
147
148
 
148
149
  When the tool echoes `args` or `effectiveArgs` back into Pi, sensitive values such as `--headers`, proxy credentials, and auth-bearing URL parameters should be redacted first.
149
150
 
150
- For oversized snapshots, details should switch to a compact metadata object and include `fullOutputPath` pointing at a private JSON spill file with the full upstream snapshot payload. Persisted sessions should keep that spill file under a private session-scoped artifact directory so the path remains usable after reload/restart, with the oldest persisted spill files evicted as needed to stay within the per-session budget.
151
+ For oversized snapshots and other oversized tool outputs, details should switch to a compact metadata object and include `fullOutputPath` pointing at a private spill file with the full upstream payload. The model-facing tool text should print the actual spill-file path when one exists instead of only saying to inspect a details key. Persisted sessions should keep that spill file under a private session-scoped artifact directory so the path remains usable after reload/restart, with the oldest persisted spill files evicted as needed to stay within the per-session budget.
151
152
 
152
153
  ## High-value result rendering
153
154
 
@@ -155,7 +156,8 @@ For oversized snapshots, details should switch to a compact metadata object and
155
156
 
156
157
  Worth doing in v1:
157
158
  - screenshots → inline image attachment
158
- - snapshots → origin + ref count + main-content-first compact preview, with the raw snapshot spill path kept in `details.fullOutputPath` when the inline result would otherwise be too large
159
+ - snapshots → origin + ref count + main-content-first compact preview, with the raw snapshot spill path printed directly in content and kept in `details.fullOutputPath` when the inline result would otherwise be too large
160
+ - oversized generic outputs such as large `eval --stdin` payloads → compact preview plus the actual spill file path instead of dumping the whole payload into model context
159
161
  - extraction-style commands like `eval --stdin` and `get title` → scalar-first text with lightweight origin context when available
160
162
  - navigation actions like `click`, `back`, `forward`, and `reload` → lightweight post-action title/url summary when available
161
163
  - tab lists → compact summary/table
@@ -184,10 +186,11 @@ If `agent-browser` is not on `PATH`, fail with a message that:
184
186
  - pass explicit `--profile` straight through to upstream `agent-browser`; no profile-cloning or isolation layer is added in v1
185
187
  - after profiled `open` / `goto` / `navigate`, if upstream leaves a restored profile tab active instead of the page that was just opened, best-effort switch back to the tab whose URL matches the returned open result before returning control to the agent
186
188
  - once the wrapper has a known tab target for a session, later active-tab commands may best-effort pin that tab inside the same upstream invocation so reconnect drift does not send a `click`, `snapshot`, or similar action to a restored/background tab instead
189
+ - after a successful command on a known tab target, the wrapper may best-effort restore that same target again if a restored/background tab steals focus after the command completes
187
190
  - on local Unix launches, set a short private socket directory for wrapper-spawned `agent-browser` processes so extension-generated session names do not fail the upstream Unix socket-path length limit in longer cwd/session-name combinations
188
191
  - treat successful plain-text inspection commands like `--help` and `--version` as stateless: do not inject the implicit managed session and do not let those calls claim the managed-session slot
189
192
  - if startup-scoped flags like `--profile`, `--session-name`, or `--cdp` are supplied after the implicit session is already active while `sessionMode` is `"auto"`, return a validation error with a structured recovery hint that recommends `sessionMode: "fresh"`
190
- - for direct headless local Chrome launches to `chatgpt.com` / `chat.openai.com`, allow a narrow compatibility fallback that injects a normal Chrome `--user-agent` only when the caller did not explicitly provide one and did not choose `--headed`, `--cdp`, `--auto-connect`, or a provider-backed launch
193
+ - for direct headless local Chrome launches to `chat.com` / `chatgpt.com` / `chat.openai.com`, allow a narrow compatibility fallback that injects a normal Chrome `--user-agent` only when the caller did not explicitly provide one and did not choose `--headed`, `--cdp`, `--auto-connect`, or a provider-backed launch
191
194
 
192
195
  ## Non-goals
193
196
 
@@ -65,6 +65,7 @@ const QUICK_START_GUIDELINES = [
65
65
  "Quick start mental model: args are the exact agent-browser CLI args after the binary; stdin is only for batch and eval --stdin; sessionMode=fresh switches the extension-managed session to a fresh upstream launch when you need new --profile, --session-name, or --cdp state.",
66
66
  "Common first calls: { args: [\"open\", \"https://example.com\"] } then { args: [\"snapshot\", \"-i\"] }; after navigation, use { args: [\"click\", \"@e2\"] } then { args: [\"snapshot\", \"-i\"] }.",
67
67
  "Common advanced calls: { args: [\"batch\"], stdin: \"[[\\\"open\\\",\\\"https://example.com\\\"],[\\\"snapshot\\\",\\\"-i\\\"]]\" }, { args: [\"eval\", \"--stdin\"], stdin: \"document.title\" }, and { args: [\"--profile\", \"Default\", \"open\", \"https://example.com/account\"], sessionMode: \"fresh\" }.",
68
+ "High-value command reference: download <selector> <path> saves a file triggered by a click; get title/url/text/html/value/attr/count reads page state; screenshot [path] captures an image; pdf <path> saves a PDF; tab list and tab <tab-id-or-label> inspect or recover the active tab.",
68
69
  ] as const;
69
70
  const BRAVE_SEARCH_PROMPT_GUIDELINE =
70
71
  "When a non-empty BRAVE_API_KEY is available in the current environment, prefer the Brave Search API via bash/curl to discover specific destination URLs, then open the chosen URL with agent_browser instead of browsing a search engine results page just to find the target.";
@@ -74,9 +75,10 @@ const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
74
75
  "Do not invent fixed explicit session names for routine tasks. Use the implicit session unless you truly need multiple isolated browser sessions in the same conversation.",
75
76
  "When using --profile, --session-name, or --cdp, put them on the first command for that session. If you intentionally use an explicit --session, keep using that same explicit session for follow-ups.",
76
77
  "If you already used the implicit session and now need startup-scoped flags like --profile, --session-name, or --cdp, retry with sessionMode set to fresh or pass an explicit --session for the new launch. After a successful unnamed fresh launch, later auto calls follow that new session.",
77
- "If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <n> / snapshot -i to recover state before retrying different URLs or fallback strategies. Only use wait with an explicit argument like milliseconds, --load, --url, --fn, or --text.",
78
+ "If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. Only use wait with an explicit argument like milliseconds, --load <state>, --url <matcher>, --fn <js>, or --text <matcher>.",
78
79
  "For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.",
79
80
  "For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.",
81
+ "For downloads, prefer download <selector> <path> when an element click should save a file. Do not rely on click alone when you need the downloaded file on disk.",
80
82
  "When using eval --stdin, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics.",
81
83
  "When using eval --stdin for extraction, return the value you want instead of relying on console.log as the primary result channel.",
82
84
  "Do not call --help or other exploratory inspection commands unless the user explicitly asks for them or debugging the browser integration is necessary.",
@@ -322,6 +324,7 @@ function extractStringResultField(data: unknown, fieldName: "title" | "url"): st
322
324
  }
323
325
 
324
326
  const SESSION_TAB_PINNING_EXCLUDED_COMMANDS = new Set(["batch", "close", "goto", "navigate", "open", "session", "tab"]);
327
+ const SESSION_TAB_POST_COMMAND_CORRECTION_EXCLUDED_COMMANDS = new Set(["batch", "close", "session", "tab"]);
325
328
 
326
329
  interface SessionTabTarget {
327
330
  title?: string;
@@ -413,26 +416,23 @@ function shouldPinSessionTabForCommand(options: { command?: string; sessionName?
413
416
  );
414
417
  }
415
418
 
/**
 * Decides whether the wrapper should try to re-select the session's tab after
 * a command has finished.
 *
 * Both a session name and a parsed command are required, and the command must
 * not be listed in SESSION_TAB_POST_COMMAND_CORRECTION_EXCLUDED_COMMANDS.
 *
 * @param options.command - Parsed command name, if any.
 * @param options.sessionName - Session the command ran against, if any.
 * @returns `true` when post-command tab correction is allowed.
 */
function shouldCorrectSessionTabAfterCommand(options: { command?: string; sessionName?: string }): boolean {
  const { command, sessionName } = options;
  if (sessionName === undefined || command === undefined) {
    return false;
  }
  return !SESSION_TAB_POST_COMMAND_CORRECTION_EXCLUDED_COMMANDS.has(command);
}
426
+
416
427
  function selectSessionTargetTab(options: {
417
- tabs: Array<{ active?: boolean; index?: number; title?: string; url?: string }>;
428
+ tabs: Array<{ active?: boolean; index?: number; label?: string; tabId?: string; title?: string; url?: string }>;
418
429
  target: SessionTabTarget;
419
430
  }): OpenResultTabCorrection | undefined {
420
- const matchingTabs = options.tabs.filter((tab) => normalizeComparableUrl(tab.url) === options.target.url);
421
- if (matchingTabs.length === 0) {
422
- return undefined;
423
- }
424
- const titledMatch =
425
- typeof options.target.title === "string"
426
- ? matchingTabs.find((tab) => tab.title?.trim() === options.target.title)
427
- : undefined;
428
- const selectedTab = titledMatch ?? matchingTabs[0];
429
- return typeof selectedTab.index === "number"
430
- ? {
431
- selectedIndex: selectedTab.index,
432
- targetTitle: options.target.title,
433
- targetUrl: options.target.url,
434
- }
435
- : undefined;
431
+ return chooseOpenResultTabCorrection({
432
+ tabs: options.tabs,
433
+ targetTitle: options.target.title,
434
+ targetUrl: options.target.url,
435
+ });
436
436
  }
437
437
 
438
438
  function deriveSessionTabTarget(options: {
@@ -570,9 +570,11 @@ async function collectOpenResultTabCorrection(options: {
570
570
  if (!isRecord(tabData) || !Array.isArray(tabData.tabs)) {
571
571
  return undefined;
572
572
  }
573
- const tabs = tabData.tabs.filter(isRecord).map((tab) => ({
573
+ const tabs = tabData.tabs.filter(isRecord).map((tab, index) => ({
574
574
  active: tab.active === true,
575
- index: typeof tab.index === "number" ? tab.index : undefined,
575
+ index: typeof tab.index === "number" ? tab.index : index,
576
+ label: typeof tab.label === "string" ? tab.label : undefined,
577
+ tabId: typeof tab.tabId === "string" ? tab.tabId : undefined,
576
578
  title: typeof tab.title === "string" ? tab.title : undefined,
577
579
  url: typeof tab.url === "string" ? tab.url : undefined,
578
580
  }));
@@ -590,9 +592,11 @@ async function collectSessionTabSelection(options: {
590
592
  if (!isRecord(tabData) || !Array.isArray(tabData.tabs)) {
591
593
  return undefined;
592
594
  }
593
- const tabs = tabData.tabs.filter(isRecord).map((tab) => ({
595
+ const tabs = tabData.tabs.filter(isRecord).map((tab, index) => ({
594
596
  active: tab.active === true,
595
- index: typeof tab.index === "number" ? tab.index : undefined,
597
+ index: typeof tab.index === "number" ? tab.index : index,
598
+ label: typeof tab.label === "string" ? tab.label : undefined,
599
+ tabId: typeof tab.tabId === "string" ? tab.tabId : undefined,
596
600
  title: typeof tab.title === "string" ? tab.title : undefined,
597
601
  url: typeof tab.url === "string" ? tab.url : undefined,
598
602
  }));
@@ -607,7 +611,7 @@ async function applyOpenResultTabCorrection(options: {
607
611
  }): Promise<OpenResultTabCorrection | undefined> {
608
612
  const { correction, cwd, sessionName, signal } = options;
609
613
  const result = await runSessionCommandData({
610
- args: ["tab", String(correction.selectedIndex)],
614
+ args: ["tab", correction.selectedTab],
611
615
  cwd,
612
616
  sessionName,
613
617
  signal,
@@ -816,7 +820,7 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
816
820
  sessionTabCorrection = plannedSessionTabSelection;
817
821
  processArgs = ["--json", "--session", executionPlan.sessionName, "batch"];
818
822
  processStdin = JSON.stringify([
819
- ["tab", String(plannedSessionTabSelection.selectedIndex)],
823
+ ["tab", plannedSessionTabSelection.selectedTab],
820
824
  commandTokens,
821
825
  ...(includePinnedNavigationSummary ? [["get", "title"], ["get", "url"]] : []),
822
826
  ]);
@@ -925,12 +929,42 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
925
929
  }
926
930
  }
927
931
 
932
+ const observedSessionTabTarget =
933
+ normalizeSessionTabTarget(navigationSummary) ?? extractSessionTabTargetFromData(presentationEnvelope?.data);
928
934
  const currentSessionTabTarget = deriveSessionTabTarget({
929
935
  command: executionPlan.commandInfo.command,
930
936
  data: presentationEnvelope?.data,
931
937
  navigationSummary,
932
938
  previousTarget: priorSessionTabTarget,
933
939
  });
940
+ if (
941
+ succeeded &&
942
+ priorSessionTabTarget &&
943
+ !sessionTabCorrection &&
944
+ observedSessionTabTarget &&
945
+ shouldCorrectSessionTabAfterCommand({
946
+ command: executionPlan.commandInfo.command,
947
+ sessionName: executionPlan.sessionName,
948
+ })
949
+ ) {
950
+ const postCommandTabCorrection = await collectSessionTabSelection({
951
+ cwd: ctx.cwd,
952
+ sessionName: executionPlan.sessionName,
953
+ signal,
954
+ target: observedSessionTabTarget,
955
+ });
956
+ if (postCommandTabCorrection) {
957
+ const appliedPostCommandCorrection = await applyOpenResultTabCorrection({
958
+ correction: postCommandTabCorrection,
959
+ cwd: ctx.cwd,
960
+ sessionName: executionPlan.sessionName,
961
+ signal,
962
+ });
963
+ if (appliedPostCommandCorrection && !sessionTabCorrection) {
964
+ sessionTabCorrection = appliedPostCommandCorrection;
965
+ }
966
+ }
967
+ }
934
968
  if (executionPlan.sessionName) {
935
969
  if (executionPlan.commandInfo.command === "close" && succeeded) {
936
970
  sessionTabTargets.delete(executionPlan.sessionName);
@@ -10,7 +10,11 @@ import { readFile, stat } from "node:fs/promises";
10
10
  import { resolve } from "node:path";
11
11
 
12
12
  import { parseCommandInfo, type CommandInfo } from "../runtime.js";
13
- import { type PersistentSessionArtifactStore } from "../temp.js";
13
+ import {
14
+ type PersistentSessionArtifactStore,
15
+ writePersistentSessionArtifactFile,
16
+ writeSecureTempFile,
17
+ } from "../temp.js";
14
18
  import { buildSnapshotPresentation, formatRawSnapshotText, formatSnapshotSummary } from "./snapshot.js";
15
19
  import {
16
20
  type AgentBrowserBatchResult,
@@ -19,8 +23,10 @@ import {
19
23
  type BatchStepPresentationDetails,
20
24
  type ToolPresentation,
21
25
  isRecord,
26
+ countLines,
22
27
  parsePositiveInteger,
23
28
  stringifyUnknown,
29
+ truncateText,
24
30
  } from "./shared.js";
25
31
 
26
32
  const IMAGE_EXTENSION_TO_MIME_TYPE: Record<string, string> = {
@@ -35,6 +41,11 @@ const INLINE_IMAGE_MAX_BYTES_ENV = "PI_AGENT_BROWSER_INLINE_IMAGE_MAX_BYTES";
35
41
  const DEFAULT_INLINE_IMAGE_MAX_BYTES = 5 * 1_024 * 1_024;
36
42
  const NAVIGATION_SUMMARY_COMMANDS = new Set(["back", "click", "dblclick", "forward", "reload"]);
37
43
  const NAVIGATION_SUMMARY_FIELD = "navigationSummary";
44
+ const LARGE_OUTPUT_INLINE_MAX_CHARS = 8_000;
45
+ const LARGE_OUTPUT_INLINE_MAX_LINES = 120;
46
+ const LARGE_OUTPUT_PREVIEW_MAX_CHARS = 2_500;
47
+ const LARGE_OUTPUT_PREVIEW_MAX_LINES = 40;
48
+ const LARGE_OUTPUT_FILE_PREFIX = "pi-agent-browser-output";
38
49
 
39
50
  interface NavigationSummary {
40
51
  title?: string;
@@ -73,8 +84,15 @@ function getTabSummary(data: Record<string, unknown>): string | undefined {
73
84
  const marker = tab.active === true ? "*" : "-";
74
85
  const title = typeof tab.title === "string" ? tab.title : "(untitled)";
75
86
  const url = typeof tab.url === "string" ? tab.url : "(no url)";
76
- const tabIndex = typeof tab.index === "number" ? tab.index : index;
77
- return `${marker} [${tabIndex}] ${title} ${url}`;
87
+ const tabSelector =
88
+ typeof tab.tabId === "string" && tab.tabId.trim().length > 0
89
+ ? tab.tabId.trim()
90
+ : typeof tab.label === "string" && tab.label.trim().length > 0
91
+ ? tab.label.trim()
92
+ : typeof tab.index === "number"
93
+ ? String(tab.index)
94
+ : String(index);
95
+ return `${marker} [${tabSelector}] ${title} — ${url}`;
78
96
  });
79
97
  return lines.join("\n");
80
98
  }
@@ -107,6 +125,19 @@ function getScreenshotSummary(data: Record<string, unknown>): string | undefined
107
125
  return typeof data.path === "string" ? `Saved image: ${data.path}` : undefined;
108
126
  }
109
127
 
/**
 * Builds a one-line "file saved" summary for commands that write a file.
 *
 * @param commandInfo - Parsed command; only `command` is read.
 * @param data - Result payload; `path` must be a string for a summary to be produced.
 * @returns `Downloaded file: <path>` for `download`, `Saved PDF: <path>` for
 *   `pdf`, and `undefined` for any other command or when `path` is not a string.
 */
function getSavedFileSummary(commandInfo: CommandInfo, data: Record<string, unknown>): string | undefined {
  const savedPath = data.path;
  if (typeof savedPath !== "string") {
    return undefined;
  }
  switch (commandInfo.command) {
    case "download":
      return `Downloaded file: ${savedPath}`;
    case "pdf":
      return `Saved PDF: ${savedPath}`;
    default:
      return undefined;
  }
}
140
+
110
141
  function getScalarExtractionResult(data: Record<string, unknown>): string | undefined {
111
142
  const { result } = data;
112
143
  if (typeof result === "string") {
@@ -430,6 +461,10 @@ function formatSummary(commandInfo: CommandInfo, data: unknown): string {
430
461
  if (commandInfo.command === "screenshot" && typeof data.path === "string") {
431
462
  return `Screenshot saved: ${data.path}`;
432
463
  }
464
+ const savedFileSummary = getSavedFileSummary(commandInfo, data);
465
+ if (savedFileSummary) {
466
+ return savedFileSummary;
467
+ }
433
468
  const extractionSummary = formatExtractionSummary(commandInfo, data);
434
469
  if (extractionSummary) {
435
470
  return extractionSummary;
@@ -483,6 +518,10 @@ function formatContentText(commandInfo: CommandInfo, data: unknown): string {
483
518
  const screenshotSummary = getScreenshotSummary(data);
484
519
  if (screenshotSummary) return screenshotSummary;
485
520
  }
521
+ const savedFileSummary = getSavedFileSummary(commandInfo, data);
522
+ if (savedFileSummary) {
523
+ return savedFileSummary;
524
+ }
486
525
 
487
526
  const extractionText = formatExtractionText(commandInfo, data);
488
527
  if (extractionText) {
@@ -539,6 +578,115 @@ async function attachInlineImage(presentation: ToolPresentation, imagePath: stri
539
578
  }
540
579
  }
541
580
 
/**
 * Reports whether rendered tool text is too large to keep inline.
 *
 * The text is considered large when it exceeds either the inline character
 * limit or the inline line limit.
 *
 * @param text - Rendered presentation text.
 * @returns `true` when either inline limit is exceeded.
 */
function shouldCompactLargeOutput(text: string): boolean {
  if (text.length > LARGE_OUTPUT_INLINE_MAX_CHARS) {
    return true;
  }
  return countLines(text) > LARGE_OUTPUT_INLINE_MAX_LINES;
}
584
+
/**
 * Produces a bounded preview of a large text by taking lines from the top.
 *
 * Lines are appended until either the preview line limit or the preview
 * character budget is reached. Each line is passed through `truncateText`
 * with the remaining character budget, but never with less than 40, so the
 * last accepted line may push the preview slightly past the budget.
 *
 * @param text - Full rendered text.
 * @returns The preview text plus how many source lines were left out.
 */
function buildLargeOutputPreview(text: string): { omittedLineCount: number; previewText: string } {
  const sourceLines = text.split("\n");
  const keptLines: string[] = [];
  let usedChars = 0;

  for (const sourceLine of sourceLines) {
    const lineLimitReached = keptLines.length >= LARGE_OUTPUT_PREVIEW_MAX_LINES;
    const charBudgetSpent = usedChars >= LARGE_OUTPUT_PREVIEW_MAX_CHARS;
    if (lineLimitReached || charBudgetSpent) {
      break;
    }
    const lineBudget = Math.max(40, LARGE_OUTPUT_PREVIEW_MAX_CHARS - usedChars);
    const clippedLine = truncateText(sourceLine, lineBudget);
    keptLines.push(clippedLine);
    // +1 accounts for the newline that joins this line to the next one.
    usedChars += clippedLine.length + 1;
  }

  const omittedLineCount = Math.max(0, sourceLines.length - keptLines.length);
  const previewText = keptLines.join("\n");
  return { omittedLineCount, previewText };
}
603
+
/**
 * Writes the full, uncompacted output to a spill file and resolves with the
 * value returned by the underlying writer (used by the caller as the spill path).
 *
 * Payload selection:
 * - string data is written verbatim;
 * - number / boolean data is written via `String(...)`;
 * - `undefined` data falls back to the already-rendered `text`;
 * - anything else is serialized with `stringifyUnknown`.
 *
 * Only the last case is treated as structured and gets a `.json` suffix; the
 * other three are plain text and get `.txt`. Previously the `undefined`
 * fallback wrote the rendered plain text into a `.json` file.
 *
 * @param options.data - Upstream result payload.
 * @param options.persistentArtifactStore - When provided, the file is written
 *   through `writePersistentSessionArtifactFile`; otherwise through `writeSecureTempFile`.
 * @param options.text - Rendered presentation text used when `data` is `undefined`.
 * @returns The path of the written spill file.
 */
async function writeLargeOutputSpillFile(options: {
  data: unknown;
  persistentArtifactStore?: PersistentSessionArtifactStore;
  text: string;
}): Promise<string> {
  const { data, persistentArtifactStore, text } = options;

  let payload: string;
  let suffix: string;
  if (typeof data === "string") {
    payload = data;
    suffix = ".txt";
  } else if (typeof data === "number" || typeof data === "boolean") {
    payload = String(data);
    suffix = ".txt";
  } else if (data === undefined) {
    // No upstream payload: spill the rendered text, which is not JSON.
    payload = text;
    suffix = ".txt";
  } else {
    // NOTE(review): assumes stringifyUnknown emits JSON for structured values — confirm.
    payload = stringifyUnknown(data);
    suffix = ".json";
  }

  const fileOptions = {
    content: payload,
    prefix: LARGE_OUTPUT_FILE_PREFIX,
    suffix,
  };
  return persistentArtifactStore
    ? await writePersistentSessionArtifactFile({ ...fileOptions, store: persistentArtifactStore })
    : await writeSecureTempFile(fileOptions);
}
627
+
/**
 * Replaces oversized presentation text with a compact preview and spills the
 * full output to a file.
 *
 * When the rendered text is empty or within the inline limits, the
 * presentation is returned untouched. Otherwise the presentation is mutated
 * in place: its first text part (or a new leading text part) becomes the
 * compact message, `data` is replaced with compaction metadata,
 * `fullOutputPath` is set, and " (compact)" is appended to `summary`.
 *
 * A failure to write the spill file is not rethrown; its message is shown in
 * the compact text and recorded as `spillError`.
 *
 * @param options.commandInfo - Parsed command; `command` labels the message.
 * @param options.data - Upstream payload handed to the spill writer.
 * @param options.persistentArtifactStore - Optional store handed to the spill writer.
 * @param options.presentation - Presentation to inspect and possibly compact.
 * @returns The same presentation object that was passed in.
 */
async function compactLargePresentationOutput(options: {
  commandInfo: CommandInfo;
  data: unknown;
  persistentArtifactStore?: PersistentSessionArtifactStore;
  presentation: ToolPresentation;
}): Promise<ToolPresentation> {
  const { commandInfo, presentation } = options;
  const renderedText = getPresentationText(presentation);
  const needsCompaction = renderedText.length !== 0 && shouldCompactLargeOutput(renderedText);
  if (!needsCompaction) {
    return presentation;
  }

  // Best effort: a failed spill still yields a compact preview.
  let fullOutputPath: string | undefined;
  let spillErrorText: string | undefined;
  try {
    fullOutputPath = await writeLargeOutputSpillFile({
      data: options.data,
      persistentArtifactStore: options.persistentArtifactStore,
      text: renderedText,
    });
  } catch (error) {
    spillErrorText = error instanceof Error ? error.message : String(error);
  }

  const preview = buildLargeOutputPreview(renderedText);
  const omittedNote =
    preview.omittedLineCount > 0 ? [`- ... (${preview.omittedLineCount} additional lines omitted)`] : [];
  const spillNote = fullOutputPath
    ? `Full output path: ${fullOutputPath}`
    : `Full output unavailable: ${spillErrorText ?? "spill file could not be created."}`;
  const compactedText = [
    `Large ${commandInfo.command ?? "agent-browser"} output compacted.`,
    "",
    "Preview:",
    preview.previewText,
    ...omittedNote,
    "",
    spillNote,
  ].join("\n");

  const textPartIndex = presentation.content.findIndex((part) => part.type === "text");
  if (textPartIndex < 0) {
    presentation.content.unshift({ type: "text", text: compactedText });
  } else {
    presentation.content[textPartIndex] = { type: "text", text: compactedText };
  }

  presentation.data = {
    compacted: true,
    fullOutputPath,
    outputCharCount: renderedText.length,
    outputLineCount: countLines(renderedText),
    previewCharCount: preview.previewText.length,
    previewLineCount: countLines(preview.previewText),
    spillError: spillErrorText,
  };
  presentation.fullOutputPath = fullOutputPath;
  presentation.summary = `${presentation.summary} (compact)`;
  return presentation;
}
689
+
542
690
  export async function buildToolPresentation(options: {
543
691
  commandInfo: CommandInfo;
544
692
  cwd: string;
@@ -568,9 +716,11 @@ export async function buildToolPresentation(options: {
568
716
  };
569
717
 
570
718
  const imagePath = extractImagePath(cwd, data);
571
- if (!imagePath) {
572
- return presentation;
573
- }
574
-
575
- return await attachInlineImage(presentation, imagePath);
719
+ const presentationWithImage = imagePath ? await attachInlineImage(presentation, imagePath) : presentation;
720
+ return await compactLargePresentationOutput({
721
+ commandInfo,
722
+ data,
723
+ persistentArtifactStore,
724
+ presentation: presentationWithImage,
725
+ });
576
726
  }
@@ -579,7 +579,7 @@ export async function buildSnapshotPresentation(
579
579
  );
580
580
  if (fallbackPreview.omittedCount > 0) {
581
581
  lines.push(
582
- `- ... (${fallbackPreview.omittedCount} additional snapshot lines omitted; ${fullOutputPath ? "use the spill file for everything" : "the full raw snapshot was omitted"})`,
582
+ `- ... (${fallbackPreview.omittedCount} additional snapshot lines omitted; ${fullOutputPath ? `full output path: ${fullOutputPath}` : "the full raw snapshot was omitted"})`,
583
583
  );
584
584
  }
585
585
  } else {
@@ -614,7 +614,7 @@ export async function buildSnapshotPresentation(
614
614
  lines.push(
615
615
  "",
616
616
  fullOutputPath
617
- ? "Full raw snapshot path is available in details.fullOutputPath."
617
+ ? `Full raw snapshot path: ${fullOutputPath}`
618
618
  : `Full raw snapshot unavailable: ${spillErrorText ?? "temp spill file could not be created."}`,
619
619
  );
620
620
 
@@ -11,7 +11,7 @@ import { basename } from "node:path";
11
11
 
12
12
  const STARTUP_SCOPED_FLAGS = ["--cdp", "--profile", "--session-name"] as const;
13
13
  const OPEN_COMMANDS = new Set(["goto", "navigate", "open"]);
14
- const OPENAI_HEADLESS_COMPAT_HOSTS = new Set(["chat.openai.com", "chatgpt.com"]);
14
+ const OPENAI_HEADLESS_COMPAT_HOSTS = new Set(["chat.com", "chat.openai.com", "chatgpt.com"]);
15
15
  const BRAVE_API_KEY_ENV = "BRAVE_API_KEY";
16
16
  const AGENT_BROWSER_IDLE_TIMEOUT_ENV = "AGENT_BROWSER_IDLE_TIMEOUT_MS";
17
17
  const IMPLICIT_SESSION_IDLE_TIMEOUT_ENV = "PI_AGENT_BROWSER_IMPLICIT_SESSION_IDLE_TIMEOUT_MS";
@@ -106,7 +106,8 @@ export interface CompatibilityWorkaround {
106
106
  }
107
107
 
108
108
  export interface OpenResultTabCorrection {
109
- selectedIndex: number;
109
+ selectedTab: string;
110
+ selectionKind: "index" | "label" | "tabId";
110
111
  targetTitle?: string;
111
112
  targetUrl: string;
112
113
  }
@@ -537,6 +538,26 @@ function normalizeComparableUrl(url: string): string | undefined {
537
538
  }
538
539
  }
539
540
 
/**
 * Trims a tab selector candidate and collapses blank values to `undefined`.
 *
 * @param value - Raw tab id or label, possibly missing.
 * @returns The trimmed value, or `undefined` when it is missing or empty after trimming.
 */
function normalizeTabSelectionValue(value: string | undefined): string | undefined {
  if (value == null) {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed === "" ? undefined : trimmed;
}
545
+
/**
 * Picks the selector used to address a tab, in order of preference:
 * a non-blank `tabId`, then a non-blank `label`, then a non-negative integer
 * `index` rendered as a string.
 *
 * @param tab - Tab metadata carrying any of `tabId`, `label`, `index`.
 * @returns The selector and which kind it is, or `undefined` when the tab has
 *   no usable selector.
 */
function extractTabSelection(tab: { index?: number; label?: string; tabId?: string }): Pick<OpenResultTabCorrection, "selectedTab" | "selectionKind"> | undefined {
  // Named selectors are tried lazily in priority order; the first non-blank one wins.
  for (const selectionKind of ["tabId", "label"] as const) {
    const selectedTab = normalizeTabSelectionValue(tab[selectionKind]);
    if (selectedTab) {
      return { selectedTab, selectionKind };
    }
  }

  const { index } = tab;
  if (typeof index !== "number" || !Number.isInteger(index) || index < 0) {
    return undefined;
  }
  return { selectedTab: String(index), selectionKind: "index" };
}
560
+
540
561
  function parseComparableNavigationUrl(url: string): URL | undefined {
541
562
  try {
542
563
  return new URL(url);
@@ -727,7 +748,7 @@ export function buildExecutionPlan(
727
748
 
728
749
  export function chooseOpenResultTabCorrection(options: {
729
750
  activeTabIndex?: number;
730
- tabs: Array<{ active?: boolean; index?: number; title?: string; url?: string }>;
751
+ tabs: Array<{ active?: boolean; index?: number; label?: string; tabId?: string; title?: string; url?: string }>;
731
752
  targetTitle?: string;
732
753
  targetUrl?: string;
733
754
  }): OpenResultTabCorrection | undefined {
@@ -740,6 +761,8 @@ export function chooseOpenResultTabCorrection(options: {
740
761
  const tabsWithIndices = options.tabs.map((tab, index) => ({
741
762
  ...tab,
742
763
  index: typeof tab.index === "number" ? tab.index : index,
764
+ label: normalizeTabSelectionValue(tab.label),
765
+ tabId: normalizeTabSelectionValue(tab.tabId),
743
766
  }));
744
767
  const activeTab =
745
768
  tabsWithIndices.find((tab) => tab.active === true) ??
@@ -758,13 +781,14 @@ export function chooseOpenResultTabCorrection(options: {
758
781
  ? undefined
759
782
  : matchingTabs.find((tab) => typeof tab.title === "string" && tab.title.trim() === trimmedTargetTitle);
760
783
  const selectedTab = titledMatch ?? matchingTabs[0];
761
- return selectedTab.index === undefined
762
- ? undefined
763
- : {
764
- selectedIndex: selectedTab.index,
784
+ const tabSelection = extractTabSelection(selectedTab);
785
+ return tabSelection
786
+ ? {
787
+ ...tabSelection,
765
788
  targetTitle: trimmedTargetTitle.length > 0 ? trimmedTargetTitle : undefined,
766
789
  targetUrl: normalizedTargetUrl,
767
- };
790
+ }
791
+ : undefined;
768
792
  }
769
793
 
770
794
  export function parseCommandInfo(args: string[]): CommandInfo {
@@ -260,7 +260,7 @@ export async function writeSecureTempChunk(options: {
260
260
  const { content, fileHandle, path } = options;
261
261
  await enqueueTempMutation(async () => {
262
262
  await assertSecureTempRootBudget(dirname(path), getTempArtifactByteLength(content));
263
- await fileHandle.writeFile(content);
263
+ await fileHandle.appendFile(content);
264
264
  });
265
265
  }
266
266
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-agent-browser-native",
3
- "version": "0.2.7",
3
+ "version": "0.2.9",
4
4
  "description": "pi extension that exposes agent-browser as a native tool for browser automation",
5
5
  "type": "module",
6
6
  "author": "Mitch Fultz (https://github.com/fitchmultz)",
@@ -32,6 +32,7 @@
32
32
  "CHANGELOG.md",
33
33
  "LICENSE",
34
34
  "docs/ARCHITECTURE.md",
35
+ "docs/COMMAND_REFERENCE.md",
35
36
  "docs/RELEASE.md",
36
37
  "docs/REQUIREMENTS.md",
37
38
  "docs/TOOL_CONTRACT.md"