pi-agent-browser-native 0.2.7 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +20 -0
- package/README.md +33 -7
- package/docs/ARCHITECTURE.md +2 -1
- package/docs/COMMAND_REFERENCE.md +264 -0
- package/docs/RELEASE.md +14 -4
- package/docs/REQUIREMENTS.md +4 -1
- package/docs/TOOL_CONTRACT.md +9 -6
- package/extensions/agent-browser/index.ts +58 -24
- package/extensions/agent-browser/lib/results/presentation.ts +158 -8
- package/extensions/agent-browser/lib/results/snapshot.ts +2 -2
- package/extensions/agent-browser/lib/runtime.ts +32 -8
- package/extensions/agent-browser/lib/temp.ts +1 -1
- package/package.json +2 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,26 @@
|
|
|
2
2
|
|
|
3
3
|
## Unreleased
|
|
4
4
|
|
|
5
|
+
## 0.2.9 - 2026-04-17
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
- large non-snapshot outputs such as oversized `eval --stdin` payloads now compact inline content, spill the full payload to a private file, and print the actual spill path directly in tool content instead of dumping huge raw output into model context
|
|
9
|
+
- file-save flows now render `download` results as explicit saved-file summaries so agents can see the downloaded path directly
|
|
10
|
+
- when a known target tab stays correct at command start but a restored/background tab steals focus after the command completes, the wrapper now best-effort restores the intended tab before returning control
|
|
11
|
+
- compact snapshot text now prints the actual raw-spill file path directly instead of only referring agents to `details.fullOutputPath`
|
|
12
|
+
|
|
13
|
+
### Changed
|
|
14
|
+
- added a published `docs/COMMAND_REFERENCE.md` so agents have a repo-readable local command/help surface even when direct `agent-browser` binary usage is blocked
|
|
15
|
+
- expanded tool guidance, README, release notes, and repo guidance with download workflows, better `wait` usage, oversized-output handling, and the documentation-sync rule for upstream `agent-browser` updates
|
|
16
|
+
- clarified the checkout-versus-installed-package workflow in README, release notes, and repo agent guidance so local development keeps one active Pi package source for this extension at a time instead of treating the published entrypoint file as optional
|
|
17
|
+
|
|
18
|
+
## 0.2.8 - 2026-04-16
|
|
19
|
+
|
|
20
|
+
### Fixed
|
|
21
|
+
- updated the tab-correction and tab-pinning wrapper paths for `agent-browser` `0.26.0` tab metadata, so profiled launches and follow-up commands now re-select tabs using stable upstream tab ids instead of the retired numeric index shape
|
|
22
|
+
- updated tab-list rendering and tool guidance to show `agent-browser`'s stable tab ids/labels instead of suggesting `tab <n>` commands that no longer work in `0.26.0`
|
|
23
|
+
- extended the narrow ChatGPT/OpenAI headless user-agent compatibility fallback to cover `chat.com`, so `chat.com` redirects reuse the same authenticated headless path as `chatgpt.com`
|
|
24
|
+
|
|
5
25
|
## 0.2.7 - 2026-04-16
|
|
6
26
|
|
|
7
27
|
### Changed
|
package/README.md
CHANGED
|
@@ -85,7 +85,9 @@ Until you are using a published package release, prefer an explicit checkout-onl
|
|
|
85
85
|
pi --no-extensions -e /absolute/path/to/pi-agent-browser-native
|
|
86
86
|
```
|
|
87
87
|
|
|
88
|
-
This
|
|
88
|
+
This keeps the checkout isolated from any other active package source for the same extension.
|
|
89
|
+
|
|
90
|
+
This repository's `package.json` is itself a publishable pi package manifest that points at `extensions/agent-browser/index.ts`. That file is the real extension entrypoint for both the checkout and the published package. Keep exactly one active source for this extension in Pi settings at a time: either this checkout path or the published npm package.
|
|
89
91
|
|
|
90
92
|
The native tool exposed to the agent is named `agent_browser`.
|
|
91
93
|
|
|
@@ -132,6 +134,12 @@ Evaluate page JavaScript via stdin:
|
|
|
132
134
|
{ "args": ["eval", "--stdin"], "stdin": "document.title" }
|
|
133
135
|
```
|
|
134
136
|
|
|
137
|
+
Download a file to an explicit path instead of relying on `click` alone:
|
|
138
|
+
|
|
139
|
+
```json
|
|
140
|
+
{ "args": ["download", "@e5", "/tmp/report.pdf"] }
|
|
141
|
+
```
|
|
142
|
+
|
|
135
143
|
Start a fresh profiled launch after you already used the implicit session:
|
|
136
144
|
|
|
137
145
|
```json
|
|
@@ -154,17 +162,21 @@ Use the agent_browser tool to open https://react.dev and then take an interactiv
|
|
|
154
162
|
|
|
155
163
|
## Local development
|
|
156
164
|
|
|
157
|
-
Do not track or rely on a repo-local `.pi/extensions/agent-browser.ts` autoload shim for this package.
|
|
165
|
+
Do not track or rely on a repo-local `.pi/extensions/agent-browser.ts` autoload shim for this package. That creates an unnecessary second registration path.
|
|
158
166
|
|
|
167
|
+
The published entrypoint lives at `extensions/agent-browser/index.ts` and is referenced directly from this repo's `package.json`. While developing this repo, keep the checkout path enabled in Pi settings and disable or uninstall `npm:pi-agent-browser-native` so Pi has only one active source for this extension.
|
|
168
|
+
|
|
169
|
+
Recommended local development setup:
|
|
159
170
|
1. Install `agent-browser` separately via the upstream project.
|
|
160
171
|
2. Run `npm install`.
|
|
161
|
-
3.
|
|
172
|
+
3. Keep the checkout path enabled in Pi settings and disable or uninstall `npm:pi-agent-browser-native` while developing this repo.
|
|
173
|
+
4. Launch `pi` from this repository root with only the checkout extension loaded:
|
|
162
174
|
|
|
163
175
|
```bash
|
|
164
176
|
pi --no-extensions -e .
|
|
165
177
|
```
|
|
166
178
|
|
|
167
|
-
|
|
179
|
+
5. Prompt the agent to use `agent_browser`.
|
|
168
180
|
|
|
169
181
|
Example prompt:
|
|
170
182
|
|
|
@@ -172,18 +184,22 @@ Example prompt:
|
|
|
172
184
|
Use the agent_browser tool to open https://react.dev and then take an interactive snapshot.
|
|
173
185
|
```
|
|
174
186
|
|
|
187
|
+
For installed-package validation after a release, temporarily do the reverse: disable/remove the checkout path from Pi settings and validate the published npm package, or use an isolated ephemeral run such as `pi --no-extensions -e npm:pi-agent-browser-native@<version>`.
|
|
188
|
+
|
|
175
189
|
Validated workflow examples:
|
|
176
190
|
|
|
177
191
|
- open a page and snapshot it
|
|
178
192
|
- click a link and confirm the destination title
|
|
179
193
|
- use an explicit `--session` across multiple tool calls
|
|
180
194
|
- use an explicit `--profile` and verify persisted browser storage across restarts
|
|
181
|
-
- open `chatgpt.com` headlessly with `--profile Default` without forcing `--headed` or `--auto-connect`
|
|
195
|
+
- open `chat.com` or `chatgpt.com` headlessly with `--profile Default` without forcing `--headed` or `--auto-connect`
|
|
182
196
|
- verify `/reload` and full restart + `/resume` keep following the same implicit managed browser session
|
|
183
197
|
- run `batch` with JSON via `stdin`
|
|
184
198
|
- run `eval --stdin`
|
|
185
199
|
- take a screenshot with inline attachment support
|
|
186
200
|
- inspect `agent_browser --help` and `--version` via the tool's stateless plain-text inspection fallback
|
|
201
|
+
- use `download <selector> <path>` for attachment/file-save workflows instead of trying to infer downloads from generic clicks or large eval dumps
|
|
202
|
+
- confirm oversized outputs show the actual spill file path directly in tool content, not just a details key name
|
|
187
203
|
|
|
188
204
|
Inspection commands like `agent_browser --help` and `--version` are always supported. They return plain text, are useful for debugging or capability checks, and stay stateless: the extension does not inject its implicit session for them and they do not consume the managed-session slot needed for a later `--profile`, `--session-name`, or `--cdp` launch.
|
|
189
205
|
|
|
@@ -193,9 +209,11 @@ Current cautions:
|
|
|
193
209
|
- implicit `piab-*` sessions are extension-managed convenience sessions. They stay alive across `pi` shutdown/reload so later default calls can keep following the active managed browser on `/reload` or `/resume`, and they rely on the configured idle timeout to reduce stale background daemons. Large snapshot spill files for persisted sessions are stored under a private session-scoped artifact directory with a bounded per-session budget, so `details.fullOutputPath` survives reload/resume without unbounded growth; process-private temp spill artifacts are still cleaned up on shutdown
|
|
194
210
|
- `sessionMode: "fresh"` without an explicit `--session` rotates that extension-managed session to the new browser so later auto calls keep using it
|
|
195
211
|
- for local Unix launches, the wrapper uses a short private socket directory under `/tmp` so extension-generated session names do not trip upstream Unix socket-path limits in longer cwd/session-name combinations
|
|
196
|
-
- for direct headless local Chrome launches to `chatgpt.com
|
|
212
|
+
- for direct headless local Chrome launches to `chat.com`, `chatgpt.com`, and `chat.openai.com`, the extension injects a normal Chrome user agent when the caller did not explicitly provide `--user-agent`; this keeps the default headless workflow usable without forcing `--headed` or `--auto-connect`
|
|
197
213
|
- after profiled `open` calls, the extension best-effort re-selects the tab that matches the returned page URL when restored profile tabs steal focus during launch
|
|
198
|
-
- after a target tab is known, later active-tab commands
|
|
214
|
+
- after a target tab is known, later active-tab commands best-effort pin that same tab inside the same upstream invocation when a reconnect would otherwise drift to a restored tab
|
|
215
|
+
- after a successful command, the extension also best-effort restores that intended tab when a restored/background tab steals focus after the command completes
|
|
216
|
+
- oversized snapshots and oversized generic outputs compact inline content and print the actual spill file path directly in the tool result when a spill file exists
|
|
199
217
|
- explicit caller-provided `--session` values are treated as user-managed and are not auto-closed by the extension
|
|
200
218
|
- explicit caller-provided `--user-agent` values win over the ChatGPT/OpenAI compatibility workaround
|
|
201
219
|
- tool progress/details redact sensitive invocation values such as `--headers`, proxy credentials, and auth-bearing URL parameters before echoing them back into Pi
|
|
@@ -231,6 +249,7 @@ If you want to name the new upstream session yourself, pass an explicit session
|
|
|
231
249
|
- [`docs/REQUIREMENTS.md`](docs/REQUIREMENTS.md) — product requirements and constraints
|
|
232
250
|
- [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md) — current architecture decision
|
|
233
251
|
- [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md) — proposed v1 tool shape
|
|
252
|
+
- [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md) — local, repo-readable command reference for agent sessions where direct `agent-browser` binary usage is blocked
|
|
234
253
|
- [`docs/RELEASE.md`](docs/RELEASE.md) — maintainer release and package verification workflow
|
|
235
254
|
|
|
236
255
|
## Documentation rule
|
|
@@ -240,3 +259,10 @@ When requirements change in chat:
|
|
|
240
259
|
1. update `docs/REQUIREMENTS.md`
|
|
241
260
|
2. update the affected design docs
|
|
242
261
|
3. update this README if user-facing expectations changed
|
|
262
|
+
|
|
263
|
+
When the upstream `agent-browser` binary changes:
|
|
264
|
+
|
|
265
|
+
1. re-check the upstream command/help surface
|
|
266
|
+
2. update `docs/COMMAND_REFERENCE.md`
|
|
267
|
+
3. update tool guidance, README, and release docs if behavior or recommended usage changed
|
|
268
|
+
4. verify that agents blocked from calling the binary directly still have an equally usable local, extension-side documentation path
|
package/docs/ARCHITECTURE.md
CHANGED
|
@@ -89,6 +89,7 @@ Practical policy:
|
|
|
89
89
|
- leave explicit caller-provided `--session` choices alone unless the caller closes them explicitly
|
|
90
90
|
- after profiled `open` / `goto` / `navigate` calls, verify the active tab still matches the returned page URL and best-effort switch back when restored profile tabs steal focus
|
|
91
91
|
- once the wrapper knows which tab the agent is operating on, later active-tab commands may synthesize a tiny upstream `batch` that re-selects that tab and then runs the requested command in the same upstream invocation; this stays thin while avoiding reconnect-time drift on profile-restored sessions
|
|
92
|
+
- after a successful command on a known tab target, the wrapper may best-effort restore that same target again if restored/background tabs steal focus after the command returns
|
|
92
93
|
- for local Unix launches, set a short private socket directory so extension-generated session names do not fail on the upstream Unix socket-path length limit
|
|
93
94
|
|
|
94
95
|
This is primarily about ownership clarity and avoiding surprise, not adding a heavy safety wrapper. If the extension invented the session, the extension should own its lifecycle without breaking reload/resume semantics. If the caller explicitly chose the upstream session model, the extension should stay out of the way.
|
|
@@ -126,7 +127,7 @@ This keeps the product centered on native tool usage instead of auxiliary skill
|
|
|
126
127
|
- compact result summaries
|
|
127
128
|
- inline screenshots/images
|
|
128
129
|
- lightweight session convenience
|
|
129
|
-
- docs
|
|
130
|
+
- docs, including a repo-readable command reference that mirrors the blocked direct-binary help path closely enough for normal agent work
|
|
130
131
|
|
|
131
132
|
### Upstream `agent-browser` owns
|
|
132
133
|
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
# Agent Browser command reference
|
|
2
|
+
|
|
3
|
+
Related docs:
|
|
4
|
+
- [`../README.md`](../README.md)
|
|
5
|
+
- [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md)
|
|
6
|
+
- [`ARCHITECTURE.md`](ARCHITECTURE.md)
|
|
7
|
+
- [`RELEASE.md`](RELEASE.md)
|
|
8
|
+
|
|
9
|
+
## Purpose
|
|
10
|
+
|
|
11
|
+
Provide a local, repo-readable command reference for the native `agent_browser` tool.
|
|
12
|
+
|
|
13
|
+
This project intentionally blocks normal `agent-browser` bash usage in most agent sessions, so the agent still needs an accessible local equivalent of the upstream command surface. This document is the durable reference the agent can read inside the repository without calling the binary directly.
|
|
14
|
+
|
|
15
|
+
## Core mental model
|
|
16
|
+
|
|
17
|
+
Tool parameters:
|
|
18
|
+
|
|
19
|
+
```json
|
|
20
|
+
{
|
|
21
|
+
"args": ["open", "https://example.com"],
|
|
22
|
+
"stdin": "optional raw stdin content",
|
|
23
|
+
"sessionMode": "auto"
|
|
24
|
+
}
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
- `args`: exact `agent-browser` CLI tokens after the binary name
|
|
28
|
+
- `stdin`: only for commands like `batch` and `eval --stdin`
|
|
29
|
+
- `sessionMode`:
|
|
30
|
+
- `"auto"` reuse the extension-managed session when possible
|
|
31
|
+
- `"fresh"` rotate that managed session to a fresh upstream launch so startup-scoped flags like `--profile`, `--session-name`, or `--cdp` apply
|
|
32
|
+
|
|
33
|
+
## Recommended workflow
|
|
34
|
+
|
|
35
|
+
### Normal browse flow
|
|
36
|
+
|
|
37
|
+
```json
|
|
38
|
+
{ "args": ["open", "https://example.com"] }
|
|
39
|
+
{ "args": ["snapshot", "-i"] }
|
|
40
|
+
{ "args": ["click", "@e2"] }
|
|
41
|
+
{ "args": ["snapshot", "-i"] }
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### Extract page data
|
|
45
|
+
|
|
46
|
+
```json
|
|
47
|
+
{ "args": ["get", "title"] }
|
|
48
|
+
{ "args": ["get", "url"] }
|
|
49
|
+
{ "args": ["eval", "--stdin"], "stdin": "document.title" }
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### Run a multi-step flow in one browser invocation
|
|
53
|
+
|
|
54
|
+
```json
|
|
55
|
+
{ "args": ["batch"], "stdin": "[[\"open\",\"https://example.com\"],[\"snapshot\",\"-i\"]]" }
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### Switch from an already-active implicit session to a fresh profiled launch
|
|
59
|
+
|
|
60
|
+
```json
|
|
61
|
+
{
|
|
62
|
+
"args": ["--profile", "Default", "open", "https://mail.google.com"],
|
|
63
|
+
"sessionMode": "fresh"
|
|
64
|
+
}
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## High-value commands
|
|
68
|
+
|
|
69
|
+
### Open and navigation
|
|
70
|
+
|
|
71
|
+
- `open <url>`
|
|
72
|
+
- `goto <url>`
|
|
73
|
+
- `navigate <url>`
|
|
74
|
+
- `back`
|
|
75
|
+
- `forward`
|
|
76
|
+
- `reload`
|
|
77
|
+
|
|
78
|
+
Examples:
|
|
79
|
+
|
|
80
|
+
```json
|
|
81
|
+
{ "args": ["open", "https://react.dev"] }
|
|
82
|
+
{ "args": ["reload"] }
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Snapshot and page inspection
|
|
86
|
+
|
|
87
|
+
- `snapshot`
|
|
88
|
+
- `snapshot -i` interactive elements only
|
|
89
|
+
- `snapshot -c` compact tree
|
|
90
|
+
- `snapshot -d <n>` limit depth
|
|
91
|
+
- `snapshot -s <selector>` scope to one subtree
|
|
92
|
+
|
|
93
|
+
Examples:
|
|
94
|
+
|
|
95
|
+
```json
|
|
96
|
+
{ "args": ["snapshot", "-i"] }
|
|
97
|
+
{ "args": ["snapshot", "-i", "-s", "main"] }
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Element interaction
|
|
101
|
+
|
|
102
|
+
- `click <selector-or-@ref>`
|
|
103
|
+
- `dblclick <selector-or-@ref>`
|
|
104
|
+
- `hover <selector-or-@ref>`
|
|
105
|
+
- `focus <selector-or-@ref>`
|
|
106
|
+
- `type <selector-or-@ref> <text>`
|
|
107
|
+
- `fill <selector-or-@ref> <text>`
|
|
108
|
+
- `press <key>`
|
|
109
|
+
- `check <selector-or-@ref>`
|
|
110
|
+
- `uncheck <selector-or-@ref>`
|
|
111
|
+
- `select <selector-or-@ref> <value...>`
|
|
112
|
+
- `drag <src> <dst>`
|
|
113
|
+
- `upload <selector-or-@ref> <files...>`
|
|
114
|
+
|
|
115
|
+
Examples:
|
|
116
|
+
|
|
117
|
+
```json
|
|
118
|
+
{ "args": ["click", "@e12"] }
|
|
119
|
+
{ "args": ["fill", "#email", "user@example.com"] }
|
|
120
|
+
{ "args": ["press", "Enter"] }
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Downloads and saved files
|
|
124
|
+
|
|
125
|
+
Use the purpose-built command when a click should save a file.
|
|
126
|
+
|
|
127
|
+
- `download <selector-or-@ref> <path>`
|
|
128
|
+
- `pdf <path>`
|
|
129
|
+
- `screenshot [path]`
|
|
130
|
+
|
|
131
|
+
Examples:
|
|
132
|
+
|
|
133
|
+
```json
|
|
134
|
+
{ "args": ["download", "@e5", "/tmp/report.pdf"] }
|
|
135
|
+
{ "args": ["pdf", "/tmp/page.pdf"] }
|
|
136
|
+
{ "args": ["screenshot", "/tmp/page.png"] }
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
Rules:
|
|
140
|
+
|
|
141
|
+
- Prefer `download <selector> <path>` over `click` when the goal is a downloaded file on disk.
|
|
142
|
+
- Prefer explicit output paths when the calling task needs to read, move, or attach the saved file later.
|
|
143
|
+
- Use `--download-path <dir>` on the first launch when many downloads should land in one directory.
|
|
144
|
+
|
|
145
|
+
### Read page state
|
|
146
|
+
|
|
147
|
+
`get <subcommand>` supports:
|
|
148
|
+
|
|
149
|
+
- `title`
|
|
150
|
+
- `url`
|
|
151
|
+
- `text <selector>`
|
|
152
|
+
- `html <selector>`
|
|
153
|
+
- `value <selector>`
|
|
154
|
+
- `attr <selector> <name>`
|
|
155
|
+
- `count <selector>`
|
|
156
|
+
- `box <selector>`
|
|
157
|
+
- `styles <selector>`
|
|
158
|
+
- `cdp-url`
|
|
159
|
+
|
|
160
|
+
Examples:
|
|
161
|
+
|
|
162
|
+
```json
|
|
163
|
+
{ "args": ["get", "title"] }
|
|
164
|
+
{ "args": ["get", "text", "main"] }
|
|
165
|
+
{ "args": ["get", "attr", "a.primary", "href"] }
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### JavaScript evaluation
|
|
169
|
+
|
|
170
|
+
- `eval <js>`
|
|
171
|
+
- `eval --stdin` with JavaScript in `stdin`
|
|
172
|
+
|
|
173
|
+
Example:
|
|
174
|
+
|
|
175
|
+
```json
|
|
176
|
+
{ "args": ["eval", "--stdin"], "stdin": "Array.from(document.querySelectorAll('a')).map((a) => a.href)" }
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
Rules:
|
|
180
|
+
|
|
181
|
+
- Return the intended value instead of relying on `console.log`.
|
|
182
|
+
- Scope DOM queries to the relevant route, component, or element.
|
|
183
|
+
- Prefer `snapshot -i` refs first when the task is interaction-heavy.
|
|
184
|
+
|
|
185
|
+
### Wait
|
|
186
|
+
|
|
187
|
+
- `wait <ms>`
|
|
188
|
+
- `wait <selector>`
|
|
189
|
+
- use explicit variants like `--load <state>`, `--url <matcher>`, `--fn <js>`, or `--text <matcher>` when needed
|
|
190
|
+
|
|
191
|
+
Important:
|
|
192
|
+
|
|
193
|
+
- bare `wait --load` is incomplete; `--load` needs a state value (for example `wait --load networkidle`)
|
|
194
|
+
|
|
195
|
+
### Tabs
|
|
196
|
+
|
|
197
|
+
- `tab list`
|
|
198
|
+
- `tab <tab-id-or-label>`
|
|
199
|
+
- `tab new`
|
|
200
|
+
- `tab close`
|
|
201
|
+
|
|
202
|
+
Examples:
|
|
203
|
+
|
|
204
|
+
```json
|
|
205
|
+
{ "args": ["tab", "list"] }
|
|
206
|
+
{ "args": ["tab", "t3"] }
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
Use `tab list` followed by `tab <tab-id-or-label>` to re-select the intended tab when:
|
|
210
|
+
|
|
211
|
+
- a restored profile tab steals focus
|
|
212
|
+
- an interaction opens a new tab
|
|
213
|
+
- the browser lands on the wrong page unexpectedly
|
|
214
|
+
|
|
215
|
+
### Batch
|
|
216
|
+
|
|
217
|
+
- `batch`
|
|
218
|
+
- `batch --bail`
|
|
219
|
+
|
|
220
|
+
Example:
|
|
221
|
+
|
|
222
|
+
```json
|
|
223
|
+
{ "args": ["batch", "--bail"], "stdin": "[[\"open\",\"https://example.com\"],[\"snapshot\",\"-i\"],[\"click\",\"@e2\"]]" }
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
### Session and inspection commands
|
|
227
|
+
|
|
228
|
+
- `session`
|
|
229
|
+
- `session list`
|
|
230
|
+
- `close`
|
|
231
|
+
- `close --all`
|
|
232
|
+
- `--help`
|
|
233
|
+
- `--version`
|
|
234
|
+
|
|
235
|
+
The wrapper keeps `--help` and `--version` stateless so they do not consume the implicit managed-session slot.
|
|
236
|
+
|
|
237
|
+
## Important global flags
|
|
238
|
+
|
|
239
|
+
- `--profile <name|path>` reuse Chrome profile state
|
|
240
|
+
- `--session <name>` explicit upstream session name
|
|
241
|
+
- `--session-name <name>` upstream saved auth/session state name
|
|
242
|
+
- `--cdp <port-or-url>` connect to an existing browser
|
|
243
|
+
- `--headed` show the browser window
|
|
244
|
+
- `--download-path <dir>` default download directory
|
|
245
|
+
- `--user-agent <ua>` custom user agent
|
|
246
|
+
- `--json` injected by the wrapper automatically for normal tool execution
|
|
247
|
+
|
|
248
|
+
## Wrapper-specific behavior worth knowing
|
|
249
|
+
|
|
250
|
+
- The extension may keep following one implicit managed session across later tool calls.
|
|
251
|
+
- If startup-scoped flags like `--profile`, `--session-name`, or `--cdp` would be ignored because that implicit session is already active, retry with `sessionMode: "fresh"`.
|
|
252
|
+
- After profiled opens, the wrapper best-effort restores the intended target tab when restored tabs steal focus.
|
|
253
|
+
- After the wrapper knows the intended tab for a session, later commands best-effort keep that tab active so reconnect drift does not silently move the browser to a restored/background tab.
|
|
254
|
+
- Oversized snapshots and oversized generic outputs may be compacted in tool content, with the full raw output written to a spill file path shown directly in the tool result.
|
|
255
|
+
|
|
256
|
+
## Maintenance rule
|
|
257
|
+
|
|
258
|
+
Whenever the upstream `agent-browser` binary version changes in this project:
|
|
259
|
+
|
|
260
|
+
1. re-check the upstream command/help surface
|
|
261
|
+
2. update this local command reference if anything changed
|
|
262
|
+
3. update tool prompt guidance if the recommended agent workflow changed
|
|
263
|
+
4. update README and release docs if the user-visible behavior changed
|
|
264
|
+
5. validate the extension still exposes local documentation that is at least as usable as the blocked direct-binary path for normal agent work
|
package/docs/RELEASE.md
CHANGED
|
@@ -54,10 +54,12 @@ node scripts/verify-package.mjs --list-files
|
|
|
54
54
|
Before publishing, also validate the explicit local-checkout path:
|
|
55
55
|
|
|
56
56
|
1. Install `agent-browser` separately.
|
|
57
|
-
2.
|
|
58
|
-
3.
|
|
59
|
-
4.
|
|
60
|
-
5.
|
|
57
|
+
2. Make sure Pi has only one active source for this extension during checkout validation.
|
|
58
|
+
3. Launch `pi --no-extensions -e .` from this repository root.
|
|
59
|
+
4. Confirm the checkout extension loads from `extensions/agent-browser/index.ts`.
|
|
60
|
+
5. Run a smoke prompt that exercises `agent_browser`.
|
|
61
|
+
6. Validate managed-session continuity with both `/reload` and a full restart + `/resume`.
|
|
62
|
+
7. Re-check local extension-side docs (`README.md`, `docs/COMMAND_REFERENCE.md`, and prompt guidance) if the upstream `agent-browser` version/help surface changed.
|
|
61
63
|
|
|
62
64
|
Example smoke prompt:
|
|
63
65
|
|
|
@@ -71,6 +73,8 @@ Recommended lifecycle follow-up:
|
|
|
71
73
|
2. Run `/reload`, then ask for `snapshot -i` and confirm the same page is still active.
|
|
72
74
|
3. Exit `pi`, relaunch it against the same session file or use `/resume`, then ask for `snapshot -i` again and confirm the same page is still active.
|
|
73
75
|
4. Open a large page whose snapshot output gets compacted and confirm the spill file at `details.fullOutputPath` still exists after the restart/resume flow.
|
|
76
|
+
5. Trigger an oversized non-snapshot output (for example a deliberately large `eval --stdin` result) and confirm the tool prints the actual spill file path directly in content instead of only referencing a details key.
|
|
77
|
+
6. Validate at least one file-download flow with `download <selector> <path>`.
|
|
74
78
|
|
|
75
79
|
## Post-publish install validation
|
|
76
80
|
|
|
@@ -81,6 +85,11 @@ pi install npm:pi-agent-browser-native@<version>
|
|
|
81
85
|
pi -e npm:pi-agent-browser-native@<version>
|
|
82
86
|
```
|
|
83
87
|
|
|
88
|
+
For installed-package validation, make sure Pi has only one active source for this extension. The simplest safe paths are either:
|
|
89
|
+
|
|
90
|
+
- temporarily disable/remove the checkout path and then run plain `pi`, or
|
|
91
|
+
- use an isolated ephemeral run such as `pi --no-extensions -e npm:pi-agent-browser-native@<version>`
|
|
92
|
+
|
|
84
93
|
Then confirm `pi` exposes the native `agent_browser` tool, that a basic `open` + `snapshot -i` flow works, and that `/reload` plus restart/`/resume` keep following the same implicit managed browser session.
|
|
85
94
|
|
|
86
95
|
## Release notes checklist
|
|
@@ -89,6 +98,7 @@ Before publishing:
|
|
|
89
98
|
|
|
90
99
|
- update `CHANGELOG.md`
|
|
91
100
|
- confirm README install guidance still leads with the package-first flow
|
|
101
|
+
- confirm `docs/COMMAND_REFERENCE.md` still matches the effective upstream command/help surface used by the wrapper
|
|
92
102
|
- confirm the explicit local-checkout instructions still work for pre-release validation
|
|
93
103
|
- rerun `npm run verify:release`
|
|
94
104
|
- manually exercise `/reload` and full restart + `/resume` continuity in local checkout validation
|
package/docs/REQUIREMENTS.md
CHANGED
|
@@ -64,6 +64,7 @@ Define the product requirements and constraints for `pi-agent-browser-native`.
|
|
|
64
64
|
- Someone opening the repo should quickly understand the goal, purpose, install model, and usage.
|
|
65
65
|
- Documents should read as complete documents, not iterative logs, unless they are explicitly meant to be iterative, such as a changelog.
|
|
66
66
|
- Requirements, expectations, and durable rules from user conversations should be reflected in the appropriate docs.
|
|
67
|
+
- Because direct-binary usage is commonly blocked in normal agent sessions, the repo must carry a local command reference for the effective `agent_browser` surface and keep it in sync with upstream changes.
|
|
67
68
|
- Published package contents should include the canonical user-facing docs plus `LICENSE`.
|
|
68
69
|
- Published package contents should exclude agent-only and superseded docs such as `AGENTS.md`, `docs/v1-tool-contract.md`, and `docs/native-integration-design.md`.
|
|
69
70
|
|
|
@@ -85,7 +86,7 @@ The design should comfortably support workflows such as:
|
|
|
85
86
|
- web research
|
|
86
87
|
- using browser UIs for other LLMs such as ChatGPT, Grok, Gemini, and Claude
|
|
87
88
|
- isolated authenticated browser sessions
|
|
88
|
-
- headless authenticated ChatGPT/OpenAI browsing without forcing `--headed` or `--auto-connect`
|
|
89
|
+
- headless authenticated `chat.com` / ChatGPT / OpenAI browsing without forcing `--headed` or `--auto-connect`
|
|
89
90
|
- upstream profile/debug workflows without adding a local profile-cloning layer in this package
|
|
90
91
|
|
|
91
92
|
## Implications for the implementation
|
|
@@ -95,10 +96,12 @@ The design should comfortably support workflows such as:
|
|
|
95
96
|
- The wrapper should stay thin, with upstream `agent-browser` remaining the source of truth for command semantics.
|
|
96
97
|
- User-facing docs belong in `README.md` and the canonical published files under `docs/`.
|
|
97
98
|
- Agent workflow and deeper testing procedures can stay in `AGENTS.md`, but published docs must not depend on that file being present.
|
|
99
|
+
- When upstream `agent-browser` changes, refresh the local command reference, prompt guidance, and other extension-side docs so agents still have a repo-readable equivalent of the blocked direct-binary help path.
|
|
98
100
|
- Keep mitigations for legacy-skill coexistence simple; do not add extra moving parts unless observed behavior justifies them.
|
|
99
101
|
- Prefer narrow, evidence-backed compatibility mitigations over broad stealth layers when a specific upstream site starts rejecting the default headless launch fingerprint.
|
|
100
102
|
- Preserve the page that a profiled `open` just navigated to; if restored profile tabs steal focus during launch, the wrapper should best-effort switch back to the returned page URL before handing control back to the agent.
|
|
101
103
|
- Once a tab target is known for a session, later active-tab commands should best-effort pin that same tab inside the same upstream invocation when reconnect drift would otherwise land on a restored/background tab.
|
|
104
|
+
- If a restored/background tab steals focus after a successful command, the wrapper should best-effort restore the intended target tab again before handing control back.
|
|
102
105
|
- On local Unix launches, extension-generated session names should not fail just because the upstream default socket path is too long; the wrapper should choose a shorter socket directory when needed.
|
|
103
106
|
|
|
104
107
|
## Open design questions
|
package/docs/TOOL_CONTRACT.md
CHANGED
|
@@ -4,6 +4,7 @@ Related docs:
|
|
|
4
4
|
- [`../README.md`](../README.md)
|
|
5
5
|
- [`REQUIREMENTS.md`](REQUIREMENTS.md)
|
|
6
6
|
- [`ARCHITECTURE.md`](ARCHITECTURE.md)
|
|
7
|
+
- [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md)
|
|
7
8
|
|
|
8
9
|
## V1 tool
|
|
9
10
|
|
|
@@ -24,7 +25,7 @@ It also keeps the main UX where it belongs: the agent invokes the tool directly
|
|
|
24
25
|
|
|
25
26
|
The tool guidance should be written for task discovery first, not wrapper implementation first. That means the description should emphasize browser use cases like web research, reading live docs, clicking, filling, screenshots, extraction, and authenticated/profile-based workflows. Low-level wrapper details like `stdin` and exact CLI args belong in the schema and guidelines, not the lead description.
|
|
26
27
|
|
|
27
|
-
The tool also needs an operating playbook, not just a capability list. The model should not have to rediscover basics each session. Guidance should explicitly encode the normal browser workflow (`open` -> `snapshot -i` -> interact -> re-snapshot), the authenticated-content workflow (prefer `--profile Default` on the first browser call and let the implicit session carry continuity; use `--auto-connect` as a fallback when profile reuse is unavailable), and the preferred recovery path when a session opens on the wrong tab, an action changes origin unexpectedly, or an `open` call returns blocked/blank/unexpected results (`tab list` / `tab <
|
|
28
|
+
The tool also needs an operating playbook, not just a capability list. The model should not have to rediscover basics each session. Guidance should explicitly encode the normal browser workflow (`open` -> `snapshot -i` -> interact -> re-snapshot), the authenticated-content workflow (prefer `--profile Default` on the first browser call and let the implicit session carry continuity; use `--auto-connect` as a fallback when profile reuse is unavailable), and the preferred recovery path when a session opens on the wrong tab, an action changes origin unexpectedly, or an `open` call returns blocked/blank/unexpected results (`tab list` / `tab <tab-id-or-label>` / `snapshot -i` before retrying different URLs or fallback strategies). It should also discourage inventing fixed explicit session names for routine tasks, because those names leak stale browser state across otherwise unrelated `pi` sessions. For read-only browsing tasks, guidance should prefer answering from the current page state first: use the current snapshot, structured ref labels, or `eval --stdin` on the current page before navigating into media viewers, detail routes, or other new pages unless the current view lacks the needed information. For downloads, guidance should explicitly prefer `download <selector> <path>` over `click` when the goal is a file on disk. When using `eval --stdin`, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics. When using `eval --stdin` for extraction, return the intended value instead of relying on `console.log` as the primary result channel. Because the extension blocks normal direct-binary usage in most agent sessions, the repository must also carry a local command reference that stays in sync with the effective tool surface.
|
|
28
29
|
|
|
29
30
|
## Parameters
|
|
30
31
|
|
|
@@ -109,7 +110,7 @@ Primary content should be:
|
|
|
109
110
|
|
|
110
111
|
Examples:
|
|
111
112
|
- small `snapshot` results should include the actual snapshot text
|
|
112
|
-
- oversized `snapshot` results should switch to a compact view that preserves the primary content, nearby sections, and a trimmed set of high-value refs, while exposing the full raw snapshot path via `details.fullOutputPath`
|
|
113
|
+
- oversized `snapshot` results should switch to a compact view that preserves the primary content, nearby sections, and a trimmed set of high-value refs, while exposing the full raw snapshot path directly in the rendered tool text and via `details.fullOutputPath`
|
|
113
114
|
- successful navigation actions like `click`, `back`, `forward`, and `reload` should include a lightweight post-action title/url summary when the wrapper can address the active session
|
|
114
115
|
- `tab list` should include a readable tab summary
|
|
115
116
|
- `screenshot` should include the saved-path summary plus the inline image attachment when available
|
|
@@ -141,13 +142,13 @@ Additional structured fields can appear when relevant:
|
|
|
141
142
|
- `batchFailure` and `batchSteps` for `batch` rendering, including mixed-success runs
|
|
142
143
|
- `navigationSummary` for navigation-style commands like `click`, `back`, `forward`, and `reload`
|
|
143
144
|
- `imagePath` / `imagePaths` for screenshots and batched image outputs
|
|
144
|
-
- `fullOutputPath` / `fullOutputPaths` when large snapshot output is compacted and spilled to a private file; persisted sessions keep that path under a private session-scoped artifact directory with a bounded per-session budget so it survives reload/resume without unbounded growth
|
|
145
|
+
- `fullOutputPath` / `fullOutputPaths` when large snapshot output or other oversized tool output is compacted and spilled to a private file; persisted sessions keep that path under a private session-scoped artifact directory with a bounded per-session budget so it survives reload/resume without unbounded growth
|
|
145
146
|
- `sessionRecoveryHint` when startup-scoped flags need `sessionMode: "fresh"`
|
|
146
147
|
- `inspection: true` plus `stdout` for successful plain-text inspection commands like `--help` and `--version`
|
|
147
148
|
|
|
148
149
|
When the tool echoes `args` or `effectiveArgs` back into Pi, sensitive values such as `--headers`, proxy credentials, and auth-bearing URL parameters should be redacted first.
|
|
149
150
|
|
|
150
|
-
For oversized snapshots, details should switch to a compact metadata object and include `fullOutputPath` pointing at a private
|
|
151
|
+
For oversized snapshots and other oversized tool outputs, details should switch to a compact metadata object and include `fullOutputPath` pointing at a private spill file with the full upstream payload. The model-facing tool text should print the actual spill-file path when one exists instead of only saying to inspect a details key. Persisted sessions should keep that spill file under a private session-scoped artifact directory so the path remains usable after reload/restart, with the oldest persisted spill files evicted as needed to stay within the per-session budget.
|
|
151
152
|
|
|
152
153
|
## High-value result rendering
|
|
153
154
|
|
|
@@ -155,7 +156,8 @@ For oversized snapshots, details should switch to a compact metadata object and
|
|
|
155
156
|
|
|
156
157
|
Worth doing in v1:
|
|
157
158
|
- screenshots → inline image attachment
|
|
158
|
-
- snapshots → origin + ref count + main-content-first compact preview, with the raw snapshot spill path kept in `details.fullOutputPath` when the inline result would otherwise be too large
|
|
159
|
+
- snapshots → origin + ref count + main-content-first compact preview, with the raw snapshot spill path printed directly in content and kept in `details.fullOutputPath` when the inline result would otherwise be too large
|
|
160
|
+
- oversized generic outputs such as large `eval --stdin` payloads → compact preview plus the actual spill file path instead of dumping the whole payload into model context
|
|
159
161
|
- extraction-style commands like `eval --stdin` and `get title` → scalar-first text with lightweight origin context when available
|
|
160
162
|
- navigation actions like `click`, `back`, `forward`, and `reload` → lightweight post-action title/url summary when available
|
|
161
163
|
- tab lists → compact summary/table
|
|
@@ -184,10 +186,11 @@ If `agent-browser` is not on `PATH`, fail with a message that:
|
|
|
184
186
|
- pass explicit `--profile` straight through to upstream `agent-browser`; no profile-cloning or isolation layer is added in v1
|
|
185
187
|
- after profiled `open` / `goto` / `navigate`, if upstream leaves a restored profile tab active instead of the page that was just opened, best-effort switch back to the tab whose URL matches the returned open result before returning control to the agent
|
|
186
188
|
- once the wrapper has a known tab target for a session, later active-tab commands may best-effort pin that tab inside the same upstream invocation so reconnect drift does not send a `click`, `snapshot`, or similar action to a restored/background tab instead
|
|
189
|
+
- after a successful command on a known tab target, the wrapper may best-effort restore that same target again if a restored/background tab steals focus after the command completes
|
|
187
190
|
- on local Unix launches, set a short private socket directory for wrapper-spawned `agent-browser` processes so extension-generated session names do not fail the upstream Unix socket-path length limit in longer cwd/session-name combinations
|
|
188
191
|
- treat successful plain-text inspection commands like `--help` and `--version` as stateless: do not inject the implicit managed session and do not let those calls claim the managed-session slot
|
|
189
192
|
- if startup-scoped flags like `--profile`, `--session-name`, or `--cdp` are supplied after the implicit session is already active while `sessionMode` is `"auto"`, return a validation error with a structured recovery hint that recommends `sessionMode: "fresh"`
|
|
190
|
-
- for direct headless local Chrome launches to `chatgpt.com` / `chat.openai.com`, allow a narrow compatibility fallback that injects a normal Chrome `--user-agent` only when the caller did not explicitly provide one and did not choose `--headed`, `--cdp`, `--auto-connect`, or a provider-backed launch
|
|
193
|
+
- for direct headless local Chrome launches to `chat.com` / `chatgpt.com` / `chat.openai.com`, allow a narrow compatibility fallback that injects a normal Chrome `--user-agent` only when the caller did not explicitly provide one and did not choose `--headed`, `--cdp`, `--auto-connect`, or a provider-backed launch
|
|
191
194
|
|
|
192
195
|
## Non-goals
|
|
193
196
|
|
|
@@ -65,6 +65,7 @@ const QUICK_START_GUIDELINES = [
|
|
|
65
65
|
"Quick start mental model: args are the exact agent-browser CLI args after the binary; stdin is only for batch and eval --stdin; sessionMode=fresh switches the extension-managed session to a fresh upstream launch when you need new --profile, --session-name, or --cdp state.",
|
|
66
66
|
"Common first calls: { args: [\"open\", \"https://example.com\"] } then { args: [\"snapshot\", \"-i\"] }; after navigation, use { args: [\"click\", \"@e2\"] } then { args: [\"snapshot\", \"-i\"] }.",
|
|
67
67
|
"Common advanced calls: { args: [\"batch\"], stdin: \"[[\\\"open\\\",\\\"https://example.com\\\"],[\\\"snapshot\\\",\\\"-i\\\"]]\" }, { args: [\"eval\", \"--stdin\"], stdin: \"document.title\" }, and { args: [\"--profile\", \"Default\", \"open\", \"https://example.com/account\"], sessionMode: \"fresh\" }.",
|
|
68
|
+
"High-value command reference: download <selector> <path> saves a file triggered by a click; get title/url/text/html/value/attr/count reads page state; screenshot [path] captures an image; pdf <path> saves a PDF; tab list and tab <tab-id-or-label> inspect or recover the active tab.",
|
|
68
69
|
] as const;
|
|
69
70
|
const BRAVE_SEARCH_PROMPT_GUIDELINE =
|
|
70
71
|
"When a non-empty BRAVE_API_KEY is available in the current environment, prefer the Brave Search API via bash/curl to discover specific destination URLs, then open the chosen URL with agent_browser instead of browsing a search engine results page just to find the target.";
|
|
@@ -74,9 +75,10 @@ const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
|
|
|
74
75
|
"Do not invent fixed explicit session names for routine tasks. Use the implicit session unless you truly need multiple isolated browser sessions in the same conversation.",
|
|
75
76
|
"When using --profile, --session-name, or --cdp, put them on the first command for that session. If you intentionally use an explicit --session, keep using that same explicit session for follow-ups.",
|
|
76
77
|
"If you already used the implicit session and now need startup-scoped flags like --profile, --session-name, or --cdp, retry with sessionMode set to fresh or pass an explicit --session for the new launch. After a successful unnamed fresh launch, later auto calls follow that new session.",
|
|
77
|
-
"If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <
|
|
78
|
+
"If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. Only use wait with an explicit argument like milliseconds, --load <state>, --url <matcher>, --fn <js>, or --text <matcher>.",
|
|
78
79
|
"For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.",
|
|
79
80
|
"For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.",
|
|
81
|
+
"For downloads, prefer download <selector> <path> when an element click should save a file. Do not rely on click alone when you need the downloaded file on disk.",
|
|
80
82
|
"When using eval --stdin, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics.",
|
|
81
83
|
"When using eval --stdin for extraction, return the value you want instead of relying on console.log as the primary result channel.",
|
|
82
84
|
"Do not call --help or other exploratory inspection commands unless the user explicitly asks for them or debugging the browser integration is necessary.",
|
|
@@ -322,6 +324,7 @@ function extractStringResultField(data: unknown, fieldName: "title" | "url"): st
|
|
|
322
324
|
}
|
|
323
325
|
|
|
324
326
|
const SESSION_TAB_PINNING_EXCLUDED_COMMANDS = new Set(["batch", "close", "goto", "navigate", "open", "session", "tab"]);
|
|
327
|
+
const SESSION_TAB_POST_COMMAND_CORRECTION_EXCLUDED_COMMANDS = new Set(["batch", "close", "session", "tab"]);
|
|
325
328
|
|
|
326
329
|
interface SessionTabTarget {
|
|
327
330
|
title?: string;
|
|
@@ -413,26 +416,23 @@ function shouldPinSessionTabForCommand(options: { command?: string; sessionName?
|
|
|
413
416
|
);
|
|
414
417
|
}
|
|
415
418
|
|
|
419
|
+
function shouldCorrectSessionTabAfterCommand(options: { command?: string; sessionName?: string }): boolean {
|
|
420
|
+
return (
|
|
421
|
+
options.sessionName !== undefined &&
|
|
422
|
+
options.command !== undefined &&
|
|
423
|
+
!SESSION_TAB_POST_COMMAND_CORRECTION_EXCLUDED_COMMANDS.has(options.command)
|
|
424
|
+
);
|
|
425
|
+
}
|
|
426
|
+
|
|
416
427
|
function selectSessionTargetTab(options: {
|
|
417
|
-
tabs: Array<{ active?: boolean; index?: number; title?: string; url?: string }>;
|
|
428
|
+
tabs: Array<{ active?: boolean; index?: number; label?: string; tabId?: string; title?: string; url?: string }>;
|
|
418
429
|
target: SessionTabTarget;
|
|
419
430
|
}): OpenResultTabCorrection | undefined {
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
typeof options.target.title === "string"
|
|
426
|
-
? matchingTabs.find((tab) => tab.title?.trim() === options.target.title)
|
|
427
|
-
: undefined;
|
|
428
|
-
const selectedTab = titledMatch ?? matchingTabs[0];
|
|
429
|
-
return typeof selectedTab.index === "number"
|
|
430
|
-
? {
|
|
431
|
-
selectedIndex: selectedTab.index,
|
|
432
|
-
targetTitle: options.target.title,
|
|
433
|
-
targetUrl: options.target.url,
|
|
434
|
-
}
|
|
435
|
-
: undefined;
|
|
431
|
+
return chooseOpenResultTabCorrection({
|
|
432
|
+
tabs: options.tabs,
|
|
433
|
+
targetTitle: options.target.title,
|
|
434
|
+
targetUrl: options.target.url,
|
|
435
|
+
});
|
|
436
436
|
}
|
|
437
437
|
|
|
438
438
|
function deriveSessionTabTarget(options: {
|
|
@@ -570,9 +570,11 @@ async function collectOpenResultTabCorrection(options: {
|
|
|
570
570
|
if (!isRecord(tabData) || !Array.isArray(tabData.tabs)) {
|
|
571
571
|
return undefined;
|
|
572
572
|
}
|
|
573
|
-
const tabs = tabData.tabs.filter(isRecord).map((tab) => ({
|
|
573
|
+
const tabs = tabData.tabs.filter(isRecord).map((tab, index) => ({
|
|
574
574
|
active: tab.active === true,
|
|
575
|
-
index: typeof tab.index === "number" ? tab.index :
|
|
575
|
+
index: typeof tab.index === "number" ? tab.index : index,
|
|
576
|
+
label: typeof tab.label === "string" ? tab.label : undefined,
|
|
577
|
+
tabId: typeof tab.tabId === "string" ? tab.tabId : undefined,
|
|
576
578
|
title: typeof tab.title === "string" ? tab.title : undefined,
|
|
577
579
|
url: typeof tab.url === "string" ? tab.url : undefined,
|
|
578
580
|
}));
|
|
@@ -590,9 +592,11 @@ async function collectSessionTabSelection(options: {
|
|
|
590
592
|
if (!isRecord(tabData) || !Array.isArray(tabData.tabs)) {
|
|
591
593
|
return undefined;
|
|
592
594
|
}
|
|
593
|
-
const tabs = tabData.tabs.filter(isRecord).map((tab) => ({
|
|
595
|
+
const tabs = tabData.tabs.filter(isRecord).map((tab, index) => ({
|
|
594
596
|
active: tab.active === true,
|
|
595
|
-
index: typeof tab.index === "number" ? tab.index :
|
|
597
|
+
index: typeof tab.index === "number" ? tab.index : index,
|
|
598
|
+
label: typeof tab.label === "string" ? tab.label : undefined,
|
|
599
|
+
tabId: typeof tab.tabId === "string" ? tab.tabId : undefined,
|
|
596
600
|
title: typeof tab.title === "string" ? tab.title : undefined,
|
|
597
601
|
url: typeof tab.url === "string" ? tab.url : undefined,
|
|
598
602
|
}));
|
|
@@ -607,7 +611,7 @@ async function applyOpenResultTabCorrection(options: {
|
|
|
607
611
|
}): Promise<OpenResultTabCorrection | undefined> {
|
|
608
612
|
const { correction, cwd, sessionName, signal } = options;
|
|
609
613
|
const result = await runSessionCommandData({
|
|
610
|
-
args: ["tab",
|
|
614
|
+
args: ["tab", correction.selectedTab],
|
|
611
615
|
cwd,
|
|
612
616
|
sessionName,
|
|
613
617
|
signal,
|
|
@@ -816,7 +820,7 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
|
|
|
816
820
|
sessionTabCorrection = plannedSessionTabSelection;
|
|
817
821
|
processArgs = ["--json", "--session", executionPlan.sessionName, "batch"];
|
|
818
822
|
processStdin = JSON.stringify([
|
|
819
|
-
["tab",
|
|
823
|
+
["tab", plannedSessionTabSelection.selectedTab],
|
|
820
824
|
commandTokens,
|
|
821
825
|
...(includePinnedNavigationSummary ? [["get", "title"], ["get", "url"]] : []),
|
|
822
826
|
]);
|
|
@@ -925,12 +929,42 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
|
|
|
925
929
|
}
|
|
926
930
|
}
|
|
927
931
|
|
|
932
|
+
const observedSessionTabTarget =
|
|
933
|
+
normalizeSessionTabTarget(navigationSummary) ?? extractSessionTabTargetFromData(presentationEnvelope?.data);
|
|
928
934
|
const currentSessionTabTarget = deriveSessionTabTarget({
|
|
929
935
|
command: executionPlan.commandInfo.command,
|
|
930
936
|
data: presentationEnvelope?.data,
|
|
931
937
|
navigationSummary,
|
|
932
938
|
previousTarget: priorSessionTabTarget,
|
|
933
939
|
});
|
|
940
|
+
if (
|
|
941
|
+
succeeded &&
|
|
942
|
+
priorSessionTabTarget &&
|
|
943
|
+
!sessionTabCorrection &&
|
|
944
|
+
observedSessionTabTarget &&
|
|
945
|
+
shouldCorrectSessionTabAfterCommand({
|
|
946
|
+
command: executionPlan.commandInfo.command,
|
|
947
|
+
sessionName: executionPlan.sessionName,
|
|
948
|
+
})
|
|
949
|
+
) {
|
|
950
|
+
const postCommandTabCorrection = await collectSessionTabSelection({
|
|
951
|
+
cwd: ctx.cwd,
|
|
952
|
+
sessionName: executionPlan.sessionName,
|
|
953
|
+
signal,
|
|
954
|
+
target: observedSessionTabTarget,
|
|
955
|
+
});
|
|
956
|
+
if (postCommandTabCorrection) {
|
|
957
|
+
const appliedPostCommandCorrection = await applyOpenResultTabCorrection({
|
|
958
|
+
correction: postCommandTabCorrection,
|
|
959
|
+
cwd: ctx.cwd,
|
|
960
|
+
sessionName: executionPlan.sessionName,
|
|
961
|
+
signal,
|
|
962
|
+
});
|
|
963
|
+
if (appliedPostCommandCorrection && !sessionTabCorrection) {
|
|
964
|
+
sessionTabCorrection = appliedPostCommandCorrection;
|
|
965
|
+
}
|
|
966
|
+
}
|
|
967
|
+
}
|
|
934
968
|
if (executionPlan.sessionName) {
|
|
935
969
|
if (executionPlan.commandInfo.command === "close" && succeeded) {
|
|
936
970
|
sessionTabTargets.delete(executionPlan.sessionName);
|
|
@@ -10,7 +10,11 @@ import { readFile, stat } from "node:fs/promises";
|
|
|
10
10
|
import { resolve } from "node:path";
|
|
11
11
|
|
|
12
12
|
import { parseCommandInfo, type CommandInfo } from "../runtime.js";
|
|
13
|
-
import {
|
|
13
|
+
import {
|
|
14
|
+
type PersistentSessionArtifactStore,
|
|
15
|
+
writePersistentSessionArtifactFile,
|
|
16
|
+
writeSecureTempFile,
|
|
17
|
+
} from "../temp.js";
|
|
14
18
|
import { buildSnapshotPresentation, formatRawSnapshotText, formatSnapshotSummary } from "./snapshot.js";
|
|
15
19
|
import {
|
|
16
20
|
type AgentBrowserBatchResult,
|
|
@@ -19,8 +23,10 @@ import {
|
|
|
19
23
|
type BatchStepPresentationDetails,
|
|
20
24
|
type ToolPresentation,
|
|
21
25
|
isRecord,
|
|
26
|
+
countLines,
|
|
22
27
|
parsePositiveInteger,
|
|
23
28
|
stringifyUnknown,
|
|
29
|
+
truncateText,
|
|
24
30
|
} from "./shared.js";
|
|
25
31
|
|
|
26
32
|
const IMAGE_EXTENSION_TO_MIME_TYPE: Record<string, string> = {
|
|
@@ -35,6 +41,11 @@ const INLINE_IMAGE_MAX_BYTES_ENV = "PI_AGENT_BROWSER_INLINE_IMAGE_MAX_BYTES";
|
|
|
35
41
|
const DEFAULT_INLINE_IMAGE_MAX_BYTES = 5 * 1_024 * 1_024;
|
|
36
42
|
const NAVIGATION_SUMMARY_COMMANDS = new Set(["back", "click", "dblclick", "forward", "reload"]);
|
|
37
43
|
const NAVIGATION_SUMMARY_FIELD = "navigationSummary";
|
|
44
|
+
const LARGE_OUTPUT_INLINE_MAX_CHARS = 8_000;
|
|
45
|
+
const LARGE_OUTPUT_INLINE_MAX_LINES = 120;
|
|
46
|
+
const LARGE_OUTPUT_PREVIEW_MAX_CHARS = 2_500;
|
|
47
|
+
const LARGE_OUTPUT_PREVIEW_MAX_LINES = 40;
|
|
48
|
+
const LARGE_OUTPUT_FILE_PREFIX = "pi-agent-browser-output";
|
|
38
49
|
|
|
39
50
|
interface NavigationSummary {
|
|
40
51
|
title?: string;
|
|
@@ -73,8 +84,15 @@ function getTabSummary(data: Record<string, unknown>): string | undefined {
|
|
|
73
84
|
const marker = tab.active === true ? "*" : "-";
|
|
74
85
|
const title = typeof tab.title === "string" ? tab.title : "(untitled)";
|
|
75
86
|
const url = typeof tab.url === "string" ? tab.url : "(no url)";
|
|
76
|
-
const
|
|
77
|
-
|
|
87
|
+
const tabSelector =
|
|
88
|
+
typeof tab.tabId === "string" && tab.tabId.trim().length > 0
|
|
89
|
+
? tab.tabId.trim()
|
|
90
|
+
: typeof tab.label === "string" && tab.label.trim().length > 0
|
|
91
|
+
? tab.label.trim()
|
|
92
|
+
: typeof tab.index === "number"
|
|
93
|
+
? String(tab.index)
|
|
94
|
+
: String(index);
|
|
95
|
+
return `${marker} [${tabSelector}] ${title} — ${url}`;
|
|
78
96
|
});
|
|
79
97
|
return lines.join("\n");
|
|
80
98
|
}
|
|
@@ -107,6 +125,19 @@ function getScreenshotSummary(data: Record<string, unknown>): string | undefined
|
|
|
107
125
|
return typeof data.path === "string" ? `Saved image: ${data.path}` : undefined;
|
|
108
126
|
}
|
|
109
127
|
|
|
128
|
+
function getSavedFileSummary(commandInfo: CommandInfo, data: Record<string, unknown>): string | undefined {
|
|
129
|
+
if (typeof data.path !== "string") {
|
|
130
|
+
return undefined;
|
|
131
|
+
}
|
|
132
|
+
if (commandInfo.command === "download") {
|
|
133
|
+
return `Downloaded file: ${data.path}`;
|
|
134
|
+
}
|
|
135
|
+
if (commandInfo.command === "pdf") {
|
|
136
|
+
return `Saved PDF: ${data.path}`;
|
|
137
|
+
}
|
|
138
|
+
return undefined;
|
|
139
|
+
}
|
|
140
|
+
|
|
110
141
|
function getScalarExtractionResult(data: Record<string, unknown>): string | undefined {
|
|
111
142
|
const { result } = data;
|
|
112
143
|
if (typeof result === "string") {
|
|
@@ -430,6 +461,10 @@ function formatSummary(commandInfo: CommandInfo, data: unknown): string {
|
|
|
430
461
|
if (commandInfo.command === "screenshot" && typeof data.path === "string") {
|
|
431
462
|
return `Screenshot saved: ${data.path}`;
|
|
432
463
|
}
|
|
464
|
+
const savedFileSummary = getSavedFileSummary(commandInfo, data);
|
|
465
|
+
if (savedFileSummary) {
|
|
466
|
+
return savedFileSummary;
|
|
467
|
+
}
|
|
433
468
|
const extractionSummary = formatExtractionSummary(commandInfo, data);
|
|
434
469
|
if (extractionSummary) {
|
|
435
470
|
return extractionSummary;
|
|
@@ -483,6 +518,10 @@ function formatContentText(commandInfo: CommandInfo, data: unknown): string {
|
|
|
483
518
|
const screenshotSummary = getScreenshotSummary(data);
|
|
484
519
|
if (screenshotSummary) return screenshotSummary;
|
|
485
520
|
}
|
|
521
|
+
const savedFileSummary = getSavedFileSummary(commandInfo, data);
|
|
522
|
+
if (savedFileSummary) {
|
|
523
|
+
return savedFileSummary;
|
|
524
|
+
}
|
|
486
525
|
|
|
487
526
|
const extractionText = formatExtractionText(commandInfo, data);
|
|
488
527
|
if (extractionText) {
|
|
@@ -539,6 +578,115 @@ async function attachInlineImage(presentation: ToolPresentation, imagePath: stri
|
|
|
539
578
|
}
|
|
540
579
|
}
|
|
541
580
|
|
|
581
|
+
function shouldCompactLargeOutput(text: string): boolean {
|
|
582
|
+
return text.length > LARGE_OUTPUT_INLINE_MAX_CHARS || countLines(text) > LARGE_OUTPUT_INLINE_MAX_LINES;
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
function buildLargeOutputPreview(text: string): { omittedLineCount: number; previewText: string } {
|
|
586
|
+
const lines = text.split("\n");
|
|
587
|
+
const previewLines: string[] = [];
|
|
588
|
+
let previewChars = 0;
|
|
589
|
+
for (const line of lines) {
|
|
590
|
+
if (previewLines.length >= LARGE_OUTPUT_PREVIEW_MAX_LINES || previewChars >= LARGE_OUTPUT_PREVIEW_MAX_CHARS) {
|
|
591
|
+
break;
|
|
592
|
+
}
|
|
593
|
+
const remainingChars = LARGE_OUTPUT_PREVIEW_MAX_CHARS - previewChars;
|
|
594
|
+
const previewLine = truncateText(line, Math.max(40, remainingChars));
|
|
595
|
+
previewLines.push(previewLine);
|
|
596
|
+
previewChars += previewLine.length + 1;
|
|
597
|
+
}
|
|
598
|
+
return {
|
|
599
|
+
omittedLineCount: Math.max(0, lines.length - previewLines.length),
|
|
600
|
+
previewText: previewLines.join("\n"),
|
|
601
|
+
};
|
|
602
|
+
}
|
|
603
|
+
|
|
604
|
+
async function writeLargeOutputSpillFile(options: {
|
|
605
|
+
data: unknown;
|
|
606
|
+
persistentArtifactStore?: PersistentSessionArtifactStore;
|
|
607
|
+
text: string;
|
|
608
|
+
}): Promise<string> {
|
|
609
|
+
const payload =
|
|
610
|
+
typeof options.data === "string"
|
|
611
|
+
? options.data
|
|
612
|
+
: typeof options.data === "number" || typeof options.data === "boolean"
|
|
613
|
+
? String(options.data)
|
|
614
|
+
: options.data === undefined
|
|
615
|
+
? options.text
|
|
616
|
+
: stringifyUnknown(options.data);
|
|
617
|
+
const isStructuredPayload = typeof options.data !== "string" && typeof options.data !== "number" && typeof options.data !== "boolean";
|
|
618
|
+
const fileOptions = {
|
|
619
|
+
content: payload,
|
|
620
|
+
prefix: LARGE_OUTPUT_FILE_PREFIX,
|
|
621
|
+
suffix: isStructuredPayload ? ".json" : ".txt",
|
|
622
|
+
};
|
|
623
|
+
return options.persistentArtifactStore
|
|
624
|
+
? await writePersistentSessionArtifactFile({ ...fileOptions, store: options.persistentArtifactStore })
|
|
625
|
+
: await writeSecureTempFile(fileOptions);
|
|
626
|
+
}
|
|
627
|
+
|
|
628
|
+
async function compactLargePresentationOutput(options: {
|
|
629
|
+
commandInfo: CommandInfo;
|
|
630
|
+
data: unknown;
|
|
631
|
+
persistentArtifactStore?: PersistentSessionArtifactStore;
|
|
632
|
+
presentation: ToolPresentation;
|
|
633
|
+
}): Promise<ToolPresentation> {
|
|
634
|
+
const text = getPresentationText(options.presentation);
|
|
635
|
+
if (text.length === 0 || !shouldCompactLargeOutput(text)) {
|
|
636
|
+
return options.presentation;
|
|
637
|
+
}
|
|
638
|
+
|
|
639
|
+
let fullOutputPath: string | undefined;
|
|
640
|
+
let spillErrorText: string | undefined;
|
|
641
|
+
try {
|
|
642
|
+
fullOutputPath = await writeLargeOutputSpillFile({
|
|
643
|
+
data: options.data,
|
|
644
|
+
persistentArtifactStore: options.persistentArtifactStore,
|
|
645
|
+
text,
|
|
646
|
+
});
|
|
647
|
+
} catch (error) {
|
|
648
|
+
spillErrorText = error instanceof Error ? error.message : String(error);
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
const { omittedLineCount, previewText } = buildLargeOutputPreview(text);
|
|
652
|
+
const commandLabel = options.commandInfo.command ?? "agent-browser";
|
|
653
|
+
const lines = [
|
|
654
|
+
`Large ${commandLabel} output compacted.`,
|
|
655
|
+
"",
|
|
656
|
+
"Preview:",
|
|
657
|
+
previewText,
|
|
658
|
+
];
|
|
659
|
+
if (omittedLineCount > 0) {
|
|
660
|
+
lines.push(`- ... (${omittedLineCount} additional lines omitted)`);
|
|
661
|
+
}
|
|
662
|
+
lines.push(
|
|
663
|
+
"",
|
|
664
|
+
fullOutputPath
|
|
665
|
+
? `Full output path: ${fullOutputPath}`
|
|
666
|
+
: `Full output unavailable: ${spillErrorText ?? "spill file could not be created."}`,
|
|
667
|
+
);
|
|
668
|
+
|
|
669
|
+
const firstTextIndex = options.presentation.content.findIndex((part) => part.type === "text");
|
|
670
|
+
const compactedText = lines.join("\n");
|
|
671
|
+
if (firstTextIndex >= 0) {
|
|
672
|
+
options.presentation.content[firstTextIndex] = { type: "text", text: compactedText };
|
|
673
|
+
} else {
|
|
674
|
+
options.presentation.content.unshift({ type: "text", text: compactedText });
|
|
675
|
+
}
|
|
676
|
+
options.presentation.data = {
|
|
677
|
+
compacted: true,
|
|
678
|
+
fullOutputPath,
|
|
679
|
+
outputCharCount: text.length,
|
|
680
|
+
outputLineCount: countLines(text),
|
|
681
|
+
previewCharCount: previewText.length,
|
|
682
|
+
previewLineCount: countLines(previewText),
|
|
683
|
+
spillError: spillErrorText,
|
|
684
|
+
};
|
|
685
|
+
options.presentation.fullOutputPath = fullOutputPath;
|
|
686
|
+
options.presentation.summary = `${options.presentation.summary} (compact)`;
|
|
687
|
+
return options.presentation;
|
|
688
|
+
}
|
|
689
|
+
|
|
542
690
|
export async function buildToolPresentation(options: {
|
|
543
691
|
commandInfo: CommandInfo;
|
|
544
692
|
cwd: string;
|
|
@@ -568,9 +716,11 @@ export async function buildToolPresentation(options: {
|
|
|
568
716
|
};
|
|
569
717
|
|
|
570
718
|
const imagePath = extractImagePath(cwd, data);
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
719
|
+
const presentationWithImage = imagePath ? await attachInlineImage(presentation, imagePath) : presentation;
|
|
720
|
+
return await compactLargePresentationOutput({
|
|
721
|
+
commandInfo,
|
|
722
|
+
data,
|
|
723
|
+
persistentArtifactStore,
|
|
724
|
+
presentation: presentationWithImage,
|
|
725
|
+
});
|
|
576
726
|
}
|
|
@@ -579,7 +579,7 @@ export async function buildSnapshotPresentation(
|
|
|
579
579
|
);
|
|
580
580
|
if (fallbackPreview.omittedCount > 0) {
|
|
581
581
|
lines.push(
|
|
582
|
-
`- ... (${fallbackPreview.omittedCount} additional snapshot lines omitted; ${fullOutputPath ?
|
|
582
|
+
`- ... (${fallbackPreview.omittedCount} additional snapshot lines omitted; ${fullOutputPath ? `full output path: ${fullOutputPath}` : "the full raw snapshot was omitted"})`,
|
|
583
583
|
);
|
|
584
584
|
}
|
|
585
585
|
} else {
|
|
@@ -614,7 +614,7 @@ export async function buildSnapshotPresentation(
|
|
|
614
614
|
lines.push(
|
|
615
615
|
"",
|
|
616
616
|
fullOutputPath
|
|
617
|
-
?
|
|
617
|
+
? `Full raw snapshot path: ${fullOutputPath}`
|
|
618
618
|
: `Full raw snapshot unavailable: ${spillErrorText ?? "temp spill file could not be created."}`,
|
|
619
619
|
);
|
|
620
620
|
|
|
@@ -11,7 +11,7 @@ import { basename } from "node:path";
|
|
|
11
11
|
|
|
12
12
|
const STARTUP_SCOPED_FLAGS = ["--cdp", "--profile", "--session-name"] as const;
|
|
13
13
|
const OPEN_COMMANDS = new Set(["goto", "navigate", "open"]);
|
|
14
|
-
const OPENAI_HEADLESS_COMPAT_HOSTS = new Set(["chat.openai.com", "chatgpt.com"]);
|
|
14
|
+
const OPENAI_HEADLESS_COMPAT_HOSTS = new Set(["chat.com", "chat.openai.com", "chatgpt.com"]);
|
|
15
15
|
const BRAVE_API_KEY_ENV = "BRAVE_API_KEY";
|
|
16
16
|
const AGENT_BROWSER_IDLE_TIMEOUT_ENV = "AGENT_BROWSER_IDLE_TIMEOUT_MS";
|
|
17
17
|
const IMPLICIT_SESSION_IDLE_TIMEOUT_ENV = "PI_AGENT_BROWSER_IMPLICIT_SESSION_IDLE_TIMEOUT_MS";
|
|
@@ -106,7 +106,8 @@ export interface CompatibilityWorkaround {
|
|
|
106
106
|
}
|
|
107
107
|
|
|
108
108
|
export interface OpenResultTabCorrection {
|
|
109
|
-
|
|
109
|
+
selectedTab: string;
|
|
110
|
+
selectionKind: "index" | "label" | "tabId";
|
|
110
111
|
targetTitle?: string;
|
|
111
112
|
targetUrl: string;
|
|
112
113
|
}
|
|
@@ -537,6 +538,26 @@ function normalizeComparableUrl(url: string): string | undefined {
|
|
|
537
538
|
}
|
|
538
539
|
}
|
|
539
540
|
|
|
541
|
+
function normalizeTabSelectionValue(value: string | undefined): string | undefined {
|
|
542
|
+
const normalizedValue = value?.trim();
|
|
543
|
+
return normalizedValue && normalizedValue.length > 0 ? normalizedValue : undefined;
|
|
544
|
+
}
|
|
545
|
+
|
|
546
|
+
function extractTabSelection(tab: { index?: number; label?: string; tabId?: string }): Pick<OpenResultTabCorrection, "selectedTab" | "selectionKind"> | undefined {
|
|
547
|
+
const tabId = normalizeTabSelectionValue(tab.tabId);
|
|
548
|
+
if (tabId) {
|
|
549
|
+
return { selectedTab: tabId, selectionKind: "tabId" };
|
|
550
|
+
}
|
|
551
|
+
const label = normalizeTabSelectionValue(tab.label);
|
|
552
|
+
if (label) {
|
|
553
|
+
return { selectedTab: label, selectionKind: "label" };
|
|
554
|
+
}
|
|
555
|
+
if (typeof tab.index === "number" && Number.isInteger(tab.index) && tab.index >= 0) {
|
|
556
|
+
return { selectedTab: String(tab.index), selectionKind: "index" };
|
|
557
|
+
}
|
|
558
|
+
return undefined;
|
|
559
|
+
}
|
|
560
|
+
|
|
540
561
|
function parseComparableNavigationUrl(url: string): URL | undefined {
|
|
541
562
|
try {
|
|
542
563
|
return new URL(url);
|
|
@@ -727,7 +748,7 @@ export function buildExecutionPlan(
|
|
|
727
748
|
|
|
728
749
|
export function chooseOpenResultTabCorrection(options: {
|
|
729
750
|
activeTabIndex?: number;
|
|
730
|
-
tabs: Array<{ active?: boolean; index?: number; title?: string; url?: string }>;
|
|
751
|
+
tabs: Array<{ active?: boolean; index?: number; label?: string; tabId?: string; title?: string; url?: string }>;
|
|
731
752
|
targetTitle?: string;
|
|
732
753
|
targetUrl?: string;
|
|
733
754
|
}): OpenResultTabCorrection | undefined {
|
|
@@ -740,6 +761,8 @@ export function chooseOpenResultTabCorrection(options: {
|
|
|
740
761
|
const tabsWithIndices = options.tabs.map((tab, index) => ({
|
|
741
762
|
...tab,
|
|
742
763
|
index: typeof tab.index === "number" ? tab.index : index,
|
|
764
|
+
label: normalizeTabSelectionValue(tab.label),
|
|
765
|
+
tabId: normalizeTabSelectionValue(tab.tabId),
|
|
743
766
|
}));
|
|
744
767
|
const activeTab =
|
|
745
768
|
tabsWithIndices.find((tab) => tab.active === true) ??
|
|
@@ -758,13 +781,14 @@ export function chooseOpenResultTabCorrection(options: {
|
|
|
758
781
|
? undefined
|
|
759
782
|
: matchingTabs.find((tab) => typeof tab.title === "string" && tab.title.trim() === trimmedTargetTitle);
|
|
760
783
|
const selectedTab = titledMatch ?? matchingTabs[0];
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
784
|
+
const tabSelection = extractTabSelection(selectedTab);
|
|
785
|
+
return tabSelection
|
|
786
|
+
? {
|
|
787
|
+
...tabSelection,
|
|
765
788
|
targetTitle: trimmedTargetTitle.length > 0 ? trimmedTargetTitle : undefined,
|
|
766
789
|
targetUrl: normalizedTargetUrl,
|
|
767
|
-
}
|
|
790
|
+
}
|
|
791
|
+
: undefined;
|
|
768
792
|
}
|
|
769
793
|
|
|
770
794
|
export function parseCommandInfo(args: string[]): CommandInfo {
|
|
@@ -260,7 +260,7 @@ export async function writeSecureTempChunk(options: {
|
|
|
260
260
|
const { content, fileHandle, path } = options;
|
|
261
261
|
await enqueueTempMutation(async () => {
|
|
262
262
|
await assertSecureTempRootBudget(dirname(path), getTempArtifactByteLength(content));
|
|
263
|
-
await fileHandle.
|
|
263
|
+
await fileHandle.appendFile(content);
|
|
264
264
|
});
|
|
265
265
|
}
|
|
266
266
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-agent-browser-native",
|
|
3
|
-
"version": "0.2.7",
|
|
3
|
+
"version": "0.2.9",
|
|
4
4
|
"description": "pi extension that exposes agent-browser as a native tool for browser automation",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"author": "Mitch Fultz (https://github.com/fitchmultz)",
|
|
@@ -32,6 +32,7 @@
|
|
|
32
32
|
"CHANGELOG.md",
|
|
33
33
|
"LICENSE",
|
|
34
34
|
"docs/ARCHITECTURE.md",
|
|
35
|
+
"docs/COMMAND_REFERENCE.md",
|
|
35
36
|
"docs/RELEASE.md",
|
|
36
37
|
"docs/REQUIREMENTS.md",
|
|
37
38
|
"docs/TOOL_CONTRACT.md"
|