pi-agent-browser-native 0.2.21 → 0.2.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,23 @@
2
2
 
3
3
  ## Unreleased
4
4
 
5
+ ## 0.2.23 - 2026-05-10
6
+
7
+ ### Fixed
8
+ - added safe `auth save --password-stdin` support for native tool calls and redacted password stdin from model-visible content, tool details, upstream failure output, and preserved parse-failure spill files
9
+ - improved session and launch-flag handling for agent workflows, including disabled `--auto-connect`, optional boolean flag values, dash-starting `--args` values, and stale `@ref` recovery guidance through pinned commands and user batch stdin
10
+ - expanded sensitive argument redaction for password and credential command forms
11
+
12
+ ### Changed
13
+ - rewrote the public README around outcome-first usage, fastest install paths, profile/auth workflow guidance, and release verification proof
14
+ - clarified native-tool command guidance for password stdin, cookie/privacy handling, stable tab ids, and explicit session persistence limits
15
+
16
+ ## 0.2.22 - 2026-05-07
17
+
18
+ ### Compatibility
19
+ - migrated the local pi development baseline and peer metadata from deprecated `@mariozechner/*` packages to maintained `@earendil-works/*` `0.74.0`
20
+ - regenerated the npm lockfile against the current stable dependency graph and confirmed package verification remains green
21
+
5
22
  ## 0.2.21 - 2026-05-07
6
23
 
7
24
  ### Fixed
package/README.md CHANGED
@@ -1,61 +1,85 @@
1
1
  # pi-agent-browser-native
2
2
 
3
- Native `pi` integration for [`agent-browser`](https://agent-browser.dev/).
3
+ A Pi extension that lets coding agents drive real browser sessions with a native `agent_browser` tool instead of brittle shell commands.
4
4
 
5
- ## Status
5
+ It is for Pi users who want agents to browse sites, inspect pages, click through flows, capture screenshots, use persistent profiles, and handle authenticated web apps without spending context on `agent-browser` CLI ceremony.
6
6
 
7
- Published pre-1.0 package.
7
+ ## What this looks like in Pi
8
8
 
9
- The native `agent_browser` tool, local verification workflow, package-content checks, and release checks are in place. Package install is the default path; checkout loading is for development and validation.
9
+ You prompt the agent in plain English:
10
10
 
11
- ## Goal
11
+ ```text
12
+ Use the agent_browser tool to open https://react.dev and then take an interactive snapshot.
13
+ ```
12
14
 
13
- Expose `agent-browser` to `pi` as a native tool so agents can automate the browser without going through a bash-backed skill.
15
+ The agent gets a native tool, not a bash workaround:
14
16
 
15
- ## Product stance
17
+ ```json
18
+ { "args": ["open", "https://react.dev"] }
19
+ { "args": ["snapshot", "-i"] }
20
+ ```
16
21
 
17
- - **Not bundled**: users install `agent-browser` separately and keep it on `PATH`
18
- - **Latest-version only**: no backward-compatibility support or shims for older `agent-browser` versions
19
- - **Thin wrapper**: stay close to upstream `agent-browser` instead of re-implementing its CLI
20
- - **Agent-invoked first**: the main UX is the agent calling the tool directly, like `read` or `write`
21
- - **Global-install first**: package behavior matters more than repo-local development wiring
22
+ The result is optimized for agent work:
22
23
 
23
- Upstream install/docs:
24
- - https://agent-browser.dev/
25
- - https://github.com/vercel-labs/agent-browser
24
+ - compact page snapshots that lead with useful page content instead of chrome/sidebar noise
25
+ - interactive `@eN` refs for follow-up clicks and form fills
26
+ - screenshots and downloaded files surfaced as Pi artifacts
27
+ - structured details for titles, URLs, saved files, sessions, and errors
28
+ - spill files for oversized raw output instead of dumping pages into context
29
+ - recovery hints when a tab, selector, stale `@ref`, or launch mode needs a different next step
30
+
31
+ ## Who this is for
26
32
 
27
- ## Why this exists
33
+ - **Pi users** who want browser automation available as a normal tool beside `read`, `write`, and `bash`.
34
+ - **Coding agents** that need low-context browser workflows for docs, QA, research, dashboards, and web apps.
35
+ - **Maintainers** who want a thin integration that tracks the current upstream [`agent-browser`](https://agent-browser.dev/) CLI without bundling or re-implementing it.
28
36
 
29
- A native `pi` integration can improve on the current skill by adding:
37
+ ## The problem
30
38
 
31
- - structured tool calls instead of shell strings
32
- - parsed results instead of bash stdout
33
- - compact model-facing snapshot shaping with full raw spill files for oversized pages
34
- - main-content-first snapshot previews so the model sees the important page region before unrelated chrome or sidebar noise
35
- - inline screenshots and artifacts
36
- - lightweight session convenience inside `pi`
37
- - a better base for serious browser automation
39
+ `agent-browser` is powerful, but plain CLI use is awkward inside an agent harness:
38
40
 
39
- ## Example use cases
41
+ - shell strings are easy for agents to quote wrong
42
+ - large page snapshots can waste model context
43
+ - screenshots and downloads need artifact metadata, not just text paths
44
+ - implicit browser sessions need predictable reuse and cleanup
45
+ - profile/debug launches need a clear way to start fresh after public browsing
46
+ - secrets and auth material must not be echoed into model-visible output
47
+ - stale element refs need actionable recovery guidance, not generic failures
40
48
 
41
- - UI testing and exploratory QA
42
- - web research
43
- - driving web UIs for ChatGPT, Grok, Gemini, and Claude
44
- - authenticated browser sessions and persistent profiles
49
+ `pi-agent-browser-native` keeps upstream `agent-browser` as the browser engine and adds the Pi-native wrapper behavior needed for reliable agent use.
45
50
 
46
- ## Install and try
51
+ ## What it does
47
52
 
48
- The product direction is package-first. Prefer the package source for normal use; keep the local-checkout flow for development and pre-release validation.
53
+ | Pain | Native wrapper capability | Proof surface |
54
+ |---|---|---|
55
+ | Agents build fragile shell commands | Exposes `agent_browser` with exact `args`, controlled `stdin`, and `sessionMode` fields | `extensions/agent-browser/index.ts`, [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md) |
56
+ | Page snapshots are too large | Shows compact, main-content-first summaries and stores full raw output in spill files when needed | `test/agent-browser.presentation.test.ts` |
57
+ | Screenshots/downloads get lost in text | Normalizes artifact paths and reports existence, size, cwd, session, and repair status | [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md#download-screenshot-and-pdf-files) |
58
+ | Profile restores and tab drift confuse agents | Tracks managed sessions, pins intended tabs, and re-selects target tabs after drift | generated tab-recovery notes below; `test/agent-browser.resume-state.test.ts` |
59
+ | Auth/profile workflows can leak secrets | Supports `auth save --password-stdin` and redacts sensitive args, URLs, stdout/stderr, details, and parse-failure spills | `test/agent-browser.extension-validation.test.ts` |
60
+ | Stale `@eN` refs fail mysteriously | Adds recovery guidance to rerun `snapshot -i` or use stable `find` locators | `test/agent-browser.results.test.ts` |
61
+ | Direct binary help may be blocked in agent sessions | Publishes a repo-readable command reference and verifies it against the target upstream version | `npm run verify` |
49
62
 
50
- ### Preferred package install
63
+ ## Fastest way to try it
51
64
 
52
- Install `agent-browser` separately, then install this package into `pi`:
65
+ Install upstream `agent-browser` first and make sure it is on `PATH`:
66
+
67
+ - https://agent-browser.dev/
68
+ - https://github.com/vercel-labs/agent-browser
69
+
70
+ Then install this Pi package:
53
71
 
54
72
  ```bash
55
73
  pi install npm:pi-agent-browser-native
56
74
  ```
57
75
 
58
- To try a published package without installing it permanently, isolate that temporary package source from any configured checkout or global install:
76
+ Start Pi and ask for a browser action:
77
+
78
+ ```text
79
+ Use the agent_browser tool to open https://example.com and then take an interactive snapshot.
80
+ ```
81
+
82
+ For a one-off trial that does not touch your configured Pi extensions:
59
83
 
60
84
  ```bash
61
85
  pi --no-extensions -e npm:pi-agent-browser-native
@@ -67,127 +91,123 @@ For a specific published version:
67
91
  pi --no-extensions -e npm:pi-agent-browser-native@<version>
68
92
  ```
69
93
 
70
- ### First-run doctor
71
-
72
- Run the package doctor before first use or when `agent_browser` is missing or duplicated:
73
-
74
- ```bash
75
- pi-agent-browser-doctor
76
- # one-off without installing the package source permanently:
77
- npm exec --package pi-agent-browser-native -- pi-agent-browser-doctor
78
- # from a checkout:
79
- npm run doctor
80
- ```
81
-
82
- The doctor is read-only. It checks that upstream `agent-browser` is on `PATH`, that `agent-browser --version` matches the wrapper's capability baseline, and that Pi settings do not point at multiple active `pi-agent-browser-native` sources. It does not run upstream `agent-browser doctor --fix` or edit Pi settings.
83
-
84
- If it reports duplicate sources, keep exactly one active source. For normal use, keep `pi install npm:pi-agent-browser-native` and remove checkout paths from Pi settings. For temporary package or checkout trials, use `pi --no-extensions -e npm:pi-agent-browser-native[@<version>]` or `pi --no-extensions -e /path/to/checkout` so configured sources are bypassed.
85
-
86
- ### GitHub install
87
-
88
- For the source install path, prefer the repository URL:
94
+ To install directly from source instead of npm:
89
95
 
90
96
  ```bash
91
97
  pi install https://github.com/fitchmultz/pi-agent-browser-native
92
98
  ```
93
99
 
94
- To try the GitHub source without installing it permanently, isolate that temporary source extension from your normal installed package set:
100
+ For a temporary source trial, keep it isolated from your normal package sources:
95
101
 
96
102
  ```bash
97
103
  pi --no-extensions -e https://github.com/fitchmultz/pi-agent-browser-native
98
104
  ```
99
105
 
100
- This avoids duplicate `agent_browser` registrations when you already have `pi-agent-browser-native` installed globally.
101
-
102
- ### Current practical local-checkout flows
103
-
104
- This repository's `package.json` is itself a publishable pi package manifest that points at `extensions/agent-browser/index.ts`. That file is the real extension entrypoint for both the checkout and the published package.
105
-
106
- Use two local-checkout modes intentionally:
107
-
108
- - **Quick isolated smoke test:** run the checkout explicitly with `-e` and disable extension discovery:
109
-
110
- ```bash
111
- pi --no-extensions -e /absolute/path/to/pi-agent-browser-native
112
- ```
113
-
114
- This bypasses Pi settings and any configured checkout/global package sources, so it avoids duplicate `agent_browser` registrations. After editing extension code, restart this `pi` process to validate the new source; do not use this mode as proof that configured-source `/reload` works.
106
+ ## First-run health check
115
107
 
116
- - **Configured-source lifecycle validation:** run `npm run verify -- lifecycle` for the opt-in automated tmux harness, or keep exactly one active source for this extension in Pi settings and launch plain `pi` for manual checks. Use this mode when validating `/reload`, full restart, and `/resume` behavior because Pi's reload flow operates on discovered/configured resources.
108
+ Run the read-only doctor when installing, upgrading, or debugging missing/duplicated tools:
117
109
 
118
- The native tool exposed to the agent is named `agent_browser`.
119
-
120
- The primary session control parameter is `sessionMode`:
110
+ ```bash
111
+ pi-agent-browser-doctor
112
+ # one-off without permanent install:
113
+ npm exec --package pi-agent-browser-native -- pi-agent-browser-doctor
114
+ # from this checkout:
115
+ npm run doctor
116
+ ```
121
117
 
122
- - `"auto"` (default) reuses the extension-managed `pi`-scoped session when possible
123
- - `"fresh"` switches that managed session to a fresh upstream launch so launch-scoped flags like `--profile`, `--session-name`, `--cdp`, `--state`, `--auto-connect`, `--init-script`, and `--enable` apply and later auto calls follow the new browser
118
+ The doctor checks:
124
119
 
125
- ## Agent quick start
120
+ - upstream `agent-browser` exists on `PATH`
121
+ - the installed upstream version matches this wrapper's command-reference baseline
122
+ - Pi settings do not point at multiple active `pi-agent-browser-native` sources
126
123
 
127
- ### Mental model
124
+ It does **not** edit Pi settings and does **not** run upstream `agent-browser doctor --fix`.
128
125
 
129
- `args` — exact CLI args after `agent-browser`
130
- - `stdin` — raw stdin only for `batch` and `eval --stdin` (other command/stdin combinations are rejected before `agent-browser` is launched)
131
- - `sessionMode`
132
- - `"auto"` — default, reuse the extension-managed `pi`-scoped session
133
- - `"fresh"` — switch that managed session to a new profile/debug launch
126
+ ## Common agent calls
134
127
 
135
- ### Common call shapes
128
+ You usually prompt the agent in natural language. These JSON snippets show the exact native tool shape the agent should use.
136
129
 
137
- Open a page, then take an interactive snapshot:
130
+ Open a page and inspect it:
138
131
 
139
132
  ```json
140
133
  { "args": ["open", "https://example.com"] }
141
134
  { "args": ["snapshot", "-i"] }
142
135
  ```
143
136
 
144
- Click a ref, then re-snapshot after navigation or a major DOM change:
137
+ Click a visible ref, then refresh refs after navigation or a DOM update:
145
138
 
146
139
  ```json
147
140
  { "args": ["click", "@e2"] }
148
141
  { "args": ["snapshot", "-i"] }
149
142
  ```
150
143
 
151
- Run a multi-step browser flow in one tool call:
144
+ Run a multi-step flow in one tool call:
152
145
 
153
146
  ```json
154
147
  { "args": ["batch"], "stdin": "[[\"open\",\"https://example.com\"],[\"snapshot\",\"-i\"]]" }
155
148
  ```
156
149
 
157
- Evaluate page JavaScript via stdin:
150
+ Evaluate page JavaScript through stdin:
158
151
 
159
152
  ```json
160
153
  { "args": ["eval", "--stdin"], "stdin": "document.title" }
161
154
  ```
162
155
 
163
- Download a file from a known link/control directly:
156
+ Save an auth profile without putting the password in `args`:
157
+
158
+ ```json
159
+ { "args": ["auth", "save", "demo", "--password-stdin"], "stdin": "<password>" }
160
+ ```
161
+
162
+ Download a file from a known link or control:
164
163
 
165
164
  ```json
166
165
  { "args": ["download", "@e5", "/tmp/report.pdf"] }
167
166
  ```
168
167
 
169
- For dashboards that start an export asynchronously after a click or navigation, click first and then wait for the download. The wrapper reports `Download completed: /tmp/report.csv` and exposes upstream-reported `details.savedFilePath` plus `details.savedFile` for the `wait` result; with upstream `agent-browser 0.27.0`, confirm `details.artifacts[].exists` before relying on a requested `wait --download <path>` file being present on disk (tracked upstream at [vercel-labs/agent-browser#1300](https://github.com/vercel-labs/agent-browser/issues/1300)):
168
+ For asynchronous exports, click first and then wait for the download:
170
169
 
171
170
  ```json
172
171
  { "args": ["click", "@export"] }
173
172
  { "args": ["wait", "--download", "/tmp/report.csv"] }
174
173
  ```
175
174
 
176
- Batch flows preserve the same saved-file metadata on the wait step:
175
+ With upstream `agent-browser 0.27.0`, treat `details.savedFilePath` as upstream-reported metadata and confirm `details.artifacts[].exists` before relying on the requested `wait --download <path>` file being present on disk.
176
+
177
+ Start a fresh profiled browser when an implicit public-browsing session already exists:
177
178
 
178
179
  ```json
179
- { "args": ["batch"], "stdin": "[[\"click\",\"@export\"],[\"wait\",\"--download\",\"/tmp/report.csv\"]]" }
180
+ { "args": ["--profile", "Default", "open", "https://example.com/account"], "sessionMode": "fresh" }
180
181
  ```
181
182
 
182
- Start a fresh profiled launch after you already used the implicit session:
183
+ After a successful unnamed fresh launch, later calls that use the default `sessionMode: "auto"` follow that browser automatically.
184
+
185
+ ## Authenticated/profile workflows
186
+
187
+ The wrapper does not clone profiles or hide which upstream Chrome profile you chose. Passing `--profile` is an explicit upstream `agent-browser` choice.
188
+
189
+ Use these rules:
190
+
191
+ - Use public/temp profiles for tests and examples.
192
+ - Use `sessionMode: "fresh"` when switching from public browsing to `--profile`, `--session-name`, `--cdp`, `--state`, `--auto-connect`, `--init-script`, or `--enable`.
193
+ - Use `--session` when you want to manage a live upstream session name yourself.
194
+ - Do not treat `--session` as persisted auth or tab restore after `close`; use `--profile`, `--session-name`, or `--state` for persistence.
195
+ - Prefer page actions and storage checks over cookie dumps. `cookies get` can expose real profile cookies.
196
+ - Prefer `auth save --password-stdin` over putting passwords in `args`.
197
+
198
+ Example launch with an explicit session and a profile:
183
199
 
184
200
  ```json
185
- { "args": ["--profile", "Default", "open", "https://example.com/account"], "sessionMode": "fresh" }
201
+ {
202
+ "args": ["--session", "auth-flow", "--profile", "Default", "open", "https://example.com/account"]
203
+ }
186
204
  ```
187
205
 
188
- After a successful unnamed fresh launch, later `sessionMode: "auto"` calls follow that new browser automatically.
206
+ ## React, SPA, and first-navigation setup
189
207
 
190
- React and SPA tooling added upstream in `agent-browser` v0.27.0 is passed through as native tool calls. Launch React introspection with the DevTools hook before first navigation, then use the `react` commands; `vitals` and `pushstate` work as regular command tokens:
208
+ React and SPA tooling from upstream `agent-browser` is passed through directly.
209
+
210
+ Launch React introspection before first navigation:
191
211
 
192
212
  ```json
193
213
  { "args": ["open", "--enable", "react-devtools", "https://example.com"], "sessionMode": "fresh" }
@@ -196,11 +216,16 @@ React and SPA tooling added upstream in `agent-browser` v0.27.0 is passed throug
196
216
  { "args": ["react", "renders", "start"] }
197
217
  { "args": ["react", "renders", "stop"] }
198
218
  { "args": ["react", "suspense", "--only-dynamic"] }
199
- { "args": ["vitals", "https://example.com", "--json"] }
219
+ ```
220
+
221
+ Use SPA and Web Vitals helpers as normal command tokens:
222
+
223
+ ```json
200
224
  { "args": ["pushstate", "/dashboard"] }
225
+ { "args": ["vitals", "https://example.com", "--json"] }
201
226
  ```
202
227
 
203
- For first-navigation setup, launch a fresh blank page before staging routes, cookies, or scripts:
228
+ For setup that must happen before first navigation, open a blank fresh page, stage routes/cookies/scripts, then navigate:
204
229
 
205
230
  ```json
206
231
  { "args": ["open"], "sessionMode": "fresh" }
@@ -209,68 +234,93 @@ For first-navigation setup, launch a fresh blank page before staging routes, coo
209
234
  { "args": ["navigate", "https://example.com"] }
210
235
  ```
211
236
 
212
- Name a new upstream session explicitly when you want to keep reusing it yourself:
237
+ ## Proof and verification
213
238
 
214
- ```json
215
- { "args": ["--session", "auth-flow", "open", "https://example.com"] }
239
+ The local verification gate is:
240
+
241
+ ```bash
242
+ npm run verify
216
243
  ```
217
244
 
218
- ### First useful prompt in a fresh `pi` session
245
+ It runs:
219
246
 
220
- ```text
221
- Use the agent_browser tool to open https://react.dev and then take an interactive snapshot.
247
+ - generated playbook/documentation drift checks
248
+ - `tsc --noEmit`
249
+ - the test suite
250
+ - command-reference baseline checks
251
+ - live command-reference verification against the targeted installed upstream `agent-browser`
252
+
253
+ The opt-in real-upstream suite is separate because it drives a real browser installation:
254
+
255
+ ```bash
256
+ npm run verify -- real-upstream
222
257
  ```
223
258
 
259
+ For package release confidence, follow [`docs/RELEASE.md`](docs/RELEASE.md). The release gate is:
260
+
261
+ ```bash
262
+ npm run doctor
263
+ npm run verify -- release
264
+ ```
265
+
266
+ `npm run verify -- release` includes the default verification gate plus packaged Pi smoke coverage. The package also has a `prepublishOnly` hook that runs default verification and `npm pack --dry-run` during `npm publish`.
267
+
268
+ ## How it works
269
+
270
+ `pi-agent-browser-native` is intentionally thin:
271
+
272
+ 1. Pi loads `extensions/agent-browser/index.ts` from the package manifest.
273
+ 2. The extension registers one native tool named `agent_browser`.
274
+ 3. Tool calls are translated into upstream `agent-browser` CLI invocations with controlled args, stdin, environment, timeout, and session planning.
275
+ 4. Upstream JSON/plain-text output is parsed into model-friendly content and structured details.
276
+ 5. Screenshots, downloads, recordings, traces, profiles, and spill files are normalized as Pi-visible artifacts where possible.
277
+ 6. Generated playbook text in docs and tool metadata stays aligned with `extensions/agent-browser/lib/playbook.ts`.
278
+
279
+ The upstream browser engine remains [`agent-browser`](https://agent-browser.dev/). This package does not bundle it and does not maintain compatibility shims for old upstream versions.
280
+
281
+ ## Current limits
282
+
283
+ - Published pre-1.0 package.
284
+ - Targets the current locally installed upstream `agent-browser` version only.
285
+ - Does not bundle `agent-browser`; users install it separately.
286
+ - Does not provide a human browser UI inside Pi; the primary UX is agent-invoked tool calls.
287
+ - Real authenticated profile use is powerful but sensitive. Treat profile and cookie access as user-approved, task-specific behavior.
288
+ - Wrapper tab/session recovery is best effort around observed upstream behavior, not a replacement for explicit profile/session design.
289
+
224
290
  ## Local development
225
291
 
226
- Do not track or rely on a repo-local `.pi/extensions/agent-browser.ts` autoload shim for this package. That creates an unnecessary second registration path.
292
+ Install upstream `agent-browser`, then install dependencies:
227
293
 
228
- The published entrypoint lives at `extensions/agent-browser/index.ts` and is referenced directly from this repo's `package.json`.
294
+ ```bash
295
+ npm install
296
+ ```
229
297
 
230
- Recommended local development setup:
231
- 1. Install `agent-browser` separately via the upstream project.
232
- 2. Run `npm install`.
233
- 3. For a quick checkout-only smoke test, launch `pi` from this repository root with discovery disabled:
298
+ Quick isolated checkout smoke test:
234
299
 
235
300
  ```bash
236
301
  pi --no-extensions -e .
237
302
  ```
238
303
 
239
- 4. Prompt the agent to use `agent_browser`.
240
- 5. For hot-reload or resume validation, run `npm run verify -- lifecycle` or configure exactly one active source for this extension in Pi settings, launch plain `pi`, and exercise `/reload` plus restart/`/resume`. Settings matter only in this configured-source mode; they are bypassed by `--no-extensions -e .`. See [`docs/RELEASE.md`](docs/RELEASE.md) for the automated harness behavior, cleanup, and transcript retention details.
304
+ This bypasses Pi settings and configured extensions. After editing extension code, restart that Pi process to test the new checkout.
241
305
 
242
- Example prompt:
306
+ Configured-source lifecycle validation:
243
307
 
244
- ```text
245
- Use the agent_browser tool to open https://react.dev and then take an interactive snapshot.
308
+ ```bash
309
+ npm run verify -- lifecycle
246
310
  ```
247
311
 
248
- For installed-package validation after a release, use exactly one active source. The canonical isolated validation sequence is:
312
+ Use lifecycle validation when testing `/reload`, full restart, `/resume`, managed-session continuity, or persisted artifact behavior.
313
+
314
+ Installed-package validation after publish:
249
315
 
250
316
  ```bash
251
317
  npm run verify -- package-pi
252
318
  pi --no-extensions -e npm:pi-agent-browser-native@<version>
253
319
  ```
254
320
 
255
- Only use plain `pi` for installed-package validation after disabling or removing the checkout source from Pi settings.
256
-
257
- Validated workflow examples:
258
-
259
- - open a page and snapshot it
260
- - click a link and confirm the destination title
261
- - use an explicit `--session` across multiple tool calls
262
- - use an explicit `--profile` and verify persisted browser storage across restarts
263
- - open `chat.com` or `chatgpt.com` headlessly with `--profile Default` without forcing `--headed` or `--auto-connect`
264
- - in configured-source lifecycle mode, verify `/reload` and full restart + `/resume` keep following the same implicit managed browser session
265
- - run `batch` with JSON via `stdin`
266
- - run `eval --stdin`
267
- - take a screenshot with inline attachment support and visible artifact metadata: artifact type, requested path, absolute path, existence, size, cwd, session, and repair/copy status when applicable
268
- - inspect upstream help/version through native tool calls like `{ "args": ["--help"] }` and `{ "args": ["--version"] }` via the tool's stateless plain-text inspection fallback
269
- - use `download <selector> <path>` for direct attachment/file-save workflows instead of trying to infer downloads from generic clicks or large eval dumps
270
- - for `.dogfood/...` or other dot-directory screenshot paths, rely on the wrapper's path normalization/repair contract; the visible result reports the requested path and absolute path rather than only an upstream temp path
271
- - use `click` plus `wait --download <path>` for asynchronous export flows, confirm `details.savedFilePath`/`details.savedFile` are present on the wait result or batch wait step, and check `details.artifacts[].exists` before relying on requested-path persistence
272
- - confirm oversized outputs show the actual spill file path directly in tool content, not just a details key name
273
- - inspect `details.artifactManifest` / `details.artifactRetentionSummary` during artifact-heavy flows to recover recent saved files, spill files, and visible eviction state after reload/resume
321
+ ## Generated native-tool playbook notes
322
+
323
+ These sections are generated from `extensions/agent-browser/lib/playbook.ts`. Run `npm run docs -- playbook write` after changing the canonical playbook source.
274
324
 
275
325
  <!-- agent-browser-playbook:start inspection -->
276
326
  <!-- Generated from extensions/agent-browser/lib/playbook.ts. Run `npm run docs -- playbook write` to update. -->
@@ -282,14 +332,6 @@ Native inspection calls use the `agent_browser` tool shape, not shell-like direc
282
332
  These calls return plain text and stay stateless: the extension does not inject its implicit session and does not let inspection consume the managed-session slot needed for later profile, session, CDP, state, or auto-connect launches.
283
333
  <!-- agent-browser-playbook:end inspection -->
284
334
 
285
- Current cautions:
286
- - passing `--profile` is an explicit upstream choice; this extension does not add its own profile-cloning or isolation layer
287
- - launch-scoped flags like `--profile`, `--session-name`, `--cdp`, `--state`, `--auto-connect`, `--init-script`, and `--enable` are for the first command that launches a session; if the implicit session is already active, retry that call with `sessionMode: "fresh"` or provide an explicit `--session ...` for the new launch
288
- - implicit `piab-*` sessions are extension-managed convenience sessions; they stay alive across `/reload` and resumable session transitions so later default calls can keep following the active managed browser on `/reload` or `/resume`, close when the originating `pi` process quits, rely on the configured idle timeout only as an abnormal-exit backstop, store persisted-session large snapshot spill files under a private session-scoped artifact directory with a bounded per-session budget so `details.fullOutputPath` and metadata-only `details.artifactManifest` survive reload/resume without unbounded growth, and still clean up process-private temp spill artifacts on shutdown
289
- - `sessionMode: "fresh"` without an explicit `--session` rotates that extension-managed session to the new browser so later auto calls keep using it
290
- - for local Unix launches, the wrapper uses a short private socket directory under `/tmp` so extension-generated session names do not trip upstream Unix socket-path limits in longer cwd/session-name combinations
291
- - wrapper-spawned commands clamp `AGENT_BROWSER_DEFAULT_TIMEOUT` to 25 seconds and use a 28-second process watchdog so a single upstream CLI call does not cross the upstream 30-second IPC read-timeout/retry path; split intentionally long waits into shorter tool calls
292
- - for direct headless local Chrome launches to `chat.com`, `chatgpt.com`, and `chat.openai.com`, the extension injects a normal Chrome user agent when the caller did not explicitly provide `--user-agent`; this keeps the default headless workflow usable without forcing `--headed` or `--auto-connect`
293
335
  <!-- agent-browser-playbook:start wrapper-tab-recovery -->
294
336
  <!-- Generated from extensions/agent-browser/lib/playbook.ts. Run `npm run docs -- playbook write` to update. -->
295
337
  - After launch-scoped open/goto/navigate calls that can restore existing tabs (for example --profile, --session-name, or --state), agent_browser best-effort re-selects the tab whose URL matches the returned page when restored tabs steal focus during launch.
@@ -297,59 +339,32 @@ Current cautions:
297
339
  - After a successful command on a known target tab, agent_browser also best-effort restores that intended tab if a restored/background tab steals focus after the command completes.
298
340
  - If a known session target unexpectedly reports about:blank, agent_browser preserves the prior intended target, best-effort re-selects it when it still exists, and reports exact recovery guidance when it cannot be re-selected.
299
341
  <!-- agent-browser-playbook:end wrapper-tab-recovery -->
300
- - oversized snapshots and oversized generic outputs compact inline content and print the actual spill file path directly in the tool result when a spill file exists; recent spills and explicit saved artifacts are also summarized in `details.artifactManifest`, including `evicted` entries when retention budgets remove older persisted files
301
- - artifact-producing commands render direct readable artifact metadata in visible content and `details.artifacts`: `kind`/`artifactType`, `path`, `requestedPath`, `absolutePath`, `exists`, `sizeBytes`, `status`, `cwd`, `session`, and `tempPath` when the wrapper repaired an upstream temp fallback
302
- - if the caller explicitly passes `--json`, the visible text content is valid JSON; for `stream status`, the wrapper enriches data with `wsUrl` and `frameFormat`
303
- - `trace` and `profiler` share upstream tracing machinery; the wrapper blocks starts/stops that conflict with owner state it observed in the current Pi session, but the message says "wrapper believes" because upstream or external CLI calls can desynchronize that local state
304
- - explicit caller-provided `--session` values are treated as user-managed and are not auto-closed by the extension
305
- - explicit caller-provided `--user-agent` values win over the ChatGPT/OpenAI compatibility workaround
306
- - tool progress/details redact sensitive invocation values such as `--headers`, proxy credentials, and auth-bearing URL parameters before echoing them back into Pi
307
-
308
- ### Switching from public browsing to a fresh profile/debug launch
309
-
310
- A common agent workflow is:
311
-
312
- 1. browse a public page with the default implicit session
313
- 2. then switch to a fresh authenticated/profile/debug launch
314
-
315
- Use `sessionMode: "fresh"` for that transition instead of relying on the implicit session:
316
-
317
- ```json
318
- {
319
- "args": ["--profile", "Default", "open", "https://example.com/account"],
320
- "sessionMode": "fresh"
321
- }
322
- ```
323
-
324
- After that call succeeds, later default `sessionMode: "auto"` calls continue in the new fresh browser.
325
-
326
- If you want to name the new upstream session yourself, pass an explicit session instead:
327
-
328
- ```json
329
- {
330
- "args": ["--session", "auth-flow", "--profile", "Default", "open", "https://example.com/account"]
331
- }
332
- ```
333
342
 
334
- ## Docs
343
+ ## Project map
335
344
 
336
- - [`docs/REQUIREMENTS.md`](docs/REQUIREMENTS.md) — product requirements and constraints
337
- - [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md) — current architecture decision
338
- - [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md) — proposed v1 tool shape
339
- - [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md) — local repo-readable command reference for the blocked direct-binary path
340
- - [`docs/RELEASE.md`](docs/RELEASE.md) — maintainer release and package verification workflow
345
+ | Path | Purpose |
346
+ |---|---|
347
+ | `extensions/agent-browser/index.ts` | Pi extension entrypoint and native tool wrapper |
348
+ | `extensions/agent-browser/lib/runtime.ts` | Args, session planning, redaction, process, and runtime helpers |
349
+ | `extensions/agent-browser/lib/results/` | Model-facing result rendering and error guidance |
350
+ | `extensions/agent-browser/lib/playbook.ts` | Canonical generated agent/browser guidance |
351
+ | `docs/COMMAND_REFERENCE.md` | Repo-readable native command reference |
352
+ | `docs/TOOL_CONTRACT.md` | Tool parameters, result shape, and behavior contract |
353
+ | `docs/ARCHITECTURE.md` | Design decisions and implementation structure |
354
+ | `docs/REQUIREMENTS.md` | Product requirements and constraints |
355
+ | `docs/RELEASE.md` | Release, package, and lifecycle verification workflow |
356
+ | `test/` | Wrapper, runtime, presentation, lifecycle, and package tests |
341
357
 
342
- ## Documentation rule
358
+ ## More docs
343
359
 
344
- When requirements change in chat:
360
+ - [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md) — full native command reference and upstream capability baseline
361
+ - [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md) — exact tool contract
362
+ - [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md) — how the wrapper is designed
363
+ - [`docs/REQUIREMENTS.md`](docs/REQUIREMENTS.md) — product constraints and non-goals
364
+ - [`docs/RELEASE.md`](docs/RELEASE.md) — maintainer release workflow
345
365
 
346
- 1. update `docs/REQUIREMENTS.md`
347
- 2. update the affected design docs
348
- 3. update this README if user-facing expectations changed
366
+ ## Next action
349
367
 
350
- When the upstream `agent-browser` binary changes:
368
+ If you are a user, install the package and ask Pi to open a public page with `agent_browser`.
351
369
 
352
- 1. re-check the upstream command/help surface
353
- 2. update `docs/COMMAND_REFERENCE.md`
354
- 3. update tool guidance, README, and release docs if behavior or recommended usage changed
355
- 4. verify the blocked direct-binary path still has an equally usable local extension-side documentation path
370
+ If you are evaluating the implementation, read [`extensions/agent-browser/index.ts`](extensions/agent-browser/index.ts), then run `npm run verify`.
@@ -31,7 +31,7 @@ The extension should:
31
31
  - resolve `agent-browser` from `PATH`
32
32
  - invoke it directly, not through a shell
33
33
  - inject `--json`
34
- - support optional stdin only for `eval --stdin` and `batch`, rejecting other command/stdin combinations before launch
34
+ - support optional stdin only for `eval --stdin`, `batch`, and `auth save --password-stdin`, rejecting other command/stdin combinations before launch
35
35
 
36
36
  ### Agent-first UX
37
37
 
@@ -34,7 +34,7 @@ Tool parameters:
34
34
  ```
35
35
 
36
36
  - `args`: exact `agent-browser` CLI tokens after the binary name.
37
- - `stdin`: only for `batch` and `eval --stdin`; other command/stdin combinations are rejected before `agent-browser` is launched.
37
+ - `stdin`: only for `batch`, `eval --stdin`, and `auth save --password-stdin`; other command/stdin combinations are rejected before `agent-browser` is launched.
38
38
  - `sessionMode`:
39
39
  - `"auto"` reuses the extension-managed session when possible.
40
40
  - `"fresh"` rotates that managed session to a fresh upstream launch so launch-scoped flags like `--profile`, `--session-name`, `--cdp`, `--state`, `--auto-connect`, `--init-script`, or `--enable` apply.
@@ -220,7 +220,7 @@ The tables below intentionally list more than the recommended workflow. Rare com
220
220
 
221
221
  ### Built-in skills
222
222
 
223
- Native-tool note: upstream skills are written for the standalone `agent-browser` CLI and may show bash/heredoc examples. In pi, convert those examples to `agent_browser` calls: pass CLI tokens in `args`, and pass heredoc/stdin bodies through the tool `stdin` field for `batch` or `eval --stdin`.
223
+ Native-tool note: upstream skills are written for the standalone `agent-browser` CLI and may show bash/heredoc examples. In pi, convert those examples to `agent_browser` calls: pass CLI tokens in `args`, and pass heredoc/stdin bodies through the tool `stdin` field for `batch`, `eval --stdin`, or `auth save --password-stdin`.
224
224
 
225
225
  | Command | Purpose |
226
226
  | --- | --- |
@@ -300,9 +300,11 @@ These calls return plain text and stay stateless: the extension does not inject
300
300
  | `cookies [get|set|clear]` | Manage cookies. `set` supports `--url`, `--domain`, `--path`, `--httpOnly`, `--secure`, `--sameSite`, `--expires`, and `--curl <file>` for JSON, cURL, or bare Cookie-header bulk imports. |
301
301
  | `storage <local|session>` | Manage web storage. |
302
302
 
303
+ Privacy note: `cookies get` can expose real profile cookies. Do not run it against `--profile Default` or other authenticated profiles unless the user explicitly needs cookie inspection; prefer task-specific page actions and storage checks.
304
+
303
305
  ### Tabs
304
306
 
305
- Stable tab ids look like `t1`, `t2`, and `t3`. Optional user labels such as `docs` or `app` are interchangeable with ids wherever a tab reference is accepted.
307
+ Stable tab ids look like `t1`, `t2`, and `t3`. Optional user labels such as `docs` or `app` are interchangeable with ids wherever a tab reference is accepted. Upstream help may refer to numeric tab positions, but this wrapper guidance uses stable `t<N>` ids because positional integers are not accepted by current upstream `agent-browser`.
306
308
 
307
309
  | Command | Purpose |
308
310
  | --- | --- |
@@ -377,7 +379,7 @@ When these diagnostic commands are invoked through the native `agent_browser` to
377
379
  | Command | Purpose |
378
380
  | --- | --- |
379
381
  | `batch [--bail] ["cmd" ...]` | Execute multiple commands sequentially from args or stdin. |
380
- | `auth save <name> [opts]` | Save an auth profile with options such as `--url`, `--username`, `--password`, or `--password-stdin`. |
382
+ | `auth save <name> [opts]` | Save an auth profile with options such as `--url`, `--username`, `--password`, or `--password-stdin`. Prefer `--password-stdin` with the tool `stdin` field; avoid putting passwords in `args`. |
381
383
  | `auth login <name>` | Login using saved credentials. |
382
384
  | `auth list` | List saved auth profiles. |
383
385
  | `auth show <name>` | Show auth profile metadata. |
@@ -78,7 +78,7 @@ Examples:
78
78
 
79
79
  - type: `string`
80
80
  - optional
81
- - raw stdin for `eval --stdin` and `batch`
81
+ - raw stdin for `eval --stdin`, `batch`, and `auth save --password-stdin`
82
82
  - rejected before launch for any other command/stdin combination, including commands such as `click`, `snapshot`, or `open`
83
83
 
84
84
  Examples:
@@ -91,6 +91,10 @@ Examples:
91
91
  { "args": ["batch"], "stdin": "[[\"open\",\"https://example.com\"],[\"snapshot\",\"-i\"]]" }
92
92
  ```
93
93
 
94
+ ```json
95
+ { "args": ["auth", "save", "my-login", "--password-stdin"], "stdin": "password from the user-approved secret source" }
96
+ ```
97
+
94
98
  ### `sessionMode`
95
99
 
96
100
  - type: `"auto" | "fresh"`
@@ -222,7 +226,7 @@ If `agent-browser` is not on `PATH`, fail with a message that:
222
226
  - reconstruct the current extension-managed session and latest `artifactManifest` from persisted tool details on resume/reload so later default calls keep following the active managed browser and can continue reporting artifact retention state
223
227
  - when an unnamed `sessionMode: "fresh"` launch succeeds, make it the new extension-managed session so later default calls keep using it
224
228
  - if that unnamed fresh launch replaced an already-active managed session, best-effort close the old managed session after the switch succeeds
225
- - treat explicit caller-provided `--session` choices as user-managed
229
+ - treat explicit caller-provided `--session` choices as user-managed; `--session` isolates a live browser session but is not a persisted tab/auth restore mechanism after `close`, so use `--profile`, `--session-name`, or `--state` when persisted auth/tab state is required
226
230
  - pass explicit `--profile` straight through to upstream `agent-browser`; no profile-cloning or isolation layer is added in v1
227
231
  <!-- agent-browser-playbook:start wrapper-tab-recovery -->
228
232
  <!-- Generated from extensions/agent-browser/lib/playbook.ts. Run `npm run docs -- playbook write` to update. -->
@@ -9,8 +9,8 @@
9
9
  import { copyFile, mkdir, readFile, rm, stat } from "node:fs/promises";
10
10
  import { dirname, extname, isAbsolute, join, resolve } from "node:path";
11
11
 
12
- import { StringEnum } from "@mariozechner/pi-ai";
13
- import { isToolCallEventType, type AgentToolResult, type ExtensionAPI } from "@mariozechner/pi-coding-agent";
12
+ import { StringEnum } from "@earendil-works/pi-ai";
13
+ import { isToolCallEventType, type AgentToolResult, type ExtensionAPI } from "@earendil-works/pi-coding-agent";
14
14
  import { Type } from "typebox";
15
15
 
16
16
  import {
@@ -73,7 +73,7 @@ const AGENT_BROWSER_PARAMS = Type.Object({
73
73
  description: "Exact agent-browser CLI arguments, excluding the binary name and any shell operators.",
74
74
  minItems: 1,
75
75
  }),
76
- stdin: Type.Optional(Type.String({ description: "Optional raw stdin content; only supported for batch and eval --stdin." })),
76
+ stdin: Type.Optional(Type.String({ description: "Optional raw stdin content; only supported for batch, eval --stdin, and auth save --password-stdin." })),
77
77
  sessionMode: Type.Optional(
78
78
  StringEnum(["auto", "fresh"] as const, {
79
79
  description:
@@ -936,6 +936,45 @@ function restoreArtifactManifestFromBranch(branch: unknown[]): SessionArtifactMa
936
936
  return restoredManifest;
937
937
  }
938
938
 
939
+ function isPasswordStdinAuthSave(options: { command?: string; commandTokens: string[] }): boolean {
940
+ return options.command === "auth" && options.commandTokens[1] === "save" && options.commandTokens.includes("--password-stdin");
941
+ }
942
+
943
+ function getExactSensitiveStdinValues(options: { command?: string; commandTokens: string[]; stdin?: string }): string[] {
944
+ if (options.stdin === undefined || !isPasswordStdinAuthSave(options)) {
945
+ return [];
946
+ }
947
+ return [...new Set([options.stdin, options.stdin.trimEnd(), options.stdin.trim()].filter((value) => value.length > 0))];
948
+ }
949
+
950
+ function redactExactSensitiveText(text: string, sensitiveValues: string[]): string {
951
+ let redacted = text;
952
+ for (const value of sensitiveValues) {
953
+ redacted = redacted.split(value).join("[REDACTED]");
954
+ }
955
+ return redacted;
956
+ }
957
+
958
+ function redactExactSensitiveValue(value: unknown, sensitiveValues: string[]): unknown {
959
+ if (sensitiveValues.length === 0) {
960
+ return value;
961
+ }
962
+ if (typeof value === "string") {
963
+ return redactExactSensitiveText(value, sensitiveValues);
964
+ }
965
+ if (Array.isArray(value)) {
966
+ return value.map((item) => redactExactSensitiveValue(item, sensitiveValues));
967
+ }
968
+ if (!isRecord(value)) {
969
+ return value;
970
+ }
971
+ return Object.fromEntries(Object.entries(value).map(([key, entryValue]) => [key, redactExactSensitiveValue(entryValue, sensitiveValues)]));
972
+ }
973
+
974
+ function redactToolDetails(details: Record<string, unknown>, sensitiveValues: string[]): Record<string, unknown> {
975
+ return redactSensitiveValue(redactExactSensitiveValue(details, sensitiveValues)) as Record<string, unknown>;
976
+ }
977
+
939
978
  function validateStdinCommandContract(options: { command?: string; commandTokens: string[]; stdin?: string }): string | undefined {
940
979
  if (options.stdin === undefined) {
941
980
  return undefined;
@@ -946,8 +985,11 @@ function validateStdinCommandContract(options: { command?: string; commandTokens
946
985
  if (options.command === "eval" && options.commandTokens.includes("--stdin")) {
947
986
  return undefined;
948
987
  }
988
+ if (isPasswordStdinAuthSave(options)) {
989
+ return undefined;
990
+ }
949
991
  const commandLabel = options.command ? `\`${options.command}\`` : "the requested command";
950
- return `agent_browser stdin is only supported for \`batch\` and \`eval --stdin\`; remove stdin from ${commandLabel} or use one of those command forms.`;
992
+ return `agent_browser stdin is only supported for \`batch\`, \`eval --stdin\`, and \`auth save --password-stdin\`; remove stdin from ${commandLabel} or use one of those command forms.`;
951
993
  }
952
994
 
953
995
  function supportsPinnedStdinCommand(options: { command?: string; commandTokens: string[]; stdin?: string }): boolean {
@@ -1029,6 +1071,17 @@ function parseUserBatchStdin(stdin: string | undefined): { error?: string; steps
1029
1071
  }
1030
1072
  }
1031
1073
 
1074
+ function getStaleRefArgs(commandTokens: string[], stdin?: string): string[] {
1075
+ if (commandTokens[0] !== "batch" || stdin === undefined) {
1076
+ return commandTokens;
1077
+ }
1078
+ const parsed = parseUserBatchStdin(stdin);
1079
+ if (parsed.error || parsed.steps === undefined) {
1080
+ return commandTokens;
1081
+ }
1082
+ return parsed.steps.flatMap((step) => step);
1083
+ }
1084
+
1032
1085
  function buildPinnedBatchPlan(options: {
1033
1086
  command?: string;
1034
1087
  commandTokens: string[];
@@ -1293,6 +1346,7 @@ function getPersistentSessionArtifactStore(ctx: {
1293
1346
 
1294
1347
  async function preserveParseFailureOutput(options: {
1295
1348
  artifactManifest?: SessionArtifactManifest;
1349
+ exactSensitiveValues?: string[];
1296
1350
  persistentArtifactStore?: PersistentSessionArtifactStore;
1297
1351
  stdoutSpillPath?: string;
1298
1352
  }): Promise<{
@@ -1306,7 +1360,7 @@ async function preserveParseFailureOutput(options: {
1306
1360
  }
1307
1361
 
1308
1362
  try {
1309
- const rawOutput = await readFile(options.stdoutSpillPath);
1363
+ const rawOutput = redactExactSensitiveText(await readFile(options.stdoutSpillPath, "utf8"), options.exactSensitiveValues ?? []);
1310
1364
  const nowMs = Date.now();
1311
1365
  let evictedArtifacts: PersistentSessionArtifactEviction[] = [];
1312
1366
  let fullOutputPath: string;
@@ -1546,6 +1600,11 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
1546
1600
  }
1547
1601
 
1548
1602
  const commandTokens = extractCommandTokens(preparedArgs.args);
1603
+ const exactSensitiveValues = getExactSensitiveStdinValues({
1604
+ command: executionPlan.commandInfo.command,
1605
+ commandTokens,
1606
+ stdin: params.stdin,
1607
+ });
1549
1608
  const traceOwnerGuardMessage = getTraceOwnerGuardMessage({
1550
1609
  command: executionPlan.commandInfo.command,
1551
1610
  sessionName: executionPlan.sessionName,
@@ -1755,9 +1814,13 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
1755
1814
  presentationEnvelope = repairedBatchScreenshots.envelope;
1756
1815
  const screenshotArtifactRequest = repairedScreenshot.request;
1757
1816
  const batchScreenshotArtifactRequests = repairedBatchScreenshots.requests;
1817
+ if (presentationEnvelope && exactSensitiveValues.length > 0) {
1818
+ presentationEnvelope = redactExactSensitiveValue(presentationEnvelope, exactSensitiveValues) as AgentBrowserEnvelope;
1819
+ }
1758
1820
  const parseFailureOutput = parseError
1759
1821
  ? await preserveParseFailureOutput({
1760
1822
  artifactManifest,
1823
+ exactSensitiveValues,
1761
1824
  persistentArtifactStore,
1762
1825
  stdoutSpillPath: processResult.stdoutSpillPath,
1763
1826
  })
@@ -1934,6 +1997,7 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
1934
1997
  exitCode: processResult.exitCode,
1935
1998
  parseError,
1936
1999
  plainTextInspection,
2000
+ staleRefArgs: getStaleRefArgs(commandTokens, params.stdin),
1937
2001
  spawnError: processResult.spawnError,
1938
2002
  stderr: processResult.stderr,
1939
2003
  timedOut: processResult.timedOut,
@@ -2009,54 +2073,55 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
2009
2073
  contentWithSessionWarnings.unshift({ type: "text", text: warningText });
2010
2074
  }
2011
2075
  }
2012
- const redactedContent = contentWithSessionWarnings.map((item) =>
2013
- item.type === "text" && !(userRequestedJson && !plainTextInspection) ? { ...item, text: redactSensitiveText(item.text) } : item,
2014
- );
2076
+ const redactedContent = contentWithSessionWarnings.map((item) => {
2077
+ if (item.type !== "text") return item;
2078
+ const exactRedactedText = redactExactSensitiveText(item.text, exactSensitiveValues);
2079
+ return userRequestedJson && !plainTextInspection
2080
+ ? { ...item, text: exactRedactedText }
2081
+ : { ...item, text: redactSensitiveText(exactRedactedText) };
2082
+ });
2083
+ const details = {
2084
+ args: redactedArgs,
2085
+ artifactManifest: presentation.artifactManifest,
2086
+ artifactRetentionSummary: presentation.artifactRetentionSummary,
2087
+ artifacts: presentation.artifacts,
2088
+ batchFailure: presentation.batchFailure,
2089
+ batchSteps: presentation.batchSteps,
2090
+ command: executionPlan.commandInfo.command,
2091
+ compatibilityWorkaround,
2092
+ subcommand: executionPlan.commandInfo.subcommand,
2093
+ data: presentation.data,
2094
+ error: plainTextInspection ? undefined : presentationEnvelope?.error,
2095
+ inspection: plainTextInspection || undefined,
2096
+ navigationSummary,
2097
+ aboutBlankSessionMismatch,
2098
+ openResultTabCorrection,
2099
+ effectiveArgs: redactedProcessArgs,
2100
+ exitCode: processResult.exitCode,
2101
+ fullOutputPath: parseFailureOutput.fullOutputPath ?? presentation.fullOutputPath,
2102
+ fullOutputPaths: presentation.fullOutputPaths,
2103
+ fullOutputUnavailable: parseFailureOutput.fullOutputUnavailable,
2104
+ imagePath: presentation.imagePath,
2105
+ imagePaths: presentation.imagePaths,
2106
+ parseError: plainTextInspection ? undefined : parseError,
2107
+ savedFile: presentation.savedFile,
2108
+ savedFilePath: presentation.savedFilePath,
2109
+ sessionMode,
2110
+ sessionTabCorrection,
2111
+ sessionTabTarget: currentSessionTabTarget,
2112
+ ...buildSessionDetailFields(executionPlan.sessionName, executionPlan.usedImplicitSession),
2113
+ sessionRecoveryHint: redactedRecoveryHint,
2114
+ startupScopedFlags: executionPlan.startupScopedFlags,
2115
+ stderr: processResult.stderr,
2116
+ stdout: plainTextInspection ? inspectionText ?? "" : parseSucceeded ? undefined : processResult.stdout,
2117
+ summary: presentation.summary,
2118
+ timedOut: processResult.timedOut || undefined,
2119
+ timeoutMs: processResult.timeoutMs,
2120
+ };
2015
2121
 
2016
2122
  return {
2017
2123
  content: redactedContent,
2018
- details: {
2019
- args: redactedArgs,
2020
- artifactManifest: redactSensitiveValue(presentation.artifactManifest),
2021
- artifactRetentionSummary: presentation.artifactRetentionSummary,
2022
- artifacts: redactSensitiveValue(presentation.artifacts),
2023
- batchFailure: redactSensitiveValue(presentation.batchFailure),
2024
- batchSteps: redactSensitiveValue(presentation.batchSteps),
2025
- command: executionPlan.commandInfo.command,
2026
- compatibilityWorkaround,
2027
- subcommand: executionPlan.commandInfo.subcommand,
2028
- data: redactSensitiveValue(presentation.data),
2029
- error: plainTextInspection ? undefined : redactSensitiveValue(presentationEnvelope?.error),
2030
- inspection: plainTextInspection || undefined,
2031
- navigationSummary: redactSensitiveValue(navigationSummary),
2032
- aboutBlankSessionMismatch: redactSensitiveValue(aboutBlankSessionMismatch),
2033
- openResultTabCorrection: redactSensitiveValue(openResultTabCorrection),
2034
- effectiveArgs: redactedProcessArgs,
2035
- exitCode: processResult.exitCode,
2036
- fullOutputPath: parseFailureOutput.fullOutputPath ?? presentation.fullOutputPath,
2037
- fullOutputPaths: presentation.fullOutputPaths,
2038
- fullOutputUnavailable: parseFailureOutput.fullOutputUnavailable,
2039
- imagePath: presentation.imagePath,
2040
- imagePaths: presentation.imagePaths,
2041
- parseError: plainTextInspection ? undefined : parseError,
2042
- savedFile: redactSensitiveValue(presentation.savedFile),
2043
- savedFilePath: presentation.savedFilePath ? redactSensitiveText(presentation.savedFilePath) : undefined,
2044
- sessionMode,
2045
- sessionTabCorrection: redactSensitiveValue(sessionTabCorrection),
2046
- sessionTabTarget: redactSensitiveValue(currentSessionTabTarget),
2047
- ...buildSessionDetailFields(executionPlan.sessionName, executionPlan.usedImplicitSession),
2048
- sessionRecoveryHint: redactedRecoveryHint,
2049
- startupScopedFlags: executionPlan.startupScopedFlags,
2050
- stderr: processResult.stderr ? redactSensitiveText(processResult.stderr) : undefined,
2051
- stdout: plainTextInspection
2052
- ? redactSensitiveText(inspectionText ?? "")
2053
- : parseSucceeded
2054
- ? undefined
2055
- : redactSensitiveText(processResult.stdout),
2056
- summary: redactSensitiveText(presentation.summary),
2057
- timedOut: processResult.timedOut || undefined,
2058
- timeoutMs: processResult.timeoutMs,
2059
- },
2124
+ details: redactToolDetails(details, exactSensitiveValues),
2060
2125
  isError: !succeeded,
2061
2126
  };
2062
2127
  } finally {
@@ -3,7 +3,7 @@
3
3
  * Responsibilities: Define stable guidance bullets, native tool-call examples, and wrapper-behavior notes without importing runtime/browser process code.
4
4
  * Scope: Agent-facing documentation and prompt-guidance text only; command execution and wrapper state behavior live in runtime modules.
5
5
  * Usage: Imported by the extension entrypoint for promptGuidelines and by the documentation drift-check script for generated Markdown blocks.
6
- * Invariants/Assumptions: The native pi tool receives args after the agent-browser binary, stdin is only for batch/eval --stdin, and wrapper behavior documented here must match implemented behavior.
6
+ * Invariants/Assumptions: The native pi tool receives args after the agent-browser binary, stdin is only for batch/eval --stdin/auth save --password-stdin, and wrapper behavior documented here must match implemented behavior.
7
7
  */
8
8
 
9
9
  export const PROJECT_RULE_PROMPT =
@@ -14,9 +14,9 @@ export const TOOL_PROMPT_GUIDELINES_PREFIX = [
14
14
  ] as const;
15
15
 
16
16
  export const QUICK_START_GUIDELINES = [
17
- "Quick start mental model: args are the exact agent-browser CLI args after the binary; stdin is only for batch and eval --stdin, and other command/stdin combinations are rejected before launch; sessionMode=fresh switches the extension-managed pi-scoped session to a fresh upstream launch when you need new --profile, --session-name, --cdp, --state, --auto-connect, --init-script, or --enable state.",
17
+ "Quick start mental model: args are the exact agent-browser CLI args after the binary; stdin is only for batch, eval --stdin, and auth save --password-stdin, and other command/stdin combinations are rejected before launch; sessionMode=fresh switches the extension-managed pi-scoped session to a fresh upstream launch when you need new --profile, --session-name, --cdp, --state, --auto-connect, --init-script, or --enable state.",
18
18
  "Common first calls: { args: [\"open\", \"https://example.com\"] } then { args: [\"snapshot\", \"-i\"] }; after navigation, use { args: [\"click\", \"@e2\"] } then { args: [\"snapshot\", \"-i\"] }.",
19
- "Common advanced calls: { args: [\"batch\"], stdin: \"[[\\\"open\\\",\\\"https://example.com\\\"],[\\\"snapshot\\\",\\\"-i\\\"]]\" }, { args: [\"eval\", \"--stdin\"], stdin: \"document.title\" }, { args: [\"--profile\", \"Default\", \"open\", \"https://example.com/account\"], sessionMode: \"fresh\" }, and { args: [\"open\", \"--enable\", \"react-devtools\", \"https://example.com\"], sessionMode: \"fresh\" }.",
19
+ "Common advanced calls: { args: [\"batch\"], stdin: \"[[\\\"open\\\",\\\"https://example.com\\\"],[\\\"snapshot\\\",\\\"-i\\\"]]\" }, { args: [\"eval\", \"--stdin\"], stdin: \"document.title\" }, { args: [\"auth\", \"save\", \"name\", \"--password-stdin\"], stdin: \"<password from user-approved secret source>\" }, { args: [\"--profile\", \"Default\", \"open\", \"https://example.com/account\"], sessionMode: \"fresh\" }, and { args: [\"open\", \"--enable\", \"react-devtools\", \"https://example.com\"], sessionMode: \"fresh\" }.",
20
20
  "High-value command reference: download <selector> <path> saves a file triggered by a click; get title/url/text/html/value/attr/count reads page state; screenshot [path] captures an image; pdf <path> saves a PDF; tab list and tab <tab-id-or-label> inspect or recover the active tab; react tree/inspect/renders/suspense introspect React after --enable react-devtools; vitals [url] measures Core Web Vitals; pushstate <url> performs SPA navigation.",
21
21
  "For artifact-producing commands, read the visible artifact block for requested path, absolute path, existence, size, type, cwd, and session; details.artifacts contains the same machine-readable metadata. For annotated screenshots inside batch, put --annotate in top-level args (for example { args: [\"--annotate\", \"batch\"], stdin: \"[[\\\"screenshot\\\",\\\"/tmp/page.png\\\"]]\" }) rather than inside the screenshot step.",
22
22
  ] as const;
@@ -47,7 +47,7 @@ export const TOOL_PROMPT_GUIDELINES_SUFFIX = [
47
47
  "Prefer agent_browser over bash for opening sites, reading docs on the web, clicking, filling, screenshots, eval, and batch workflows.",
48
48
  "Do not fall back to osascript, AppleScript, or generic browser-driving bash commands when agent_browser can do the job.",
49
49
  "Pass exact agent-browser CLI arguments in args, excluding the binary name.",
50
- "Use stdin only for eval --stdin and batch instead of shell heredocs; other command/stdin combinations are rejected before launch.",
50
+ "Use stdin only for eval --stdin, batch, and auth save --password-stdin instead of shell heredocs or password args; other command/stdin combinations are rejected before launch.",
51
51
  "Let the extension-managed session handle the common path unless you explicitly need a fresh launch for upstream flags like --profile, --session-name, --cdp, --state, --auto-connect, --init-script, or --enable.",
52
52
  "Use sessionMode=fresh when switching from an existing implicit session to a new profile/debug/init-script launch without inventing a fixed explicit session name; later auto calls will follow that new session.",
53
53
  ] as const;
@@ -135,6 +135,17 @@ function buildUpstreamIpcReadTimeoutMessage(): string {
135
135
  ].join(" ");
136
136
  }
137
137
 
138
+ function maybeAppendStaleRefHint(message: string, args?: string[]): string {
139
+ const usedRef = args?.some((arg) => /^@e\d+\b/.test(arg)) ?? false;
140
+ if (!usedRef || !/could not locate element|element not found|no element/i.test(message)) {
141
+ return message;
142
+ }
143
+ return [
144
+ message,
145
+ "This @ref may be stale after navigation, scrolling, or a DOM update. Run `agent_browser` with `{ \"args\": [\"snapshot\", \"-i\"] }` again and retry with a current ref, or use a stable `find` locator.",
146
+ ].join("\n");
147
+ }
148
+
138
149
  export function getAgentBrowserErrorText(options: {
139
150
  aborted: boolean;
140
151
  command?: string;
@@ -144,6 +155,7 @@ export function getAgentBrowserErrorText(options: {
144
155
  parseError?: string;
145
156
  plainTextInspection: boolean;
146
157
  spawnError?: Error;
158
+ staleRefArgs?: string[];
147
159
  stderr: string;
148
160
  timedOut?: boolean;
149
161
  timeoutMs?: number;
@@ -163,7 +175,8 @@ export function getAgentBrowserErrorText(options: {
163
175
  if (envelopeErrorText && isUpstreamIpcReadTimeoutMessage(envelopeErrorText)) {
164
176
  return buildUpstreamIpcReadTimeoutMessage();
165
177
  }
166
- return envelopeErrorText ?? (stderr.trim() || buildFailureFallback(options));
178
+ const fallback = envelopeErrorText ?? (stderr.trim() || buildFailureFallback(options));
179
+ return maybeAppendStaleRefHint(fallback, options.staleRefArgs ?? options.effectiveArgs);
167
180
  }
168
181
  if (exitCode !== 0) {
169
182
  return stderr.trim() || buildExitCodeFallback(options);
@@ -349,6 +349,9 @@ function splitShellWords(input: string): string[] | undefined {
349
349
  current += input[index];
350
350
  continue;
351
351
  }
352
+ if (char === "#" && current.length === 0) {
353
+ break;
354
+ }
352
355
  if (/\s/.test(char)) {
353
356
  if (current.length > 0) {
354
357
  words.push(current);
@@ -384,7 +387,7 @@ function formatNativeSkillContent(content: string): string {
384
387
  const heredocMatch = /^(.*?)\s+(<<-?)['"]?([A-Za-z_][A-Za-z0-9_]*)['"]?\s*$/.exec(rawArgsText);
385
388
  const argsText = heredocMatch?.[1] ?? rawArgsText;
386
389
  const args = splitShellWords(argsText);
387
- if (!args) {
390
+ if (!args || args.length === 0) {
388
391
  output.push(line);
389
392
  continue;
390
393
  }
@@ -419,7 +422,7 @@ function formatSkillsText(commandInfo: CommandInfo, data: unknown): string | und
419
422
  if (content) {
420
423
  const note = [
421
424
  "Pi native-tool note: upstream skill text was adapted for this native tool.",
422
- "Use args for CLI tokens and stdin only for batch or eval --stdin; do not pipe heredocs through bash unless the user explicitly asks for a bash workflow.",
425
+ "Use args for CLI tokens and stdin only for batch, eval --stdin, or auth save --password-stdin; do not pipe heredocs through bash unless the user explicitly asks for a bash workflow.",
423
426
  ].join("\n");
424
427
  return `${note}\n\n${redactModelFacingText(formatNativeSkillContent(content))}`;
425
428
  }
@@ -87,11 +87,29 @@ const LEGACY_BASH_ALLOW_PATTERNS = [
87
87
  const BROWSER_PROMPT_PATTERNS = [
88
88
  /\b(?:agent[_ -]?browser|browser automation|eval\s+--stdin|screenshot|snapshot|tab\s+list)\b/i,
89
89
  /\b(?:react\s+(?:tree|inspect|renders|suspense)|web\s+vitals|core\s+web\s+vitals|pushstate)\b/i,
90
+ /\b(?:live\s+docs?|online\s+research|research\s+(?:online|the\s+web)|search\s+(?:online|the\s+web)|web\s+research)\b/i,
90
91
  /\bbrowser\b.*\b(?:automation|click|fill|navigate|open|page|screenshot|site|snapshot|tab|url|visit|web(?:site| page)?)\b/i,
91
92
  /\b(?:browse|click|fill|login|navigate|open|visit)\b.*\b(?:https?:\/\/\S+|page|site|tab|url|web(?:site| page)?)\b/i,
92
93
  ];
93
94
  const INSPECTION_FLAGS = new Set(["--help", "-h", "--version", "-V"]);
94
- const SENSITIVE_VALUE_FLAGS = new Set(["--headers", "--proxy"]);
95
+ const SENSITIVE_VALUE_FLAGS = new Set(["--headers", "--password", "--proxy"]);
96
+ const GLOBAL_VALUE_FLAGS_ALLOWING_DASH_VALUE = new Set(["--args"]);
97
+ const GLOBAL_BOOLEAN_FLAGS_WITH_OPTIONAL_VALUES = new Set([
98
+ "--allow-file-access",
99
+ "--annotate",
100
+ "--auto-connect",
101
+ "--confirm-interactive",
102
+ "--content-boundaries",
103
+ "--debug",
104
+ "--headed",
105
+ "--ignore-https-errors",
106
+ "--json",
107
+ "--no-auto-dialog",
108
+ "--quiet",
109
+ "-q",
110
+ "--verbose",
111
+ "-v",
112
+ ]);
95
113
  const SENSITIVE_QUERY_PARAM_PATTERN =
96
114
  /^(?:access(?:_|-)?token|api(?:_|-)?key|auth|authorization|bearer|client(?:_|-)?secret|code|cookie|id(?:_|-)?token|key|pass(?:word)?|refresh(?:_|-)?token|secret|session(?:_|-)?id|sig(?:nature)?|token)$/i;
97
115
  const SENSITIVE_FIELD_NAME_PATTERN =
@@ -425,6 +443,15 @@ export function redactInvocationArgs(args: string[]): string[] {
425
443
  redacted.push(redactUrlToken(token));
426
444
  }
427
445
 
446
+ const commandStartIndex = findCommandStartIndex(args);
447
+ if (commandStartIndex !== undefined && args[commandStartIndex] === "set" && args[commandStartIndex + 1] === "credentials") {
448
+ for (const index of [commandStartIndex + 2, commandStartIndex + 3]) {
449
+ if (redacted[index] !== undefined) {
450
+ redacted[index] = "[REDACTED]";
451
+ }
452
+ }
453
+ }
454
+
428
455
  return redacted;
429
456
  }
430
457
 
@@ -654,8 +681,14 @@ export function validateToolArgs(args: string[]): string | undefined {
654
681
  return undefined;
655
682
  }
656
683
 
684
+ function isBooleanLiteral(token: string | undefined): boolean {
685
+ const normalized = token?.trim().toLowerCase();
686
+ return normalized === "true" || normalized === "false";
687
+ }
688
+
657
689
  function getInvalidValueFlagDetails(args: string[]): InvalidValueFlagDetails | undefined {
658
- for (const [index, token] of args.entries()) {
690
+ for (let index = 0; index < args.length; index += 1) {
691
+ const token = args[index];
659
692
  if (!token.startsWith("-")) {
660
693
  continue;
661
694
  }
@@ -682,7 +715,7 @@ function getInvalidValueFlagDetails(args: string[]): InvalidValueFlagDetails | u
682
715
  reason: "missing-value",
683
716
  };
684
717
  }
685
- if (receivedToken.startsWith("-")) {
718
+ if (receivedToken.startsWith("-") && !GLOBAL_VALUE_FLAGS_ALLOWING_DASH_VALUE.has(normalizedToken)) {
686
719
  return {
687
720
  flag: normalizedToken,
688
721
  index,
@@ -690,7 +723,7 @@ function getInvalidValueFlagDetails(args: string[]): InvalidValueFlagDetails | u
690
723
  receivedToken,
691
724
  };
692
725
  }
693
- continue;
726
+ index += 1;
694
727
  }
695
728
  return undefined;
696
729
  }
@@ -794,7 +827,7 @@ function getCompatibilityWorkaround(args: string[], commandInfo: CommandInfo): C
794
827
  if (isBooleanFlagEnabled(args, "--headed")) {
795
828
  return undefined;
796
829
  }
797
- if (hasFlagToken(args, "--cdp") || hasFlagToken(args, "--provider") || hasFlagToken(args, "-p") || hasFlagToken(args, "--auto-connect")) {
830
+ if (hasFlagToken(args, "--cdp") || hasFlagToken(args, "--provider") || hasFlagToken(args, "-p") || isBooleanFlagEnabled(args, "--auto-connect")) {
798
831
  return undefined;
799
832
  }
800
833
  const engine = getFlagValue(args, "--engine");
@@ -831,7 +864,7 @@ export function extractExplicitSessionName(args: string[]): string | undefined {
831
864
  export function getStartupScopedFlags(args: string[]): string[] {
832
865
  return LAUNCH_SCOPED_FLAG_DEFINITIONS
833
866
  .map((definition) => definition.flag)
834
- .filter((flag) => hasFlagToken(args, flag));
867
+ .filter((flag) => flag === "--auto-connect" ? isBooleanFlagEnabled(args, flag) : hasFlagToken(args, flag));
835
868
  }
836
869
 
837
870
  export function hasLaunchScopedTabCorrectionFlag(args: string[]): boolean {
@@ -1039,7 +1072,7 @@ export function parseCommandInfo(args: string[]): CommandInfo {
1039
1072
  };
1040
1073
  }
1041
1074
 
1042
- export function extractCommandTokens(args: string[]): string[] {
1075
+ function findCommandStartIndex(args: string[]): number | undefined {
1043
1076
  for (let index = 0; index < args.length; index += 1) {
1044
1077
  const token = args[index];
1045
1078
  if (token.startsWith("--session=")) {
@@ -1049,10 +1082,21 @@ export function extractCommandTokens(args: string[]): string[] {
1049
1082
  const normalizedToken = token.split("=", 1)[0] ?? token;
1050
1083
  if (GLOBAL_FLAGS_WITH_VALUES.has(normalizedToken) && !token.includes("=")) {
1051
1084
  index += 1;
1085
+ } else if (
1086
+ GLOBAL_BOOLEAN_FLAGS_WITH_OPTIONAL_VALUES.has(normalizedToken) &&
1087
+ !token.includes("=") &&
1088
+ isBooleanLiteral(args[index + 1])
1089
+ ) {
1090
+ index += 1;
1052
1091
  }
1053
1092
  continue;
1054
1093
  }
1055
- return args.slice(index);
1094
+ return index;
1056
1095
  }
1057
- return [];
1096
+ return undefined;
1097
+ }
1098
+
1099
+ export function extractCommandTokens(args: string[]): string[] {
1100
+ const commandStartIndex = findCommandStartIndex(args);
1101
+ return commandStartIndex === undefined ? [] : args.slice(commandStartIndex);
1058
1102
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-agent-browser-native",
3
- "version": "0.2.21",
3
+ "version": "0.2.23",
4
4
  "description": "pi extension that exposes agent-browser as a native tool for browser automation",
5
5
  "type": "module",
6
6
  "author": "Mitch Fultz (https://github.com/fitchmultz)",
@@ -48,20 +48,20 @@
48
48
  ]
49
49
  },
50
50
  "peerDependencies": {
51
- "@mariozechner/pi-ai": "*",
52
- "typebox": "*",
53
- "@mariozechner/pi-coding-agent": "*"
51
+ "@earendil-works/pi-ai": "*",
52
+ "@earendil-works/pi-coding-agent": "*",
53
+ "typebox": "*"
54
54
  },
55
55
  "devDependencies": {
56
- "@mariozechner/pi-ai": "^0.72.0",
57
- "@mariozechner/pi-coding-agent": "^0.72.0",
58
- "@types/node": "^25.6.0",
56
+ "@earendil-works/pi-ai": "^0.74.0",
57
+ "@earendil-works/pi-coding-agent": "^0.74.0",
58
+ "@types/node": "^25.6.1",
59
59
  "tsx": "^4.21.0",
60
- "typebox": "^1.1.37",
60
+ "typebox": "^1.1.38",
61
61
  "typescript": "^6.0.3"
62
62
  },
63
63
  "overrides": {
64
- "basic-ftp": "5.3.0"
64
+ "basic-ftp": "6.0.1"
65
65
  },
66
66
  "scripts": {
67
67
  "docs": "node ./scripts/project.mjs docs",
@@ -70,5 +70,5 @@
70
70
  "verify": "node ./scripts/project.mjs verify",
71
71
  "prepublishOnly": "npm run verify && npm pack --dry-run"
72
72
  },
73
- "packageManager": "npm@10.9.8"
73
+ "packageManager": "npm@11.14.0"
74
74
  }