npm - pi-agent-browser-native - Versions diffs - 0.2.22 → 0.2.23 - Mend

pi-agent-browser-native 0.2.22 → 0.2.23

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/CHANGELOG.md +11 -0
package/README.md +206 -191
package/docs/ARCHITECTURE.md +1 -1
package/docs/COMMAND_REFERENCE.md +6 -4
package/docs/TOOL_CONTRACT.md +6 -2
package/extensions/agent-browser/index.ts +113 -48
package/extensions/agent-browser/lib/playbook.ts +4 -4
package/extensions/agent-browser/lib/results/envelope.ts +14 -1
package/extensions/agent-browser/lib/results/presentation.ts +5 -2
package/extensions/agent-browser/lib/runtime.ts +53 -9
package/package.json +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,17 @@
 ## Unreleased
+## 0.2.23 - 2026-05-10
+### Fixed
+- added safe `auth save --password-stdin` support for native tool calls and redacted password stdin from model-visible content, tool details, upstream failure output, and preserved parse-failure spill files
+- improved session and launch-flag handling for agent workflows, including disabled `--auto-connect`, optional boolean flag values, dash-starting `--args` values, and stale `@ref` recovery guidance through pinned commands and user batch stdin
+- expanded sensitive argument redaction for password and credential command forms
+### Changed
+- rewrote the public README around outcome-first usage, fastest install paths, profile/auth workflow guidance, and release verification proof
+- clarified native-tool command guidance for password stdin, cookie/privacy handling, stable tab ids, and explicit session persistence limits
 ## 0.2.22 - 2026-05-07
 ### Compatibility

package/README.md CHANGED Viewed

@@ -1,61 +1,85 @@
 # pi-agent-browser-native
-Native `pi` integration for [`agent-browser`](https://agent-browser.dev/).
+A Pi extension that lets coding agents drive real browser sessions with a native `agent_browser` tool instead of brittle shell commands.
-## Status
+It is for Pi users who want agents to browse sites, inspect pages, click through flows, capture screenshots, use persistent profiles, and handle authenticated web apps without spending context on `agent-browser` CLI ceremony.
-Published pre-1.0 package.
+## What this looks like in Pi
-The native `agent_browser` tool, local verification workflow, package-content checks, and release checks are in place. Package install is the default path; checkout loading is for development and validation.
+You prompt the agent in plain English:
-## Goal
+```text
+Use the agent_browser tool to open https://react.dev and then take an interactive snapshot.
+```
-Expose `agent-browser` to `pi` as a native tool so agents can automate the browser without going through a bash-backed skill.
+The agent gets a native tool, not a bash workaround:
-## Product stance
+```json
+{ "args": ["open", "https://react.dev"] }
+{ "args": ["snapshot", "-i"] }
+```
-- **Not bundled**: users install `agent-browser` separately and keep it on `PATH`
-- **Latest-version only**: no backward-compatibility support or shims for older `agent-browser` versions
-- **Thin wrapper**: stay close to upstream `agent-browser` instead of re-implementing its CLI
-- **Agent-invoked first**: the main UX is the agent calling the tool directly, like `read` or `write`
-- **Global-install first**: package behavior matters more than repo-local development wiring
+The result is optimized for agent work:
-Upstream install/docs:
-- https://agent-browser.dev/
-- https://github.com/vercel-labs/agent-browser
+- compact page snapshots that lead with useful page content instead of chrome/sidebar noise
+- interactive `@eN` refs for follow-up clicks and form fills
+- screenshots and downloaded files surfaced as Pi artifacts
+- structured details for titles, URLs, saved files, sessions, and errors
+- spill files for oversized raw output instead of dumping pages into context
+- recovery hints when a tab, selector, stale `@ref`, or launch mode needs a different next step
+## Who this is for
-## Why this exists
+- **Pi users** who want browser automation available as a normal tool beside `read`, `write`, and `bash`.
+- **Coding agents** that need low-context browser workflows for docs, QA, research, dashboards, and web apps.
+- **Maintainers** who want a thin integration that tracks the current upstream [`agent-browser`](https://agent-browser.dev/) CLI without bundling or re-implementing it.
-A native `pi` integration can improve on the current skill by adding:
+## The problem
-- structured tool calls instead of shell strings
-- parsed results instead of bash stdout
-- compact model-facing snapshot shaping with full raw spill files for oversized pages
-- main-content-first snapshot previews so the model sees the important page region before unrelated chrome or sidebar noise
-- inline screenshots and artifacts
-- lightweight session convenience inside `pi`
-- a better base for serious browser automation
+`agent-browser` is powerful, but plain CLI use is awkward inside an agent harness:
-## Example use cases
+- shell strings are easy for agents to quote wrong
+- large page snapshots can waste model context
+- screenshots and downloads need artifact metadata, not just text paths
+- implicit browser sessions need predictable reuse and cleanup
+- profile/debug launches need a clear way to start fresh after public browsing
+- secrets and auth material must not be echoed into model-visible output
+- stale element refs need actionable recovery guidance, not generic failures
-- UI testing and exploratory QA
-- web research
-- driving web UIs for ChatGPT, Grok, Gemini, and Claude
-- authenticated browser sessions and persistent profiles
+`pi-agent-browser-native` keeps upstream `agent-browser` as the browser engine and adds the Pi-native wrapper behavior needed for reliable agent use.
-## Install and try
+## What it does
-The product direction is package-first. Prefer the package source for normal use; keep the local-checkout flow for development and pre-release validation.
+| Pain | Native wrapper capability | Proof surface |
+|---|---|---|
+| Agents build fragile shell commands | Exposes `agent_browser` with exact `args`, controlled `stdin`, and `sessionMode` fields | `extensions/agent-browser/index.ts`, [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md) |
+| Page snapshots are too large | Shows compact, main-content-first summaries and stores full raw output in spill files when needed | `test/agent-browser.presentation.test.ts` |
+| Screenshots/downloads get lost in text | Normalizes artifact paths and reports existence, size, cwd, session, and repair status | [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md#download-screenshot-and-pdf-files) |
+| Profile restores and tab drift confuse agents | Tracks managed sessions, pins intended tabs, and re-selects target tabs after drift | generated tab-recovery notes below; `test/agent-browser.resume-state.test.ts` |
+| Auth/profile workflows can leak secrets | Supports `auth save --password-stdin` and redacts sensitive args, URLs, stdout/stderr, details, and parse-failure spills | `test/agent-browser.extension-validation.test.ts` |
+| Stale `@eN` refs fail mysteriously | Adds recovery guidance to rerun `snapshot -i` or use stable `find` locators | `test/agent-browser.results.test.ts` |
+| Direct binary help may be blocked in agent sessions | Publishes a repo-readable command reference and verifies it against the target upstream version | `npm run verify` |
-### Preferred package install
+## Fastest way to try it
-Install `agent-browser` separately, then install this package into `pi`:
+Install upstream `agent-browser` first and make sure it is on `PATH`:
+- https://agent-browser.dev/
+- https://github.com/vercel-labs/agent-browser
+Then install this Pi package:
 ```bash
 pi install npm:pi-agent-browser-native
 ```
-To try a published package without installing it permanently, isolate that temporary package source from any configured checkout or global install:
+Start Pi and ask for a browser action:
+```text
+Use the agent_browser tool to open https://example.com and then take an interactive snapshot.
+```
+For a one-off trial that does not touch your configured Pi extensions:
 ```bash
 pi --no-extensions -e npm:pi-agent-browser-native
@@ -67,127 +91,123 @@ For a specific published version:
 pi --no-extensions -e npm:pi-agent-browser-native@<version>
 ```
-### First-run doctor
-Run the package doctor before first use or when `agent_browser` is missing or duplicated:
-```bash
-pi-agent-browser-doctor
-# one-off without installing the package source permanently:
-npm exec --package pi-agent-browser-native -- pi-agent-browser-doctor
-# from a checkout:
-npm run doctor
-```
-The doctor is read-only. It checks that upstream `agent-browser` is on `PATH`, that `agent-browser --version` matches the wrapper's capability baseline, and that Pi settings do not point at multiple active `pi-agent-browser-native` sources. It does not run upstream `agent-browser doctor --fix` or edit Pi settings.
-If it reports duplicate sources, keep exactly one active source. For normal use, keep `pi install npm:pi-agent-browser-native` and remove checkout paths from Pi settings. For temporary package or checkout trials, use `pi --no-extensions -e npm:pi-agent-browser-native[@<version>]` or `pi --no-extensions -e /path/to/checkout` so configured sources are bypassed.
-### GitHub install
-For the source install path, prefer the repository URL:
+To install directly from source instead of npm:
 ```bash
 pi install https://github.com/fitchmultz/pi-agent-browser-native
 ```
-To try the GitHub source without installing it permanently, isolate that temporary source extension from your normal installed package set:
+For a temporary source trial, keep it isolated from your normal package sources:
 ```bash
 pi --no-extensions -e https://github.com/fitchmultz/pi-agent-browser-native
 ```
-This avoids duplicate `agent_browser` registrations when you already have `pi-agent-browser-native` installed globally.
-### Current practical local-checkout flows
-This repository's `package.json` is itself a publishable pi package manifest that points at `extensions/agent-browser/index.ts`. That file is the real extension entrypoint for both the checkout and the published package.
-Use two local-checkout modes intentionally:
-- **Quick isolated smoke test:** run the checkout explicitly with `-e` and disable extension discovery:
-  ```bash
-  pi --no-extensions -e /absolute/path/to/pi-agent-browser-native
-  ```
-  This bypasses Pi settings and any configured checkout/global package sources, so it avoids duplicate `agent_browser` registrations. After editing extension code, restart this `pi` process to validate the new source; do not use this mode as proof that configured-source `/reload` works.
+## First-run health check
-- **Configured-source lifecycle validation:** run `npm run verify -- lifecycle` for the opt-in automated tmux harness, or keep exactly one active source for this extension in Pi settings and launch plain `pi` for manual checks. Use this mode when validating `/reload`, full restart, and `/resume` behavior because Pi's reload flow operates on discovered/configured resources.
+Run the read-only doctor when installing, upgrading, or debugging missing/duplicated tools:
-The native tool exposed to the agent is named `agent_browser`.
-The primary session control parameter is `sessionMode`:
+```bash
+pi-agent-browser-doctor
+# one-off without permanent install:
+npm exec --package pi-agent-browser-native -- pi-agent-browser-doctor
+# from this checkout:
+npm run doctor
+```
-- `"auto"` (default) reuses the extension-managed `pi`-scoped session when possible
-- `"fresh"` switches that managed session to a fresh upstream launch so launch-scoped flags like `--profile`, `--session-name`, `--cdp`, `--state`, `--auto-connect`, `--init-script`, and `--enable` apply and later auto calls follow the new browser
+The doctor checks:
-## Agent quick start
+- upstream `agent-browser` exists on `PATH`
+- the installed upstream version matches this wrapper's command-reference baseline
+- Pi settings do not point at multiple active `pi-agent-browser-native` sources
-### Mental model
+It does **not** edit Pi settings and does **not** run upstream `agent-browser doctor --fix`.
-- `args` — exact CLI args after `agent-browser`
-- `stdin` — raw stdin only for `batch` and `eval --stdin` (other command/stdin combinations are rejected before `agent-browser` is launched)
-- `sessionMode`
-  - `"auto"` — default, reuse the extension-managed `pi`-scoped session
-  - `"fresh"` — switch that managed session to a new profile/debug launch
+## Common agent calls
-### Common call shapes
+You usually prompt the agent in natural language. These JSON snippets show the exact native tool shape the agent should use.
-Open a page, then take an interactive snapshot:
+Open a page and inspect it:
 ```json
 { "args": ["open", "https://example.com"] }
 { "args": ["snapshot", "-i"] }
 ```
-Click a ref, then re-snapshot after navigation or a major DOM change:
+Click a visible ref, then refresh refs after navigation or a DOM update:
 ```json
 { "args": ["click", "@e2"] }
 { "args": ["snapshot", "-i"] }
 ```
-Run a multi-step browser flow in one tool call:
+Run a multi-step flow in one tool call:
 ```json
 { "args": ["batch"], "stdin": "[[\"open\",\"https://example.com\"],[\"snapshot\",\"-i\"]]" }
 ```
-Evaluate page JavaScript via stdin:
+Evaluate page JavaScript through stdin:
 ```json
 { "args": ["eval", "--stdin"], "stdin": "document.title" }
 ```
-Download a file from a known link/control directly:
+Save an auth profile without putting the password in `args`:
+```json
+{ "args": ["auth", "save", "demo", "--password-stdin"], "stdin": "<password>" }
+```
+Download a file from a known link or control:
 ```json
 { "args": ["download", "@e5", "/tmp/report.pdf"] }
 ```
-For dashboards that start an export asynchronously after a click or navigation, click first and then wait for the download. The wrapper reports `Download completed: /tmp/report.csv` and exposes upstream-reported `details.savedFilePath` plus `details.savedFile` for the `wait` result; with upstream `agent-browser 0.27.0`, confirm `details.artifacts[].exists` before relying on a requested `wait --download <path>` file being present on disk (tracked upstream at [vercel-labs/agent-browser#1300](https://github.com/vercel-labs/agent-browser/issues/1300)):
+For asynchronous exports, click first and then wait for the download:
 ```json
 { "args": ["click", "@export"] }
 { "args": ["wait", "--download", "/tmp/report.csv"] }
 ```
-Batch flows preserve the same saved-file metadata on the wait step:
+With upstream `agent-browser 0.27.0`, treat `details.savedFilePath` as upstream-reported metadata and confirm `details.artifacts[].exists` before relying on the requested `wait --download <path>` file being present on disk.
+Start a fresh profiled browser after the implicit public-browsing session already exists:
 ```json
-{ "args": ["batch"], "stdin": "[[\"click\",\"@export\"],[\"wait\",\"--download\",\"/tmp/report.csv\"]]" }
+{ "args": ["--profile", "Default", "open", "https://example.com/account"], "sessionMode": "fresh" }
 ```
-Start a fresh profiled launch after you already used the implicit session:
+After a successful unnamed fresh launch, later default `sessionMode: "auto"` calls follow that browser automatically.
+## Authenticated/profile workflows
+The wrapper does not clone profiles or hide which upstream Chrome profile you chose. Passing `--profile` is an explicit upstream `agent-browser` choice.
+Use these rules:
+- Use public/temp profiles for tests and examples.
+- Use `sessionMode: "fresh"` when switching from public browsing to `--profile`, `--session-name`, `--cdp`, `--state`, `--auto-connect`, `--init-script`, or `--enable`.
+- Use `--session` when you want to manage a live upstream session name yourself.
+- Do not treat `--session` as persisted auth or tab restore after `close`; use `--profile`, `--session-name`, or `--state` for persistence.
+- Prefer page actions and storage checks over cookie dumps. `cookies get` can expose real profile cookies.
+- Prefer `auth save --password-stdin` over putting passwords in `args`.
+Example explicit session plus profile launch:
 ```json
-{ "args": ["--profile", "Default", "open", "https://example.com/account"], "sessionMode": "fresh" }
+{
+  "args": ["--session", "auth-flow", "--profile", "Default", "open", "https://example.com/account"]
+}
 ```
-After a successful unnamed fresh launch, later `sessionMode: "auto"` calls follow that new browser automatically.
+## React, SPA, and first-navigation setup
-React and SPA tooling added upstream in `agent-browser` v0.27.0 is passed through as native tool calls. Launch React introspection with the DevTools hook before first navigation, then use the `react` commands; `vitals` and `pushstate` work as regular command tokens:
+React and SPA tooling from upstream `agent-browser` is passed through directly.
+Launch React introspection before first navigation:
 ```json
 { "args": ["open", "--enable", "react-devtools", "https://example.com"], "sessionMode": "fresh" }
@@ -196,11 +216,16 @@ React and SPA tooling added upstream in `agent-browser` v0.27.0 is passed throug
 { "args": ["react", "renders", "start"] }
 { "args": ["react", "renders", "stop"] }
 { "args": ["react", "suspense", "--only-dynamic"] }
-{ "args": ["vitals", "https://example.com", "--json"] }
+```
+Use SPA and Web Vitals helpers as normal command tokens:
+```json
 { "args": ["pushstate", "/dashboard"] }
+{ "args": ["vitals", "https://example.com", "--json"] }
 ```
-For first-navigation setup, launch a fresh blank page before staging routes, cookies, or scripts:
+For setup that must happen before first navigation, open a blank fresh page, stage routes/cookies/scripts, then navigate:
 ```json
 { "args": ["open"], "sessionMode": "fresh" }
@@ -209,68 +234,93 @@ For first-navigation setup, launch a fresh blank page before staging routes, coo
 { "args": ["navigate", "https://example.com"] }
 ```
-Name a new upstream session explicitly when you want to keep reusing it yourself:
+## Proof and verification
-```json
-{ "args": ["--session", "auth-flow", "open", "https://example.com"] }
+The local verification gate is:
+```bash
+npm run verify
 ```
-### First useful prompt in a fresh `pi` session
+It runs:
-```text
-Use the agent_browser tool to open https://react.dev and then take an interactive snapshot.
+- generated playbook/documentation drift checks
+- `tsc --noEmit`
+- the test suite
+- command-reference baseline checks
+- live command-reference verification against the targeted installed upstream `agent-browser`
+The opt-in real-upstream suite is separate because it drives a real browser installation:
+```bash
+npm run verify -- real-upstream
 ```
+For package release confidence, follow [`docs/RELEASE.md`](docs/RELEASE.md). The release gate is:
+```bash
+npm run doctor
+npm run verify -- release
+```
+`npm run verify -- release` includes the default verification gate plus packaged Pi smoke coverage. The package also has a `prepublishOnly` hook that runs default verification and `npm pack --dry-run` during `npm publish`.
+## How it works
+`pi-agent-browser-native` is intentionally thin:
+1. Pi loads `extensions/agent-browser/index.ts` from the package manifest.
+2. The extension registers one native tool named `agent_browser`.
+3. Tool calls are translated into upstream `agent-browser` CLI invocations with controlled args, stdin, environment, timeout, and session planning.
+4. Upstream JSON/plain-text output is parsed into model-friendly content and structured details.
+5. Screenshots, downloads, recordings, traces, profiles, and spill files are normalized as Pi-visible artifacts where possible.
+6. Generated playbook text in docs and tool metadata stays aligned with `extensions/agent-browser/lib/playbook.ts`.
+The upstream browser engine remains [`agent-browser`](https://agent-browser.dev/). This package does not bundle it and does not maintain compatibility shims for old upstream versions.
+## Current limits
+- Published pre-1.0 package.
+- Targets the current locally installed upstream `agent-browser` version only.
+- Does not bundle `agent-browser`; users install it separately.
+- Does not provide a human browser UI inside Pi; the primary UX is agent-invoked tool calls.
+- Real authenticated profile use is powerful but sensitive. Treat profile and cookie access as user-approved, task-specific behavior.
+- Wrapper tab/session recovery is best effort around observed upstream behavior, not a replacement for explicit profile/session design.
 ## Local development
-Do not track or rely on a repo-local `.pi/extensions/agent-browser.ts` autoload shim for this package. That creates an unnecessary second registration path.
+Install upstream `agent-browser`, then install dependencies:
-The published entrypoint lives at `extensions/agent-browser/index.ts` and is referenced directly from this repo's `package.json`.
+```bash
+npm install
+```
-Recommended local development setup:
-1. Install `agent-browser` separately via the upstream project.
-2. Run `npm install`.
-3. For a quick checkout-only smoke test, launch `pi` from this repository root with discovery disabled:
+Quick isolated checkout smoke test:
 ```bash
 pi --no-extensions -e .
 ```
-4. Prompt the agent to use `agent_browser`.
-5. For hot-reload or resume validation, run `npm run verify -- lifecycle` or configure exactly one active source for this extension in Pi settings, launch plain `pi`, and exercise `/reload` plus restart/`/resume`. Settings matter only in this configured-source mode; they are bypassed by `--no-extensions -e .`. See [`docs/RELEASE.md`](docs/RELEASE.md) for the automated harness behavior, cleanup, and transcript retention details.
+This bypasses Pi settings and configured extensions. After editing extension code, restart that Pi process to test the new checkout.
-Example prompt:
+Configured-source lifecycle validation:
-```text
-Use the agent_browser tool to open https://react.dev and then take an interactive snapshot.
+```bash
+npm run verify -- lifecycle
 ```
-For installed-package validation after a release, use exactly one active source. The canonical isolated validation sequence is:
+Use lifecycle validation when testing `/reload`, full restart, `/resume`, managed-session continuity, or persisted artifact behavior.
+Installed-package validation after publish:
 ```bash
 npm run verify -- package-pi
 pi --no-extensions -e npm:pi-agent-browser-native@<version>
 ```
-Only use plain `pi` for installed-package validation after disabling or removing the checkout source from Pi settings.
-Validated workflow examples:
-- open a page and snapshot it
-- click a link and confirm the destination title
-- use an explicit `--session` across multiple tool calls
-- use an explicit `--profile` and verify persisted browser storage across restarts
-- open `chat.com` or `chatgpt.com` headlessly with `--profile Default` without forcing `--headed` or `--auto-connect`
-- in configured-source lifecycle mode, verify `/reload` and full restart + `/resume` keep following the same implicit managed browser session
-- run `batch` with JSON via `stdin`
-- run `eval --stdin`
-- take a screenshot with inline attachment support and visible artifact metadata: artifact type, requested path, absolute path, existence, size, cwd, session, and repair/copy status when applicable
-- inspect upstream help/version through native tool calls like `{ "args": ["--help"] }` and `{ "args": ["--version"] }` via the tool's stateless plain-text inspection fallback
-- use `download <selector> <path>` for direct attachment/file-save workflows instead of trying to infer downloads from generic clicks or large eval dumps
-- for `.dogfood/...` or other dot-directory screenshot paths, rely on the wrapper's path normalization/repair contract; the visible result reports the requested path and absolute path rather than only an upstream temp path
-- use `click` plus `wait --download <path>` for asynchronous export flows, confirm `details.savedFilePath`/`details.savedFile` are present on the wait result or batch wait step, and check `details.artifacts[].exists` before relying on requested-path persistence
-- confirm oversized outputs show the actual spill file path directly in tool content, not just a details key name
-- inspect `details.artifactManifest` / `details.artifactRetentionSummary` during artifact-heavy flows to recover recent saved files, spill files, and visible eviction state after reload/resume
+## Generated native-tool playbook notes
+These sections are generated from `extensions/agent-browser/lib/playbook.ts`. Run `npm run docs -- playbook write` after changing the canonical playbook source.
 <!-- agent-browser-playbook:start inspection -->
 <!-- Generated from extensions/agent-browser/lib/playbook.ts. Run `npm run docs -- playbook write` to update. -->
@@ -282,14 +332,6 @@ Native inspection calls use the `agent_browser` tool shape, not shell-like direc
 These calls return plain text and stay stateless: the extension does not inject its implicit session and does not let inspection consume the managed-session slot needed for later profile, session, CDP, state, or auto-connect launches.
 <!-- agent-browser-playbook:end inspection -->
-Current cautions:
-- passing `--profile` is an explicit upstream choice; this extension does not add its own profile-cloning or isolation layer
-- launch-scoped flags like `--profile`, `--session-name`, `--cdp`, `--state`, `--auto-connect`, `--init-script`, and `--enable` are for the first command that launches a session; if the implicit session is already active, retry that call with `sessionMode: "fresh"` or provide an explicit `--session ...` for the new launch
-- implicit `piab-*` sessions are extension-managed convenience sessions; they stay alive across `/reload` and resumable session transitions so later default calls can keep following the active managed browser on `/reload` or `/resume`, close when the originating `pi` process quits, rely on the configured idle timeout only as an abnormal-exit backstop, store persisted-session large snapshot spill files under a private session-scoped artifact directory with a bounded per-session budget so `details.fullOutputPath` and metadata-only `details.artifactManifest` survive reload/resume without unbounded growth, and still clean up process-private temp spill artifacts on shutdown
-- `sessionMode: "fresh"` without an explicit `--session` rotates that extension-managed session to the new browser so later auto calls keep using it
-- for local Unix launches, the wrapper uses a short private socket directory under `/tmp` so extension-generated session names do not trip upstream Unix socket-path limits in longer cwd/session-name combinations
-- wrapper-spawned commands clamp `AGENT_BROWSER_DEFAULT_TIMEOUT` to 25 seconds and use a 28-second process watchdog so a single upstream CLI call does not cross the upstream 30-second IPC read-timeout/retry path; split intentionally long waits into shorter tool calls
-- for direct headless local Chrome launches to `chat.com`, `chatgpt.com`, and `chat.openai.com`, the extension injects a normal Chrome user agent when the caller did not explicitly provide `--user-agent`; this keeps the default headless workflow usable without forcing `--headed` or `--auto-connect`
 <!-- agent-browser-playbook:start wrapper-tab-recovery -->
 <!-- Generated from extensions/agent-browser/lib/playbook.ts. Run `npm run docs -- playbook write` to update. -->
 - After launch-scoped open/goto/navigate calls that can restore existing tabs (for example --profile, --session-name, or --state), agent_browser best-effort re-selects the tab whose URL matches the returned page when restored tabs steal focus during launch.
@@ -297,59 +339,32 @@ Current cautions:
 - After a successful command on a known target tab, agent_browser also best-effort restores that intended tab if a restored/background tab steals focus after the command completes.
 - If a known session target unexpectedly reports about:blank, agent_browser preserves the prior intended target, best-effort re-selects it when it still exists, and reports exact recovery guidance when it cannot be re-selected.
 <!-- agent-browser-playbook:end wrapper-tab-recovery -->
-- oversized snapshots and oversized generic outputs compact inline content and print the actual spill file path directly in the tool result when a spill file exists; recent spills and explicit saved artifacts are also summarized in `details.artifactManifest`, including `evicted` entries when retention budgets remove older persisted files
-- artifact-producing commands render direct readable artifact metadata in visible content and `details.artifacts`: `kind`/`artifactType`, `path`, `requestedPath`, `absolutePath`, `exists`, `sizeBytes`, `status`, `cwd`, `session`, and `tempPath` when the wrapper repaired an upstream temp fallback
-- if the caller explicitly passes `--json`, the visible text content is valid JSON; for `stream status`, the wrapper enriches data with `wsUrl` and `frameFormat`
-- `trace` and `profiler` share upstream tracing machinery; the wrapper blocks starts/stops that conflict with owner state it observed in the current Pi session, but the message says "wrapper believes" because upstream or external CLI calls can desynchronize that local state
-- explicit caller-provided `--session` values are treated as user-managed and are not auto-closed by the extension
-- explicit caller-provided `--user-agent` values win over the ChatGPT/OpenAI compatibility workaround
-- tool progress/details redact sensitive invocation values such as `--headers`, proxy credentials, and auth-bearing URL parameters before echoing them back into Pi
-### Switching from public browsing to a fresh profile/debug launch
-A common agent workflow is:
-1. browse a public page with the default implicit session
-2. then switch to a fresh authenticated/profile/debug launch
-Use `sessionMode: "fresh"` for that transition instead of relying on the implicit session:
-```json
-{
-  "args": ["--profile", "Default", "open", "https://example.com/account"],
-  "sessionMode": "fresh"
-}
-```
-After that call succeeds, later default `sessionMode: "auto"` calls continue in the new fresh browser.
-If you want to name the new upstream session yourself, pass an explicit session instead:
-```json
-{
-  "args": ["--session", "auth-flow", "--profile", "Default", "open", "https://example.com/account"]
-}
-```
-## Docs
+## Project map
-- [`docs/REQUIREMENTS.md`](docs/REQUIREMENTS.md) — product requirements and constraints
-- [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md) — current architecture decision
-- [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md) — proposed v1 tool shape
-- [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md) — local repo-readable command reference for the blocked direct-binary path
-- [`docs/RELEASE.md`](docs/RELEASE.md) — maintainer release and package verification workflow
+| Path | Purpose |
+|---|---|
+| `extensions/agent-browser/index.ts` | Pi extension entrypoint and native tool wrapper |
+| `extensions/agent-browser/lib/runtime.ts` | Args, session planning, redaction, process, and runtime helpers |
+| `extensions/agent-browser/lib/results/` | Model-facing result rendering and error guidance |
+| `extensions/agent-browser/lib/playbook.ts` | Canonical generated agent/browser guidance |
+| `docs/COMMAND_REFERENCE.md` | Repo-readable native command reference |
+| `docs/TOOL_CONTRACT.md` | Tool parameters, result shape, and behavior contract |
+| `docs/ARCHITECTURE.md` | Design decisions and implementation structure |
+| `docs/REQUIREMENTS.md` | Product requirements and constraints |
+| `docs/RELEASE.md` | Release, package, and lifecycle verification workflow |
+| `test/` | Wrapper, runtime, presentation, lifecycle, and package tests |
-## Documentation rule
+## More docs
-When requirements change in chat:
+- [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md) — full native command reference and upstream capability baseline
+- [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md) — exact tool contract
+- [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md) — how the wrapper is designed
+- [`docs/REQUIREMENTS.md`](docs/REQUIREMENTS.md) — product constraints and non-goals
+- [`docs/RELEASE.md`](docs/RELEASE.md) — maintainer release workflow
-1. update `docs/REQUIREMENTS.md`
-2. update the affected design docs
-3. update this README if user-facing expectations changed
+## Next action
-When the upstream `agent-browser` binary changes:
+If you are a user, install the package and ask Pi to open a public page with `agent_browser`.
-1. re-check the upstream command/help surface
-2. update `docs/COMMAND_REFERENCE.md`
-3. update tool guidance, README, and release docs if behavior or recommended usage changed
-4. verify the blocked direct-binary path still has an equally usable local extension-side documentation path
+If you are evaluating the implementation, read [`extensions/agent-browser/index.ts`](extensions/agent-browser/index.ts), then run `npm run verify`.

package/docs/ARCHITECTURE.md CHANGED Viewed

@@ -31,7 +31,7 @@ The extension should:
 - resolve `agent-browser` from `PATH`
 - invoke it directly, not through a shell
 - inject `--json`
-- support optional stdin only for `eval --stdin` and `batch`, rejecting other command/stdin combinations before launch
+- support optional stdin only for `eval --stdin`, `batch`, and `auth save --password-stdin`, rejecting other command/stdin combinations before launch
 ### Agent-first UX

package/docs/COMMAND_REFERENCE.md CHANGED Viewed

@@ -34,7 +34,7 @@ Tool parameters:
 ```
 - `args`: exact `agent-browser` CLI tokens after the binary name.
-- `stdin`: only for `batch` and `eval --stdin`; other command/stdin combinations are rejected before `agent-browser` is launched.
+- `stdin`: only for `batch`, `eval --stdin`, and `auth save --password-stdin`; other command/stdin combinations are rejected before `agent-browser` is launched.
 - `sessionMode`:
   - `"auto"` reuses the extension-managed session when possible.
   - `"fresh"` rotates that managed session to a fresh upstream launch so launch-scoped flags like `--profile`, `--session-name`, `--cdp`, `--state`, `--auto-connect`, `--init-script`, or `--enable` apply.
@@ -220,7 +220,7 @@ The tables below intentionally list more than the recommended workflow. Rare com
 ### Built-in skills
-Native-tool note: upstream skills are written for the standalone `agent-browser` CLI and may show bash/heredoc examples. In pi, convert those examples to `agent_browser` calls: pass CLI tokens in `args`, and pass heredoc/stdin bodies through the tool `stdin` field for `batch` or `eval --stdin`.
+Native-tool note: upstream skills are written for the standalone `agent-browser` CLI and may show bash/heredoc examples. In pi, convert those examples to `agent_browser` calls: pass CLI tokens in `args`, and pass heredoc/stdin bodies through the tool `stdin` field for `batch`, `eval --stdin`, or `auth save --password-stdin`.
 | Command | Purpose |
 | --- | --- |
@@ -300,9 +300,11 @@ These calls return plain text and stay stateless: the extension does not inject
 | `cookies [get|set|clear]` | Manage cookies. `set` supports `--url`, `--domain`, `--path`, `--httpOnly`, `--secure`, `--sameSite`, `--expires`, and `--curl <file>` for JSON, cURL, or bare Cookie-header bulk imports. |
 | `storage <local|session>` | Manage web storage. |
+Privacy note: `cookies get` can expose real profile cookies. Do not run it against `--profile Default` or other authenticated profiles unless the user explicitly needs cookie inspection; prefer task-specific page actions and storage checks.
 ### Tabs
-Stable tab ids look like `t1`, `t2`, and `t3`. Optional user labels such as `docs` or `app` are interchangeable with ids wherever a tab reference is accepted.
+Stable tab ids look like `t1`, `t2`, and `t3`. Optional user labels such as `docs` or `app` are interchangeable with ids wherever a tab reference is accepted. Upstream help may refer to numeric tab positions, but this wrapper guidance uses stable `t<N>` ids because positional integers are not accepted by current upstream `agent-browser`.
 | Command | Purpose |
 | --- | --- |
@@ -377,7 +379,7 @@ When these diagnostic commands are invoked through the native `agent_browser` to
 | Command | Purpose |
 | --- | --- |
 | `batch [--bail] ["cmd" ...]` | Execute multiple commands sequentially from args or stdin. |
-| `auth save <name> [opts]` | Save an auth profile with options such as `--url`, `--username`, `--password`, or `--password-stdin`. |
+| `auth save <name> [opts]` | Save an auth profile with options such as `--url`, `--username`, `--password`, or `--password-stdin`. Prefer `--password-stdin` with the tool `stdin` field; avoid putting passwords in `args`. |
 | `auth login <name>` | Login using saved credentials. |
 | `auth list` | List saved auth profiles. |
 | `auth show <name>` | Show auth profile metadata. |

package/docs/TOOL_CONTRACT.md CHANGED Viewed

@@ -78,7 +78,7 @@ Examples:
 - type: `string`
 - optional
-- raw stdin for `eval --stdin` and `batch`
+- raw stdin for `eval --stdin`, `batch`, and `auth save --password-stdin`
 - rejected before launch for any other command/stdin combination, including commands such as `click`, `snapshot`, or `open`
 Examples:
@@ -91,6 +91,10 @@ Examples:
 { "args": ["batch"], "stdin": "[[\"open\",\"https://example.com\"],[\"snapshot\",\"-i\"]]" }
 ```
+```json
+{ "args": ["auth", "save", "my-login", "--password-stdin"], "stdin": "password from the user-approved secret source" }
+```
 ### `sessionMode`
 - type: `"auto" | "fresh"`
@@ -222,7 +226,7 @@ If `agent-browser` is not on `PATH`, fail with a message that:
 - reconstruct the current extension-managed session and latest `artifactManifest` from persisted tool details on resume/reload so later default calls keep following the active managed browser and can continue reporting artifact retention state
 - when an unnamed `sessionMode: "fresh"` launch succeeds, make it the new extension-managed session so later default calls keep using it
 - if that unnamed fresh launch replaced an already-active managed session, best-effort close the old managed session after the switch succeeds
-- treat explicit caller-provided `--session` choices as user-managed
+- treat explicit caller-provided `--session` choices as user-managed; `--session` isolates a live browser session but is not a persisted tab/auth restore mechanism after `close`, so use `--profile`, `--session-name`, or `--state` when persisted auth/tab state is required
 - pass explicit `--profile` straight through to upstream `agent-browser`; no profile-cloning or isolation layer is added in v1
 <!-- agent-browser-playbook:start wrapper-tab-recovery -->
 <!-- Generated from extensions/agent-browser/lib/playbook.ts. Run `npm run docs -- playbook write` to update. -->

package/extensions/agent-browser/index.ts CHANGED Viewed

@@ -73,7 +73,7 @@ const AGENT_BROWSER_PARAMS = Type.Object({
 		description: "Exact agent-browser CLI arguments, excluding the binary name and any shell operators.",
 		minItems: 1,
 	}),
-	stdin: Type.Optional(Type.String({ description: "Optional raw stdin content; only supported for batch and eval --stdin." })),
+	stdin: Type.Optional(Type.String({ description: "Optional raw stdin content; only supported for batch, eval --stdin, and auth save --password-stdin." })),
 	sessionMode: Type.Optional(
 		StringEnum(["auto", "fresh"] as const, {
 			description:
@@ -936,6 +936,45 @@ function restoreArtifactManifestFromBranch(branch: unknown[]): SessionArtifactMa
 	return restoredManifest;
 }
+function isPasswordStdinAuthSave(options: { command?: string; commandTokens: string[] }): boolean {
+	return options.command === "auth" && options.commandTokens[1] === "save" && options.commandTokens.includes("--password-stdin");
+}
+function getExactSensitiveStdinValues(options: { command?: string; commandTokens: string[]; stdin?: string }): string[] {
+	if (options.stdin === undefined || !isPasswordStdinAuthSave(options)) {
+		return [];
+	}
+	return [...new Set([options.stdin, options.stdin.trimEnd(), options.stdin.trim()].filter((value) => value.length > 0))];
+}
+function redactExactSensitiveText(text: string, sensitiveValues: string[]): string {
+	let redacted = text;
+	for (const value of sensitiveValues) {
+		redacted = redacted.split(value).join("[REDACTED]");
+	}
+	return redacted;
+}
+function redactExactSensitiveValue(value: unknown, sensitiveValues: string[]): unknown {
+	if (sensitiveValues.length === 0) {
+		return value;
+	}
+	if (typeof value === "string") {
+		return redactExactSensitiveText(value, sensitiveValues);
+	}
+	if (Array.isArray(value)) {
+		return value.map((item) => redactExactSensitiveValue(item, sensitiveValues));
+	}
+	if (!isRecord(value)) {
+		return value;
+	}
+	return Object.fromEntries(Object.entries(value).map(([key, entryValue]) => [key, redactExactSensitiveValue(entryValue, sensitiveValues)]));
+}
+function redactToolDetails(details: Record<string, unknown>, sensitiveValues: string[]): Record<string, unknown> {
+	return redactSensitiveValue(redactExactSensitiveValue(details, sensitiveValues)) as Record<string, unknown>;
+}
 function validateStdinCommandContract(options: { command?: string; commandTokens: string[]; stdin?: string }): string | undefined {
 	if (options.stdin === undefined) {
 		return undefined;
@@ -946,8 +985,11 @@ function validateStdinCommandContract(options: { command?: string; commandTokens
 	if (options.command === "eval" && options.commandTokens.includes("--stdin")) {
 		return undefined;
 	}
+	if (isPasswordStdinAuthSave(options)) {
+		return undefined;
+	}
 	const commandLabel = options.command ? `\`${options.command}\`` : "the requested command";
-	return `agent_browser stdin is only supported for \`batch\` and \`eval --stdin\`; remove stdin from ${commandLabel} or use one of those command forms.`;
+	return `agent_browser stdin is only supported for \`batch\`, \`eval --stdin\`, and \`auth save --password-stdin\`; remove stdin from ${commandLabel} or use one of those command forms.`;
 }
 function supportsPinnedStdinCommand(options: { command?: string; commandTokens: string[]; stdin?: string }): boolean {
@@ -1029,6 +1071,17 @@ function parseUserBatchStdin(stdin: string | undefined): { error?: string; steps
 	}
 }
+function getStaleRefArgs(commandTokens: string[], stdin?: string): string[] {
+	if (commandTokens[0] !== "batch" || stdin === undefined) {
+		return commandTokens;
+	}
+	const parsed = parseUserBatchStdin(stdin);
+	if (parsed.error || parsed.steps === undefined) {
+		return commandTokens;
+	}
+	return parsed.steps.flatMap((step) => step);
+}
 function buildPinnedBatchPlan(options: {
 	command?: string;
 	commandTokens: string[];
@@ -1293,6 +1346,7 @@ function getPersistentSessionArtifactStore(ctx: {
 async function preserveParseFailureOutput(options: {
 	artifactManifest?: SessionArtifactManifest;
+	exactSensitiveValues?: string[];
 	persistentArtifactStore?: PersistentSessionArtifactStore;
 	stdoutSpillPath?: string;
 }): Promise<{
@@ -1306,7 +1360,7 @@ async function preserveParseFailureOutput(options: {
 	}
 	try {
-		const rawOutput = await readFile(options.stdoutSpillPath);
+		const rawOutput = redactExactSensitiveText(await readFile(options.stdoutSpillPath, "utf8"), options.exactSensitiveValues ?? []);
 		const nowMs = Date.now();
 		let evictedArtifacts: PersistentSessionArtifactEviction[] = [];
 		let fullOutputPath: string;
@@ -1546,6 +1600,11 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 				}
 				const commandTokens = extractCommandTokens(preparedArgs.args);
+				const exactSensitiveValues = getExactSensitiveStdinValues({
+					command: executionPlan.commandInfo.command,
+					commandTokens,
+					stdin: params.stdin,
+				});
 				const traceOwnerGuardMessage = getTraceOwnerGuardMessage({
 					command: executionPlan.commandInfo.command,
 					sessionName: executionPlan.sessionName,
@@ -1755,9 +1814,13 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 					presentationEnvelope = repairedBatchScreenshots.envelope;
 					const screenshotArtifactRequest = repairedScreenshot.request;
 					const batchScreenshotArtifactRequests = repairedBatchScreenshots.requests;
+					if (presentationEnvelope && exactSensitiveValues.length > 0) {
+						presentationEnvelope = redactExactSensitiveValue(presentationEnvelope, exactSensitiveValues) as AgentBrowserEnvelope;
+					}
 					const parseFailureOutput = parseError
 						? await preserveParseFailureOutput({
 								artifactManifest,
+								exactSensitiveValues,
 								persistentArtifactStore,
 								stdoutSpillPath: processResult.stdoutSpillPath,
 							})
@@ -1934,6 +1997,7 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 						exitCode: processResult.exitCode,
 						parseError,
 						plainTextInspection,
+						staleRefArgs: getStaleRefArgs(commandTokens, params.stdin),
 						spawnError: processResult.spawnError,
 						stderr: processResult.stderr,
 						timedOut: processResult.timedOut,
@@ -2009,54 +2073,55 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 							contentWithSessionWarnings.unshift({ type: "text", text: warningText });
 						}
 					}
-					const redactedContent = contentWithSessionWarnings.map((item) =>
-						item.type === "text" && !(userRequestedJson && !plainTextInspection) ? { ...item, text: redactSensitiveText(item.text) } : item,
-					);
+					const redactedContent = contentWithSessionWarnings.map((item) => {
+						if (item.type !== "text") return item;
+						const exactRedactedText = redactExactSensitiveText(item.text, exactSensitiveValues);
+						return userRequestedJson && !plainTextInspection
+							? { ...item, text: exactRedactedText }
+							: { ...item, text: redactSensitiveText(exactRedactedText) };
+					});
+					const details = {
+						args: redactedArgs,
+						artifactManifest: presentation.artifactManifest,
+						artifactRetentionSummary: presentation.artifactRetentionSummary,
+						artifacts: presentation.artifacts,
+						batchFailure: presentation.batchFailure,
+						batchSteps: presentation.batchSteps,
+						command: executionPlan.commandInfo.command,
+						compatibilityWorkaround,
+						subcommand: executionPlan.commandInfo.subcommand,
+						data: presentation.data,
+						error: plainTextInspection ? undefined : presentationEnvelope?.error,
+						inspection: plainTextInspection || undefined,
+						navigationSummary,
+						aboutBlankSessionMismatch,
+						openResultTabCorrection,
+						effectiveArgs: redactedProcessArgs,
+						exitCode: processResult.exitCode,
+						fullOutputPath: parseFailureOutput.fullOutputPath ?? presentation.fullOutputPath,
+						fullOutputPaths: presentation.fullOutputPaths,
+						fullOutputUnavailable: parseFailureOutput.fullOutputUnavailable,
+						imagePath: presentation.imagePath,
+						imagePaths: presentation.imagePaths,
+						parseError: plainTextInspection ? undefined : parseError,
+						savedFile: presentation.savedFile,
+						savedFilePath: presentation.savedFilePath,
+						sessionMode,
+						sessionTabCorrection,
+						sessionTabTarget: currentSessionTabTarget,
+						...buildSessionDetailFields(executionPlan.sessionName, executionPlan.usedImplicitSession),
+						sessionRecoveryHint: redactedRecoveryHint,
+						startupScopedFlags: executionPlan.startupScopedFlags,
+						stderr: processResult.stderr,
+						stdout: plainTextInspection ? inspectionText ?? "" : parseSucceeded ? undefined : processResult.stdout,
+						summary: presentation.summary,
+						timedOut: processResult.timedOut || undefined,
+						timeoutMs: processResult.timeoutMs,
+					};
 					return {
 						content: redactedContent,
-						details: {
-							args: redactedArgs,
-							artifactManifest: redactSensitiveValue(presentation.artifactManifest),
-							artifactRetentionSummary: presentation.artifactRetentionSummary,
-							artifacts: redactSensitiveValue(presentation.artifacts),
-							batchFailure: redactSensitiveValue(presentation.batchFailure),
-							batchSteps: redactSensitiveValue(presentation.batchSteps),
-							command: executionPlan.commandInfo.command,
-							compatibilityWorkaround,
-							subcommand: executionPlan.commandInfo.subcommand,
-							data: redactSensitiveValue(presentation.data),
-							error: plainTextInspection ? undefined : redactSensitiveValue(presentationEnvelope?.error),
-							inspection: plainTextInspection || undefined,
-							navigationSummary: redactSensitiveValue(navigationSummary),
-							aboutBlankSessionMismatch: redactSensitiveValue(aboutBlankSessionMismatch),
-							openResultTabCorrection: redactSensitiveValue(openResultTabCorrection),
-							effectiveArgs: redactedProcessArgs,
-							exitCode: processResult.exitCode,
-							fullOutputPath: parseFailureOutput.fullOutputPath ?? presentation.fullOutputPath,
-							fullOutputPaths: presentation.fullOutputPaths,
-							fullOutputUnavailable: parseFailureOutput.fullOutputUnavailable,
-							imagePath: presentation.imagePath,
-							imagePaths: presentation.imagePaths,
-							parseError: plainTextInspection ? undefined : parseError,
-							savedFile: redactSensitiveValue(presentation.savedFile),
-							savedFilePath: presentation.savedFilePath ? redactSensitiveText(presentation.savedFilePath) : undefined,
-							sessionMode,
-							sessionTabCorrection: redactSensitiveValue(sessionTabCorrection),
-							sessionTabTarget: redactSensitiveValue(currentSessionTabTarget),
-							...buildSessionDetailFields(executionPlan.sessionName, executionPlan.usedImplicitSession),
-							sessionRecoveryHint: redactedRecoveryHint,
-							startupScopedFlags: executionPlan.startupScopedFlags,
-							stderr: processResult.stderr ? redactSensitiveText(processResult.stderr) : undefined,
-							stdout: plainTextInspection
-								? redactSensitiveText(inspectionText ?? "")
-								: parseSucceeded
-									? undefined
-									: redactSensitiveText(processResult.stdout),
-							summary: redactSensitiveText(presentation.summary),
-							timedOut: processResult.timedOut || undefined,
-							timeoutMs: processResult.timeoutMs,
-						},
+						details: redactToolDetails(details, exactSensitiveValues),
 						isError: !succeeded,
 					};
 				} finally {

package/extensions/agent-browser/lib/playbook.ts CHANGED Viewed

@@ -3,7 +3,7 @@
  * Responsibilities: Define stable guidance bullets, native tool-call examples, and wrapper-behavior notes without importing runtime/browser process code.
  * Scope: Agent-facing documentation and prompt-guidance text only; command execution and wrapper state behavior live in runtime modules.
  * Usage: Imported by the extension entrypoint for promptGuidelines and by the documentation drift-check script for generated Markdown blocks.
- * Invariants/Assumptions: The native pi tool receives args after the agent-browser binary, stdin is only for batch/eval --stdin, and wrapper behavior documented here must match implemented behavior.
+ * Invariants/Assumptions: The native pi tool receives args after the agent-browser binary, stdin is only for batch/eval --stdin/auth save --password-stdin, and wrapper behavior documented here must match implemented behavior.
  */
 export const PROJECT_RULE_PROMPT =
@@ -14,9 +14,9 @@ export const TOOL_PROMPT_GUIDELINES_PREFIX = [
 ] as const;
 export const QUICK_START_GUIDELINES = [
-	"Quick start mental model: args are the exact agent-browser CLI args after the binary; stdin is only for batch and eval --stdin, and other command/stdin combinations are rejected before launch; sessionMode=fresh switches the extension-managed pi-scoped session to a fresh upstream launch when you need new --profile, --session-name, --cdp, --state, --auto-connect, --init-script, or --enable state.",
+	"Quick start mental model: args are the exact agent-browser CLI args after the binary; stdin is only for batch, eval --stdin, and auth save --password-stdin, and other command/stdin combinations are rejected before launch; sessionMode=fresh switches the extension-managed pi-scoped session to a fresh upstream launch when you need new --profile, --session-name, --cdp, --state, --auto-connect, --init-script, or --enable state.",
 	"Common first calls: { args: [\"open\", \"https://example.com\"] } then { args: [\"snapshot\", \"-i\"] }; after navigation, use { args: [\"click\", \"@e2\"] } then { args: [\"snapshot\", \"-i\"] }.",
-	"Common advanced calls: { args: [\"batch\"], stdin: \"[[\\\"open\\\",\\\"https://example.com\\\"],[\\\"snapshot\\\",\\\"-i\\\"]]\" }, { args: [\"eval\", \"--stdin\"], stdin: \"document.title\" }, { args: [\"--profile\", \"Default\", \"open\", \"https://example.com/account\"], sessionMode: \"fresh\" }, and { args: [\"open\", \"--enable\", \"react-devtools\", \"https://example.com\"], sessionMode: \"fresh\" }.",
+	"Common advanced calls: { args: [\"batch\"], stdin: \"[[\\\"open\\\",\\\"https://example.com\\\"],[\\\"snapshot\\\",\\\"-i\\\"]]\" }, { args: [\"eval\", \"--stdin\"], stdin: \"document.title\" }, { args: [\"auth\", \"save\", \"name\", \"--password-stdin\"], stdin: \"<password from user-approved secret source>\" }, { args: [\"--profile\", \"Default\", \"open\", \"https://example.com/account\"], sessionMode: \"fresh\" }, and { args: [\"open\", \"--enable\", \"react-devtools\", \"https://example.com\"], sessionMode: \"fresh\" }.",
 	"High-value command reference: download <selector> <path> saves a file triggered by a click; get title/url/text/html/value/attr/count reads page state; screenshot [path] captures an image; pdf <path> saves a PDF; tab list and tab <tab-id-or-label> inspect or recover the active tab; react tree/inspect/renders/suspense introspect React after --enable react-devtools; vitals [url] measures Core Web Vitals; pushstate <url> performs SPA navigation.",
 	"For artifact-producing commands, read the visible artifact block for requested path, absolute path, existence, size, type, cwd, and session; details.artifacts contains the same machine-readable metadata. For annotated screenshots inside batch, put --annotate in top-level args (for example { args: [\"--annotate\", \"batch\"], stdin: \"[[\\\"screenshot\\\",\\\"/tmp/page.png\\\"]]\" }) rather than inside the screenshot step.",
 ] as const;
@@ -47,7 +47,7 @@ export const TOOL_PROMPT_GUIDELINES_SUFFIX = [
 	"Prefer agent_browser over bash for opening sites, reading docs on the web, clicking, filling, screenshots, eval, and batch workflows.",
 	"Do not fall back to osascript, AppleScript, or generic browser-driving bash commands when agent_browser can do the job.",
 	"Pass exact agent-browser CLI arguments in args, excluding the binary name.",
-	"Use stdin only for eval --stdin and batch instead of shell heredocs; other command/stdin combinations are rejected before launch.",
+	"Use stdin only for eval --stdin, batch, and auth save --password-stdin instead of shell heredocs or password args; other command/stdin combinations are rejected before launch.",
 	"Let the extension-managed session handle the common path unless you explicitly need a fresh launch for upstream flags like --profile, --session-name, --cdp, --state, --auto-connect, --init-script, or --enable.",
 	"Use sessionMode=fresh when switching from an existing implicit session to a new profile/debug/init-script launch without inventing a fixed explicit session name; later auto calls will follow that new session.",
 ] as const;

package/extensions/agent-browser/lib/results/envelope.ts CHANGED Viewed

@@ -135,6 +135,17 @@ function buildUpstreamIpcReadTimeoutMessage(): string {
 	].join(" ");
 }
+function maybeAppendStaleRefHint(message: string, args?: string[]): string {
+	const usedRef = args?.some((arg) => /^@e\d+\b/.test(arg)) ?? false;
+	if (!usedRef || !/could not locate element|element not found|no element/i.test(message)) {
+		return message;
+	}
+	return [
+		message,
+		"This @ref may be stale after navigation, scrolling, or a DOM update. Run `agent_browser` with `{ \"args\": [\"snapshot\", \"-i\"] }` again and retry with a current ref, or use a stable `find` locator.",
+	].join("\n");
+}
 export function getAgentBrowserErrorText(options: {
 	aborted: boolean;
 	command?: string;
@@ -144,6 +155,7 @@ export function getAgentBrowserErrorText(options: {
 	parseError?: string;
 	plainTextInspection: boolean;
 	spawnError?: Error;
+	staleRefArgs?: string[];
 	stderr: string;
 	timedOut?: boolean;
 	timeoutMs?: number;
@@ -163,7 +175,8 @@ export function getAgentBrowserErrorText(options: {
 		if (envelopeErrorText && isUpstreamIpcReadTimeoutMessage(envelopeErrorText)) {
 			return buildUpstreamIpcReadTimeoutMessage();
 		}
-		return envelopeErrorText ?? (stderr.trim() || buildFailureFallback(options));
+		const fallback = envelopeErrorText ?? (stderr.trim() || buildFailureFallback(options));
+		return maybeAppendStaleRefHint(fallback, options.staleRefArgs ?? options.effectiveArgs);
 	}
 	if (exitCode !== 0) {
 		return stderr.trim() || buildExitCodeFallback(options);

package/extensions/agent-browser/lib/results/presentation.ts CHANGED Viewed

@@ -349,6 +349,9 @@ function splitShellWords(input: string): string[] | undefined {
 			current += input[index];
 			continue;
 		}
+		if (char === "#" && current.length === 0) {
+			break;
+		}
 		if (/\s/.test(char)) {
 			if (current.length > 0) {
 				words.push(current);
@@ -384,7 +387,7 @@ function formatNativeSkillContent(content: string): string {
 		const heredocMatch = /^(.*?)\s+(<<-?)['"]?([A-Za-z_][A-Za-z0-9_]*)['"]?\s*$/.exec(rawArgsText);
 		const argsText = heredocMatch?.[1] ?? rawArgsText;
 		const args = splitShellWords(argsText);
-		if (!args) {
+		if (!args || args.length === 0) {
 			output.push(line);
 			continue;
 		}
@@ -419,7 +422,7 @@ function formatSkillsText(commandInfo: CommandInfo, data: unknown): string | und
 	if (content) {
 		const note = [
 			"Pi native-tool note: upstream skill text was adapted for this native tool.",
-			"Use args for CLI tokens and stdin only for batch or eval --stdin; do not pipe heredocs through bash unless the user explicitly asks for a bash workflow.",
+			"Use args for CLI tokens and stdin only for batch, eval --stdin, or auth save --password-stdin; do not pipe heredocs through bash unless the user explicitly asks for a bash workflow.",
 		].join("\n");
 		return `${note}\n\n${redactModelFacingText(formatNativeSkillContent(content))}`;
 	}

package/extensions/agent-browser/lib/runtime.ts CHANGED Viewed

@@ -87,11 +87,29 @@ const LEGACY_BASH_ALLOW_PATTERNS = [
 const BROWSER_PROMPT_PATTERNS = [
 	/\b(?:agent[_ -]?browser|browser automation|eval\s+--stdin|screenshot|snapshot|tab\s+list)\b/i,
 	/\b(?:react\s+(?:tree|inspect|renders|suspense)|web\s+vitals|core\s+web\s+vitals|pushstate)\b/i,
+	/\b(?:live\s+docs?|online\s+research|research\s+(?:online|the\s+web)|search\s+(?:online|the\s+web)|web\s+research)\b/i,
 	/\bbrowser\b.*\b(?:automation|click|fill|navigate|open|page|screenshot|site|snapshot|tab|url|visit|web(?:site| page)?)\b/i,
 	/\b(?:browse|click|fill|login|navigate|open|visit)\b.*\b(?:https?:\/\/\S+|page|site|tab|url|web(?:site| page)?)\b/i,
 ];
 const INSPECTION_FLAGS = new Set(["--help", "-h", "--version", "-V"]);
-const SENSITIVE_VALUE_FLAGS = new Set(["--headers", "--proxy"]);
+const SENSITIVE_VALUE_FLAGS = new Set(["--headers", "--password", "--proxy"]);
+const GLOBAL_VALUE_FLAGS_ALLOWING_DASH_VALUE = new Set(["--args"]);
+const GLOBAL_BOOLEAN_FLAGS_WITH_OPTIONAL_VALUES = new Set([
+	"--allow-file-access",
+	"--annotate",
+	"--auto-connect",
+	"--confirm-interactive",
+	"--content-boundaries",
+	"--debug",
+	"--headed",
+	"--ignore-https-errors",
+	"--json",
+	"--no-auto-dialog",
+	"--quiet",
+	"-q",
+	"--verbose",
+	"-v",
+]);
 const SENSITIVE_QUERY_PARAM_PATTERN =
 	/^(?:access(?:_|-)?token|api(?:_|-)?key|auth|authorization|bearer|client(?:_|-)?secret|code|cookie|id(?:_|-)?token|key|pass(?:word)?|refresh(?:_|-)?token|secret|session(?:_|-)?id|sig(?:nature)?|token)$/i;
 const SENSITIVE_FIELD_NAME_PATTERN =
@@ -425,6 +443,15 @@ export function redactInvocationArgs(args: string[]): string[] {
 		redacted.push(redactUrlToken(token));
 	}
+	const commandStartIndex = findCommandStartIndex(args);
+	if (commandStartIndex !== undefined && args[commandStartIndex] === "set" && args[commandStartIndex + 1] === "credentials") {
+		for (const index of [commandStartIndex + 2, commandStartIndex + 3]) {
+			if (redacted[index] !== undefined) {
+				redacted[index] = "[REDACTED]";
+			}
+		}
+	}
 	return redacted;
 }
@@ -654,8 +681,14 @@ export function validateToolArgs(args: string[]): string | undefined {
 	return undefined;
 }
+function isBooleanLiteral(token: string | undefined): boolean {
+	const normalized = token?.trim().toLowerCase();
+	return normalized === "true" || normalized === "false";
+}
 function getInvalidValueFlagDetails(args: string[]): InvalidValueFlagDetails | undefined {
-	for (const [index, token] of args.entries()) {
+	for (let index = 0; index < args.length; index += 1) {
+		const token = args[index];
 		if (!token.startsWith("-")) {
 			continue;
 		}
@@ -682,7 +715,7 @@ function getInvalidValueFlagDetails(args: string[]): InvalidValueFlagDetails | u
 				reason: "missing-value",
 			};
 		}
-		if (receivedToken.startsWith("-")) {
+		if (receivedToken.startsWith("-") && !GLOBAL_VALUE_FLAGS_ALLOWING_DASH_VALUE.has(normalizedToken)) {
 			return {
 				flag: normalizedToken,
 				index,
@@ -690,7 +723,7 @@ function getInvalidValueFlagDetails(args: string[]): InvalidValueFlagDetails | u
 				receivedToken,
 			};
 		}
-		continue;
+		index += 1;
 	}
 	return undefined;
 }
@@ -794,7 +827,7 @@ function getCompatibilityWorkaround(args: string[], commandInfo: CommandInfo): C
 	if (isBooleanFlagEnabled(args, "--headed")) {
 		return undefined;
 	}
-	if (hasFlagToken(args, "--cdp") || hasFlagToken(args, "--provider") || hasFlagToken(args, "-p") || hasFlagToken(args, "--auto-connect")) {
+	if (hasFlagToken(args, "--cdp") || hasFlagToken(args, "--provider") || hasFlagToken(args, "-p") || isBooleanFlagEnabled(args, "--auto-connect")) {
 		return undefined;
 	}
 	const engine = getFlagValue(args, "--engine");
@@ -831,7 +864,7 @@ export function extractExplicitSessionName(args: string[]): string | undefined {
 export function getStartupScopedFlags(args: string[]): string[] {
 	return LAUNCH_SCOPED_FLAG_DEFINITIONS
 		.map((definition) => definition.flag)
-		.filter((flag) => hasFlagToken(args, flag));
+		.filter((flag) => flag === "--auto-connect" ? isBooleanFlagEnabled(args, flag) : hasFlagToken(args, flag));
 }
 export function hasLaunchScopedTabCorrectionFlag(args: string[]): boolean {
@@ -1039,7 +1072,7 @@ export function parseCommandInfo(args: string[]): CommandInfo {
 	};
 }
-export function extractCommandTokens(args: string[]): string[] {
+function findCommandStartIndex(args: string[]): number | undefined {
 	for (let index = 0; index < args.length; index += 1) {
 		const token = args[index];
 		if (token.startsWith("--session=")) {
@@ -1049,10 +1082,21 @@ export function extractCommandTokens(args: string[]): string[] {
 			const normalizedToken = token.split("=", 1)[0] ?? token;
 			if (GLOBAL_FLAGS_WITH_VALUES.has(normalizedToken) && !token.includes("=")) {
 				index += 1;
+			} else if (
+				GLOBAL_BOOLEAN_FLAGS_WITH_OPTIONAL_VALUES.has(normalizedToken) &&
+				!token.includes("=") &&
+				isBooleanLiteral(args[index + 1])
+			) {
+				index += 1;
 			}
 			continue;
 		}
-		return args.slice(index);
+		return index;
 	}
-	return [];
+	return undefined;
+}
+export function extractCommandTokens(args: string[]): string[] {
+	const commandStartIndex = findCommandStartIndex(args);
+	return commandStartIndex === undefined ? [] : args.slice(commandStartIndex);
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pi-agent-browser-native",
-  "version": "0.2.22",
+  "version": "0.2.23",
   "description": "pi extension that exposes agent-browser as a native tool for browser automation",
   "type": "module",
   "author": "Mitch Fultz (https://github.com/fitchmultz)",