npm - pi-agent-browser-native - Versions diffs - 0.2.54 → 0.2.56 - Mend

pi-agent-browser-native 0.2.54 → 0.2.56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/CHANGELOG.md +29 -0
package/dist/extensions/agent-browser/lib/config-policy.js +3 -2
package/dist/extensions/agent-browser/lib/playbook.js +2 -2
package/dist/extensions/agent-browser/lib/prompt-policy.js +16 -8
package/docs/ARCHITECTURE.md +1 -1
package/docs/COMMAND_REFERENCE.md +37 -16
package/docs/SUPPORT_MATRIX.md +18 -17
package/docs/TOOL_CONTRACT.md +2 -2
package/package.json +22 -6
package/scripts/agent-browser-capability-baseline.mjs +14 -3
package/scripts/platform-smoke.mjs +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,35 @@
 ## Unreleased
+## 0.2.56 - 2026-06-21
+### Fixed
+- Corrected the local Pi development baseline to `@earendil-works/*` `0.79.9`, matching the installed `pi 0.79.9` runtime used for release validation.
+### Validation
+- Re-ran `npm install` and `npm audit --json`; dependency install completed and audit reported zero vulnerabilities.
+## 0.2.55 - 2026-06-21
+### Changed
+- Rebaselined upstream capability metadata, command reference, support docs, playbook guidance, platform smoke image tag, and real-upstream output-shape metadata for `agent-browser` `0.29.1` / vercel-labs/agent-browser@4572acf0d71c0086009206c9c1e2136fc54ec9e5.
+- Documented the new upstream `@agent-browser/sandbox` package guidance, `installSystemDependencies: false`, and stricter `install --with-deps` nonzero behavior while keeping sandbox support outside this thin Pi wrapper.
+- Updated local Pi development dependencies to `@earendil-works/*` `0.79.8`, kept Pi core package peers host-provided, and marked those peers optional to avoid install-time peer noise for package consumers.
+### Fixed
+- Kept optional recording paths from being misclassified as required screenshots when release-smoke prompts are collapsed into one line for tmux automation.
+- Added npm overrides for vulnerable transitive dev dependencies so `npm audit` reports zero vulnerabilities without adding runtime dependencies.
+### Validation
+- Ran `npm run verify -- release` against `agent-browser` `0.29.1`; after rebuilding the Ubuntu image and refreshing the Windows `crabbox-ready` snapshot, the gate passed default verification, command-reference checks, build, lifecycle verification, packaged Pi smoke, and macOS/Ubuntu/Windows-native platform smoke.
+- Ran `npm run verify -- real-upstream`, `npm run verify -- dogfood`, `npm run verify -- benchmark`, `npm run verify -- startup-profile --samples 3`, `npm run docs`, `npm run doctor`, `npm audit --json`, `npm run check:platform-smoke`, `npm run smoke:platform:ubuntu-image`, `npm run smoke:platform:doctor`, focused prompt-guard tests, and `git diff --check`.
+- Ran tmux-driven Pi checkout dogfood with `pi --approve --no-extensions --no-skills -e .`, covering the public Sauce Demo checkout-overview flow with screenshot/recording evidence and no order placement; then verified the collapsed one-line screenshot-plus-recording close guard on `https://example.com` after rebuilding `dist/`.
 ## 0.2.54 - 2026-06-19
 ### Fixed

package/dist/extensions/agent-browser/lib/config-policy.js CHANGED Viewed

@@ -24,11 +24,12 @@ import { join, resolve } from "node:path";
 /** @typedef {{ cwd?: string; env?: NodeJS.ProcessEnv; includeProjectConfig?: boolean }} AgentBrowserConfigLoadOptions */
 /** @typedef {{ browserDefaultProfile?: Required<BrowserDefaultProfileConfig>; browserDefaultProfileScope?: ConfigLayerScope; browserExecutablePath?: string; browserExecutablePathScope?: ConfigLayerScope; trustedBrowserDefaultProfile?: Required<BrowserDefaultProfileConfig>; trustedBrowserDefaultProfileScope?: ConfigLayerScope; trustedBrowserExecutablePath?: string; trustedBrowserExecutablePathScope?: ConfigLayerScope; config: AgentBrowserConfig; webSearchCredentialSources: Partial<Record<WebSearchProvider, CredentialSource>>; webSearchEnabled: boolean; webSearchPreferredProvider: WebSearchProvider; errors: string[]; layers: ConfigLayer[]; paths: AgentBrowserConfigPaths; projectConfigIncluded: boolean; warnings: string[] }} AgentBrowserConfigState */
 /** @typedef {{ scope: string; path: string; exists: boolean }} ConfigFileSummary */
+const CONFIG_DIR_NAME = ".pi";
 export const AGENT_BROWSER_CONFIG_ENV = "PI_AGENT_BROWSER_CONFIG";
 export const BRAVE_API_KEY_ENV = "BRAVE_API_KEY";
 export const EXA_API_KEY_ENV = "EXA_API_KEY";
-export const CONFIG_RELATIVE_PATH = /** @type {const} */ ([".pi", "config", "pi-agent-browser-native", "config.json"]);
-export const GLOBAL_CONFIG_RELATIVE_PATH = /** @type {const} */ ([".pi", "config", "pi-agent-browser-native", "config.json"]);
+export const CONFIG_RELATIVE_PATH = /** @type {const} */ ([CONFIG_DIR_NAME, "config", "pi-agent-browser-native", "config.json"]);
+export const GLOBAL_CONFIG_RELATIVE_PATH = /** @type {const} */ ([CONFIG_DIR_NAME, "config", "pi-agent-browser-native", "config.json"]);
 export const SECRET_COMMAND_TIMEOUT_MS = 15_000;
 /** @type {Readonly<Record<WebSearchProvider, WebSearchProviderDescriptor>>} */
 export const WEB_SEARCH_PROVIDER_DESCRIPTORS = Object.freeze({

package/dist/extensions/agent-browser/lib/playbook.js CHANGED Viewed

@@ -43,9 +43,9 @@ export const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
     "For batch chains that touch cookies, storage, auth, or other secret-bearing commands, use details.batchSteps for per-step artifacts, categories, spill paths, and full structured errors; top-level details.data on batch is only a compact redacted step matrix (success, argv-redacted command, redacted result or scrubbed error text) built from the same presentation rules as standalone calls.",
     "For non-core families, pass current upstream commands through the native tool directly: network route/requests/har (including request filters like --type/--method/--status), diff snapshot/screenshot/url with scoped/baseline options, trace/profiler/record, console/errors/highlight/inspect/clipboard, stream enable/disable/status, dashboard start/stop, device list for iOS simulator inventory, and chat. For compact network requests output, prefer details.nextActions for request detail, route-mock diagnostics, actionable failed-request networkSourceLookup, filtering, clearing the aggregate buffer before repro, or HAR capture follow-ups instead of guessing request-id syntax. Artifact-producing commands report details.artifacts and verification state; long-running starts such as stream, dashboard, trace/profiler, and record should be paired with the matching stop/disable command when the task is done; stream enable already-enabled outcomes are treated as idempotent success with status/disable follow-ups.",
     "For Electron desktop apps, prefer top-level electron for wrapper-owned discovery, isolated launch, status, compact probe, and cleanup: list first, treat likely-sensitive annotations as hints rather than enforcement, launch with the default snapshot handoff unless handoff: \"tabs\" is the safer diagnostic starting point, use electron.probe or snapshot -i/qa.attached for current-session state, and always cleanup the returned launchId when done. electron.launch uses an isolated temporary profile; it does not reuse the app's normal signed-in profile or attach to an already-running authenticated app. For signed-in local app state, host-launch the normal app with --remote-debugging-port when appropriate, then use raw args connect <port|url>; after connect, inspect tab list, select the stable tab id such as tab t2, then run a condition wait or snapshot -i before using refs. close commands (`close`, `quit`, or `exit`) only close the browser/CDP session; leave manually launched app shutdown, profile cleanup, and explicit artifacts to the host owner.",
-    "For provider or specialized app workflows, load version-matched upstream guidance with skills get agentcore|electron|slack|dogfood|vercel-sandbox through the native tool; add --full when you need references/templates, and use skills get --all only for broad skill audits. Provider launches such as -p ios, --provider browserbase/kernel/browseruse/browserless/agentcore, and iOS --device are upstream-owned setup paths; use sessionMode fresh when switching providers and expect external credentials or local Appium/Xcode setup to be required.",
+    "For provider or specialized app workflows, load version-matched upstream guidance with skills get agentcore|electron|slack|dogfood|vercel-sandbox through the native tool; add --full when you need references/templates, and use skills get --all only for broad skill audits. Hosted sandbox workflows should use upstream @agent-browser/sandbox helpers outside this wrapper. Provider launches such as -p ios, --provider browserbase/kernel/browseruse/browserless/agentcore, and iOS --device are upstream-owned setup paths; use sessionMode fresh when switching providers and expect external credentials or local Appium/Xcode setup to be required.",
     "For dialogs and frames, use dialog status/accept/dismiss and frame <selector|main> through native args; dialog commands and eval snippets that look like alert/confirm/prompt/dialog triggers are shorter-bounded than normal browser calls, and timed-out dialog-like interactions may add inspect-dialog-after-timeout, dismiss-dialog-after-timeout, or recover-fresh-session-after-dialog-timeout nextActions. When --confirm-actions produces a pending confirmation, use details.nextActions or exact confirm <id> / deny <id> calls instead of inventing ids.",
-    "If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. For headed demos, put --headed on the first launch with sessionMode=fresh and verify with screenshot/tab/get-url evidence because tool success cannot prove the OS window is visible to the user. For desktop readiness, prefer real conditions first: wait --text, wait --url, wait --fn, wait --load <state>, wait --download, or qa.attached; for disappearance checks in agent-browser 0.27.3, use wait --fn predicates instead of stale upstream-help examples like wait <selector> --state hidden. Use electron.probe/status for wrapper-owned launch health or target mismatch. Fixed waits are a last resort: use explicit --timeout or top-level timeoutMs for legitimately slow waits, and treat a successful payload like \"waited\":\"timeout\" as elapsed time only—verify completion with an observed condition, fresh snapshot, or screenshot.",
+    "If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. For headed demos, put --headed on the first launch with sessionMode=fresh and verify with screenshot/tab/get-url evidence because tool success cannot prove the OS window is visible to the user. For desktop readiness, prefer real conditions first: wait --text, wait --url, wait --fn, wait --load <state>, wait --download, or qa.attached; for disappearance checks in agent-browser 0.29.1, use wait --fn predicates instead of stale upstream-help examples like wait <selector> --state hidden. Use electron.probe/status for wrapper-owned launch health or target mismatch. Fixed waits are a last resort: use explicit --timeout or top-level timeoutMs for legitimately slow waits, and treat a successful payload like \"waited\":\"timeout\" as elapsed time only—verify completion with an observed condition, fresh snapshot, or screenshot.",
     "For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.",
     "For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.",
     "For downloads, prefer download <selector> <path> when an element click should save a file; simple loopback anchor downloads are saved to the requested path when the wrapper can resolve an HTTP(S) href. Do not rely on click alone when you need the downloaded file on disk.",

package/dist/extensions/agent-browser/lib/prompt-policy.js CHANGED Viewed

@@ -18,23 +18,31 @@ const LEGACY_BASH_ALLOW_PATTERNS = [
     /\bdebug(?:ging)?\b.*\b(?:agent[_ -]?browser|agent_browser|browser integration)\b/i,
 ];
 const PROMPT_ARTIFACT_PATH_PATTERN = /(?:^|[\s"'`(:])((?:\/[^\s"'`),;]+|[A-Za-z]:[\\/][^\s"'`),;]+|\.{1,2}[\\/][^\s"'`),;]+|[^\s"'`),;:\\/]+(?:[\\/][^\s"'`),;]+)+|[^\s"'`),;:\\/]+)\.(?:png|jpe?g|webp|gif|webm|mp4|har|pdf|trace|json))(?:[\s"'`),;.]|$)/gi;
+function inferPromptArtifactKind(line, path) {
+    const lowerPath = path.toLowerCase();
+    if (/\.(?:webm|mp4)$/.test(lowerPath))
+        return "recording";
+    if (/\.(?:png|jpe?g|webp|gif)$/.test(lowerPath))
+        return "screenshot";
+    const lowerLine = line.toLowerCase();
+    if (lowerLine.includes("screenshot"))
+        return "screenshot";
+    if (/\b(?:screen\s+recording|recording|webm|video)\b/.test(lowerLine))
+        return "recording";
+    return undefined;
+}
 function extractPromptRequestedArtifacts(prompt) {
     const artifacts = [];
     const seen = new Set();
     for (const line of prompt.split(/\r?\n/)) {
-        const lowerLine = line.toLowerCase();
-        const kind = lowerLine.includes("screenshot")
-            ? "screenshot"
-            : /\b(?:screen\s+recording|recording|webm|video)\b/.test(lowerLine)
-                ? "recording"
-                : undefined;
-        if (!kind)
-            continue;
         PROMPT_ARTIFACT_PATH_PATTERN.lastIndex = 0;
         for (const match of line.matchAll(PROMPT_ARTIFACT_PATH_PATTERN)) {
             const path = match[1]?.trim();
             if (!path)
                 continue;
+            const kind = inferPromptArtifactKind(line, path);
+            if (!kind)
+                continue;
             const key = `${kind}:${path}`;
             if (seen.has(key))
                 continue;

package/docs/ARCHITECTURE.md CHANGED Viewed

@@ -177,7 +177,7 @@ That failure should include a structured recovery hint pointing to `sessionMode:
 Implementation detail lives in `extensions/agent-browser/lib/launch-scoped-flags.ts` (canonical flag metadata shared with playbook/docs assertions), `extensions/agent-browser/lib/argv-descriptor.ts` and `extensions/agent-browser/lib/argv-grammar.ts` (command discovery, `VALUE_FLAGS`, `parseArgvDescriptor`) plus `extensions/agent-browser/lib/runtime.ts` (`getStartupScopedFlags`, `buildExecutionPlan`):
 - **Command discovery:** Leading argv is scanned with a value-taking allowlist so known global flags and documented command flags consume their values before the upstream command word is identified. Missing-value prevalidation is intentionally limited to upstream global value flags; command-scoped flags and literal text are left to upstream parsing so values like `fill #field --password` are not rejected by wrapper heuristics before the CLI sees them. When upstream adds new global flags that take values ahead of the command, extend both the command-discovery and prevalidation allowlists; when it adds command-specific flags, extend only command discovery/redaction as needed. A smaller set of global boolean flags may be followed by an optional `true`/`false` literal; when present, that literal is consumed as the flag value before command discovery continues.
-- **`--state` disambiguation:** Persisted browser `--state` before the command participates in launch-scoped validation and tab-correction hints. The same flag spelling after a `wait` command is excluded from startup-scoped detection so upstream help examples such as `wait @ref --state hidden` do not spuriously require `sessionMode: "fresh"` while an implicit session is active. As of upstream `agent-browser 0.27.3`, the parser still does not implement those `wait --state` examples as distinct wait modes, so agent-facing docs recommend `wait --fn` predicates for disappearance checks instead.
+- **`--state` disambiguation:** Persisted browser `--state` before the command participates in launch-scoped validation and tab-correction hints. The same flag spelling after a `wait` command is excluded from startup-scoped detection so upstream help examples such as `wait @ref --state hidden` do not spuriously require `sessionMode: "fresh"` while an implicit session is active. As of upstream `agent-browser 0.29.1`, the parser still does not implement those `wait --state` examples as distinct wait modes, so agent-facing docs recommend `wait --fn` predicates for disappearance checks instead.
 - **`--auto-connect`:** Treated as launch-scoped only when enabled (`--auto-connect` bare or `true`). `--auto-connect false` is ignored for startup-scoped blocking so disabled attach hints do not force a fresh launch.
 **Sessionless inspection and local commands:** Plain-text global help and version probes (`--help`, `-h`, `--version`, `-V`) must never allocate or bind the extension-managed session. The same session-ownership rule applies to read-only upstream `skills list`, `skills get …`, and `skills path …`, local auth profile management (`auth save/list/show/delete/remove`), plus local/setup surfaces such as `profiles`, `dashboard start/stop`, `device list`, `doctor`, `install`, `upgrade`, `session list`, and targeted/all local saved-state maintenance (`state list/show`, `state clear --all`, `state clear -a`, `state clear <session-name>`, `state clean --older-than <days>`, `state rename`). Non-plain-text sessionless commands still run with `--json` for machine-readable output, but the planner does not prepend the implicit managed `--session`, so an agent can inspect local capabilities or start/stop the standalone dashboard without consuming the implicit session slot before a real `open`. Browser-backed, context-dependent, or incomplete commands such as root `session`, untargeted `state clear`, bare `state clean`, `auth login`, `state save`, and `state load` keep normal managed-session injection. Command-shape allowlisting lives in `extensions/agent-browser/lib/command-policy.ts` (`needsManagedSession`), while `extensions/agent-browser/lib/runtime.ts` (`isPlainTextInspectionArgs`, `buildExecutionPlan`) applies that decision to execution planning.

package/docs/COMMAND_REFERENCE.md CHANGED Viewed

@@ -18,23 +18,22 @@ This project intentionally blocks normal `agent-browser` bash usage in most agen
 <!-- agent-browser-capability-baseline:start upstream-baseline -->
 <!-- Generated from scripts/agent-browser-capability-baseline.mjs. Run `npm run docs -- command-reference write` to update. Do not edit manually. -->
-This reference is baselined to the locally installed `agent-browser 0.28.0` command/help surface, audited against vercel-labs/agent-browser@6323df571ffd17d14e60ec19fcb56cc1caf498ab. Upstream `agent-browser` remains the source of truth for command semantics; this file is the local fallback for Pi agent sessions where direct binary help is blocked or discouraged.
+This reference is baselined to the locally installed `agent-browser 0.29.1` command/help surface, audited against vercel-labs/agent-browser@4572acf0d71c0086009206c9c1e2136fc54ec9e5. Upstream `agent-browser` remains the source of truth for command semantics; this file is the local fallback for Pi agent sessions where direct binary help is blocked or discouraged.
 The lightweight drift check is `npm run verify -- command-reference`. Run it whenever the installed upstream `agent-browser` version changes or this reference is edited.
 Use `npm run benchmark:agent-browser` or `npm run verify -- benchmark` before and after agent-facing workflow abstractions to measure task success, tool calls, model-visible output size, stale-ref behavior, artifact success, failure-category coverage, and elapsed-time estimates.
 <!-- agent-browser-capability-baseline:end upstream-baseline -->
-### Upstream 0.27.3 install-only rebaseline
+### Upstream 0.29.1 rebaseline
-The 0.27.3 rebaseline is an install-only compatibility update: upstream changed Windows ARM64 installation fallback behavior and did not change the CLI/help surface or browser-command semantics. This wrapper adds no compatibility shim for older upstream releases. The wrapper must still not hide these prior upstream fixes:
+The 0.29.1 rebaseline adds no new core browser CLI commands. It captures upstream's new hosted-sandbox helper package and install behavior:
-- click reliability: upstream now scrolls off-viewport elements before coordinate resolution, handles JavaScript dialogs promptly, recovers mouse state after dialog-opening clicks, and reports overlay interception before dispatching input
-- frame-scoped CSS selectors and waits, including cross-process iframe click-coordinate translation
-- wait timeout handling: documented 25s default, honored `--timeout` across wait variants, and appropriate client read budgets for long waits; the native wrapper forwards explicit long waits and derives a subprocess watchdog when top-level `timeoutMs` is omitted
-- form commands: `find label` matches `aria-label` / `aria-labelledby`, `select` errors when no option matches, and `type` parses `--clear` / `--delay` instead of typing them as literal text
-- warm CLI command latency and batch daemon respawn/retry improvements
-- GNU Linux release artifacts pinned to glibc 2.28
+- `@agent-browser/sandbox` is the upstream helper package for Eve and Vercel Sandbox workflows. It is not bundled by this pi extension; load `skills get vercel-sandbox --full` when a task needs that hosted-sandbox guidance.
+- Fresh Eve and Vercel Sandbox helpers install Chromium system dependencies by default; pass `installSystemDependencies: false` only when the sandbox image already has those libraries.
+- `install --with-deps` now exits nonzero when the package manager cannot install required browser libraries (`install --with-deps exits nonzero`).
+Runtime probes on 2026-06-21 confirm two old caveats still stand in `agent-browser 0.29.1`: `find ... uncheck` and `wait <selector> --state hidden|detached` remain advertised by help but fail at runtime, and `wait --url` glob behavior remains narrow. Keep the wrapper's direct `uncheck` passthrough, `wait --fn` disappearance guidance, and `job.assertUrl` glob workaround.
 ### Upstream 0.28.0 rebaseline
@@ -47,6 +46,17 @@ The 0.28.0 rebaseline tracks new local/infra upstream surfaces and does not chan
 The wrapper adds no compatibility shim for older upstream releases.
+### Upstream 0.27.3 install-only rebaseline
+The 0.27.3 rebaseline is an install-only compatibility update: upstream changed Windows ARM64 installation fallback behavior and did not change the CLI/help surface or browser-command semantics. This wrapper adds no compatibility shim for older upstream releases. The wrapper must still not hide these prior upstream fixes:
+- click reliability: upstream now scrolls off-viewport elements before coordinate resolution, handles JavaScript dialogs promptly, recovers mouse state after dialog-opening clicks, and reports overlay interception before dispatching input
+- frame-scoped CSS selectors and waits, including cross-process iframe click-coordinate translation
+- wait timeout handling: documented 25s default, honored `--timeout` across wait variants, and appropriate client read budgets for long waits; the native wrapper forwards explicit long waits and derives a subprocess watchdog when top-level `timeoutMs` is omitted
+- form commands: `find label` matches `aria-label` / `aria-labelledby`, `select` errors when no option matches, and `type` parses `--clear` / `--delay` instead of typing them as literal text
+- warm CLI command latency and batch daemon respawn/retry improvements
+- GNU Linux release artifacts pinned to glibc 2.28
 ## Core mental model
 Input mode chooser (one per call): **`args`** for the default open → snapshot -i → click/fill `@refs` flow; **`semanticAction`** for stable role/text/label targets; **`job`** / **`qa`** for multi-step checks; **`electron`** for desktop apps only; **`sourceLookup`** / **`networkSourceLookup`** are **experimental candidates-only** helpers (not authoritative mappings). Do not pass `--json` in `args`—the wrapper injects it. Match link and button text to the latest snapshot (on `https://example.com/` the main link is `Learn more`, not legacy `More information...` copy). See [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#input-mode-chooser) for snapshot variants (`-i` vs `--compact` vs full) and batching three or more getters.
@@ -650,7 +660,7 @@ For dense pages, the wrapper also accepts `snapshot -i --search <text>` and `sna
 | `wait --download [path]` | Wait for a download started by a previous action and optionally save it to `path`; successful wrapper results include upstream-reported `savedFilePath`/`savedFile`, while `details.artifacts[].exists` is the wrapper's on-disk verification signal. |
 | `wait --download [path] --timeout <ms>` | Set download-start timeout in milliseconds. The native Pi wrapper forwards explicit wait timeouts and extends the subprocess watchdog unless the caller supplies top-level `timeoutMs`. |
-Current upstream source still does not parse `wait <selector> --state hidden` / `wait <selector> --state detached` as distinct wait modes even though upstream help mentions those examples. Use `wait --fn "!document.querySelector('#spinner')"` or another explicit JavaScript predicate for disappearance/detach checks until upstream parser support exists.
+Current upstream 0.29.1 source still does not parse `wait <selector> --state hidden` / `wait <selector> --state detached` as distinct wait modes even though upstream help mentions those examples. Use `wait --fn "!document.querySelector('#spinner')"` or another explicit JavaScript predicate for disappearance/detach checks until upstream parser support exists.
 ### Diff, debug, and streaming
@@ -708,7 +718,7 @@ Long-running or lifecycle commands should be explicitly paired with cleanup call
 | `dashboard stop` | Stop the dashboard server. |
 | `device list` | List available iOS simulators. Use with `-p ios` when exercising iOS provider flows. |
 | `install` | Install browser binaries. |
-| `install --with-deps` | Install browser binaries plus Linux system dependencies. |
+| `install --with-deps` | Install browser binaries plus Linux system dependencies; exits nonzero when required libraries cannot be installed. |
 | `upgrade` | Upgrade `agent-browser` to the latest version. |
 | `doctor [--fix]` | Diagnose install issues and optionally auto-clean stale files. Use `doctor --offline --quick` for a fast local-only check and `doctor --json` for structured output. |
 | `plugin add <ref>` | Add a plugin from npm or GitHub (`<owner>/<repo>` or `@scope/<name>`); writes `agent-browser.json`. Flags such as `--name`, `--capability`, `--global`, and `--no-manifest` shape discovery. |
@@ -879,14 +889,14 @@ Other useful environment variables include `AGENT_BROWSER_DEFAULT_TIMEOUT`, `AGE
 <!-- agent-browser-capability-baseline:start capability-token-baseline -->
 <!-- Generated from scripts/agent-browser-capability-baseline.mjs. Run `npm run docs -- command-reference write` to update. Do not edit manually. -->
 <details>
-<summary>Generated verifier capability baseline for agent-browser 0.28.0</summary>
+<summary>Generated verifier capability baseline for agent-browser 0.29.1</summary>
 This generated block is review data for maintainers. The human-authored reference sections above remain the readable command guide.
 #### Source evidence
 - repository: `vercel-labs/agent-browser`
-- upstream HEAD: `6323df571ffd17d14e60ec19fcb56cc1caf498ab`
-- upstream package version: `0.28.0`
+- upstream HEAD: `4572acf0d71c0086009206c9c1e2136fc54ec9e5`
+- upstream package version: `0.29.1`
 - inspected: `agent-browser --version`
 - inspected: `agent-browser --help`
 - inspected: `selected agent-browser <command> --help output`
@@ -897,12 +907,17 @@ This generated block is review data for maintainers. The human-authored referenc
 - inspected: `agent-browser.schema.json`
 - inspected: `cli/src/commands.rs`
 - inspected: `cli/src/flags.rs`
+- inspected: `packages/@agent-browser/sandbox/README.md`
+- inspected: `packages/@agent-browser/sandbox/src/shared.ts`
+- inspected: `packages/@agent-browser/sandbox/src/vercel.ts`
+- inspected: `packages/@agent-browser/sandbox/src/eve.ts`
 #### Upstream help commands sampled
 - root help: `agent-browser --help`
 - skills help: `agent-browser skills --help`
 - skills list: `agent-browser skills list`
 - core skill full: `agent-browser skills get core --full`
+- vercel sandbox skill full: `agent-browser skills get vercel-sandbox --full`
 - open help: `agent-browser open --help`
 - click help: `agent-browser click --help`
 - key help: `agent-browser key --help`
@@ -954,11 +969,11 @@ This generated block is review data for maintainers. The human-authored referenc
 - plugin help: `agent-browser plugin --help`
 #### Inventory sections
-- Built-in skills: 13 human-doc token(s), 13 upstream token(s)
+- Built-in skills: 15 human-doc token(s), 15 upstream token(s)
 - Core page, element, navigation, and extraction commands: 74 human-doc token(s), 74 upstream token(s)
 - Sessions, state, tabs, frames, dialogs, and windows: 20 human-doc token(s), 16 upstream token(s)
 - Network, storage, artifacts, diagnostics, and performance: 43 human-doc token(s), 53 upstream token(s)
-- Batch, auth, confirmations, setup, dashboard, devices, and AI commands: 30 human-doc token(s), 34 upstream token(s)
+- Batch, auth, confirmations, setup, dashboard, devices, and AI commands: 31 human-doc token(s), 35 upstream token(s)
 - Global flags, config, providers, policy, and environment: 121 human-doc token(s), 91 upstream token(s)
 #### Human-authored doc tokens required
@@ -974,6 +989,8 @@ This generated block is review data for maintainers. The human-authored referenc
 - `skills get dogfood`
 - `skills get vercel-sandbox`
 - `skills get agentcore`
+- `@agent-browser/sandbox`
+- `installSystemDependencies: false`
 - `skills path [name]`
 - `AGENT_BROWSER_SKILLS_DIR`
@@ -1140,6 +1157,7 @@ This generated block is review data for maintainers. The human-authored referenc
 - `device list`
 - `install`
 - `install --with-deps`
+- `install --with-deps exits nonzero`
 - `upgrade`
 - `doctor [--fix]`
 - `doctor --offline --quick`
@@ -1287,6 +1305,8 @@ This generated block is review data for maintainers. The human-authored referenc
 - skills list: `dogfood`
 - skills list: `vercel-sandbox`
 - skills list: `agentcore`
+- vercel sandbox skill full: `@agent-browser/sandbox`
+- vercel sandbox skill full: `installSystemDependencies: false`
 - core skill full: `agent-browser frame @e3`
 - core skill full: `agent-browser dialog accept`
 - core skill full: `agent-browser state save ./auth.json`
@@ -1450,6 +1470,7 @@ This generated block is review data for maintainers. The human-authored referenc
 - root help: `dashboard start --port <n>`
 - device help: `device list`
 - root help: `install --with-deps`
+- install help: `fails if deps fail`
 - root help: `upgrade`
 - root help: `doctor [--fix]`
 - root help: `profiles`

package/docs/SUPPORT_MATRIX.md CHANGED Viewed

@@ -26,10 +26,10 @@ When upstream ships a new `agent-browser` or the inventory changes:
 ## Audit result
-- Target upstream: `agent-browser 0.28.0` (must match `CAPABILITY_BASELINE.targetVersion` in [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs)).
+- Target upstream: `agent-browser 0.29.1` (must match `CAPABILITY_BASELINE.targetVersion` in [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs)).
 - Source of truth: `CAPABILITY_BASELINE.inventorySections` in the same file (stable `id` keys: `skills`, `core-commands`, `state-tabs-frames-dialogs`, `network-storage-artifacts-diagnostics`, `batch-auth-setup-ai`, `options-and-env`).
-- Status: supported for the current wrapper contract after the 2026-05-26 all-command audit.
-- High-priority support gaps: 2026-05-26 audit found sessionless local commands and command-scoped value flags needed sharper wrapper handling; runtime/tests/docs now cover those paths. The 0.28.0 rebaseline adds local `mcp` and `plugin` surfaces plus plugin-backed credential login; wrapper docs/tests mark `mcp` and known `plugin` commands sessionless, with no compatibility shim for older upstream releases. The prior rebaseline preserves thin support for upstream click reliability, frame-scoped selectors/waits, form-command fixes, daemon retry improvements, and glibc-pinned release artifacts; wrapper wait planning forwards explicit long `wait <ms>` / `wait --timeout <ms>` calls instead of rejecting them before spawn. Remaining upstream-owned caveat: current help mentions `wait <selector> --state hidden`, but source parsing does not implement that distinct wait mode, so wrapper docs steer agents to `wait --fn` predicates.
+- Status: command-surface supported for the current wrapper contract after the 2026-06-21 0.29.1 audit; release-gate evidence below was refreshed against `agent-browser 0.29.1`.
+- High-priority support gaps: the 2026-05-26 audit found sessionless local commands and command-scoped value flags needed sharper wrapper handling; runtime/tests/docs now cover those paths. The 0.28.0 rebaseline added local `mcp` and `plugin` surfaces plus plugin-backed credential login; wrapper docs/tests mark `mcp` and known `plugin` commands sessionless, with no compatibility shim for older upstream releases. The 0.29.1 rebaseline adds upstream `@agent-browser/sandbox` helper-package guidance and stricter `install --with-deps` failure semantics; no new wrapper runtime mode or bundled dependency is required. Prior upstream changes (click reliability, frame-scoped selectors/waits, form-command fixes, daemon retry improvements, and glibc-pinned release artifacts) remain thin passthrough. Remaining upstream-owned caveat: current help still mentions `wait <selector> --state hidden` / `detached` and `find ... uncheck`, but runtime probes show those advertised shapes still fail, so wrapper docs keep `wait --fn` predicates and direct `uncheck` passthrough guidance.
 - Post-`v0.2.29` review state: commits `eb55320` through `86abbfb` add browser guidance/smoke coverage plus `RQ-0086` click-probe reduction, `RQ-0087` same-snapshot form fill batching, `RQ-0088` current-ref fallback on locator misses, `RQ-0089` direct-upstream click mutation investigation, and `RQ-0090` stop-boundary/artifact-path guidance. Verification gates below were rerun on 2026-05-18 after those tasks landed. Constrained `job` (`RQ-0064`), the lightweight `qa` preset (`RQ-0065`), the experimental `sourceLookup` helper (`RQ-0066`), the experimental `networkSourceLookup` helper (`RQ-0067`), optional Exa/Brave-backed `agent_browser_web_search` with Pi-scoped package config (`RQ-0121`), and agent recovery for search/profile configuration failures (`RQ-0122`) are implemented; see [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#job), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#qa), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#sourcelookup), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#networksourcelookup), and [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#optional-companion-web-search). Reusable browser recipes (`RQ-0068`) are intentionally not adopted as a runtime surface; see [`ARCHITECTURE.md`](ARCHITECTURE.md#no-reusable-recipe-layer-yet).
 ## Open UX/reliability follow-ups from 2026-05-29 agent feedback
@@ -44,24 +44,25 @@ Current summary:
 | RQ-0123–RQ-0127 | Stress-report wrapper fixes shipped; prompt-derived business-action blocking remains intentionally out of scope. | [`docs/support-notes.md`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/docs/support-notes.md) |
 | RQ-0101 | Upstream `agent-browser 0.27.2` rebaseline shipped. | [`docs/support-notes.md`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/docs/support-notes.md) |
 | RQ-0128 | Upstream `agent-browser 0.27.3` install-only rebaseline shipped; no new wrapper capability adopted. | [`docs/support-notes.md`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/docs/support-notes.md) |
+| RQ-0129 | Upstream `agent-browser 0.29.1` rebaseline shipped; sandbox helpers are documented upstream package guidance, not a wrapper runtime. | [`docs/support-notes.md`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/docs/support-notes.md) |
 ## Verification evidence
-Re-run the gates below before each release; this table records what the closure audit exercised. Rows marked **Current for 0.28.0** were rerun after the `agent-browser 0.28.0` rebaseline. Rows marked **Historical / pending refresh** are useful prior evidence but must not be treated as current release proof until rerun under the named condition.
+Re-run the gates below before each release; this table records what the closure audit exercised. Rows marked current were rerun under `agent-browser 0.29.1` on 2026-06-21.
 | Gate | Evidence | Status |
 | --- | --- | --- |
-| Default local gate | `npm run verify` checks generated playbook drift, clean-builds generated `dist/`, runs `tsc --noEmit`, unit/fake tests, generated command-reference blocks, and live command-reference sampling. | **Current for 0.28.0:** pass on 2026-06-18 as part of `npm run verify -- release` (clean build, TypeScript, 571 passed, 1 skipped, generated docs check, and live command-reference sampling passed with `agent-browser 0.28.0` on `PATH`). |
+| Default local gate | `npm run verify` checks generated playbook drift, clean-builds generated `dist/`, runs `tsc --noEmit`, unit/fake tests, generated command-reference blocks, and live command-reference sampling. | **Current for 0.29.1:** pass on 2026-06-21 inside `npm run verify -- release`; command-reference verification also passed standalone after the rebaseline. |
 | Pre-PR local gate | `npm run verify -- pre-pr` composes the default gate with package-content verification. Use before larger local handoffs or PR-ready claims when lifecycle/platform/live dogfood cost is not warranted. | Added 2026-06-10; orchestration is locked by `test/project-verify.test.ts` and does not change release mode. |
-| Real upstream contract | `npm run verify -- real-upstream` runs the localhost fixture matrix against the real installed `agent-browser` matching the baseline. | **Current for 0.28.0:** pass on 2026-06-18 (`npm run verify -- real-upstream`; localhost fixture matrix passed against installed `agent-browser 0.28.0`). |
-| Packaged Pi smoke | `npm run verify -- package-pi` validates package contents, loads the packaged `agent_browser` tool without requiring optional Brave config, and executes fake-upstream `--version`. | **Current for 0.28.0:** pass on 2026-06-18 as part of `npm run verify -- release` (`verify-package.mjs --smoke-pi`; packed 118 files, packaged `agent_browser --version` invocation passed). |
-| Startup profile | `npm run verify -- startup-profile --samples <n>` clean-builds generated `dist/`, records direct package entrypoint import/factory timing in fresh Node processes, and writes `.artifacts/startup-profile/latest.json`. It must not launch Pi, tmux, mise, npm, browsers, or `agent-browser`; full Pi TUI ready-prompt profiling is intentionally excluded after it proved too invasive for routine verification. Run this opt-in evidence when package layout, the compiled entrypoint, top-level imports, schema registration, or prompt/config startup logic changes. | **Current for compiled entrypoint:** pass on 2026-06-11 with direct compiled entrypoint import+factory median 47.136 ms in earlier samples, below the 250 ms direct-import guard and below the prior ~96 ms TypeScript-entrypoint baseline. Full-Pi startup numbers from the unsafe tmux profiler are not accepted as ongoing release evidence. |
-| Deterministic dogfood smoke | `npm run verify -- dogfood` (`scripts/verify-agent-browser-dogfood.ts`) drives the native wrapper against a local file fixture through top-level `qa`, `semanticAction`, constrained `job`, screenshot artifact verification, and session close with the real `agent-browser` on `PATH`. | **Current for 0.28.0:** pass on 2026-06-18 (`npm run verify -- dogfood`; `qa-url`, fresh/current opens, semantic click, job screenshot artifact verification, and close all passed). |
-| Efficiency benchmark | `npm run verify -- benchmark` runs deterministic browser workflow accounting plus focused benchmark tests, including JSONL sampling fixtures and job/qa/sourceLookup/networkSourceLookup/Electron scenario coverage. | **Historical / pending refresh:** pass on 2026-05-29 (`npm run verify -- benchmark`). This deterministic gate is not upstream-version-specific, but rerun before claiming current benchmark evidence after benchmark or workflow-scenario edits. |
-| Crabbox platform smoke | `npm run check:platform-smoke` syntax-checks the harness and cheap invariants. `npm run smoke:platform:ubuntu-image` builds the project-owned Linux image, `npm run smoke:platform:doctor` checks Crabbox 0.26.0+ and local target readiness, and `npm run smoke:platform:all` runs doctor first, then fast target-local `platform-build` (`npm run verify -- platform-target`, pack, clean Pi install) plus `browser-dogfood-smoke` on Crabbox `macos`, `ubuntu`, and `windows-native`; see [`platform-smoke.md`](platform-smoke.md). Target artifacts include Crabbox/provider/work-root metadata, and release review also checks provider-specific `crabbox list` commands for leftover leases/clones. | **Current for 0.28.0:** pass on 2026-06-18 inside `npm run verify -- release`; rebuilt Ubuntu image `pi-agent-browser-native-platform:node24-agent-browser0.28.0`, refreshed the Windows `crabbox-ready` template snapshot to `agent-browser 0.28.0`, doctor passed, then Crabbox platform smoke passed for macOS, Ubuntu, and native Windows. |
-| `verify -- release` / `prepublishOnly` | `npm run verify -- release` chains the default gate with the configured-source lifecycle harness, packaged Pi smoke, and the release-blocking Crabbox platform matrix (`verifySteps` `release` in [`scripts/project.mjs`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/scripts/project.mjs)). `package.json` `prepublishOnly` runs that compose before `npm pack --dry-run` during `npm publish`. It intentionally omits standalone real-upstream, host-only dogfood, and benchmark modes—see [`RELEASE.md`](RELEASE.md#pre-release-checks). | **Current for 0.28.0:** pass on 2026-06-18 (`npm run verify -- release`), including default unit/fake gate, generated docs checks, live command-reference sampling, lifecycle harness, packaged Pi smoke, and macOS/Ubuntu/native-Windows Crabbox platform smoke. |
-| Configured-source lifecycle | `npm run verify -- lifecycle` (`scripts/verify-lifecycle.mjs`) drives `/reload`, closes and relaunches Pi with the same exact `--session-id`, checks the JSONL session header id, session continuity, slash-command sentinel tokens (`v1` before reload and `v2` after full relaunch because compiled JS package modules are process-cached), persisted spill reachability, and real Pi `tool_result` failure-patch semantics for a QA reclassification with a fake upstream on `PATH`. Default Pi model is `zai/glm-5.2`; default per-step wait is **180000 ms** (`DEFAULT_TIMEOUT_MS`); override model with `--model <id>` and waits with `--timeout-ms <ms>`. Passthrough flags in [`scripts/project.mjs`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/scripts/project.mjs): `--keep-artifacts`, `--model`, `--verbose`, and `--timeout-ms` plus a value (for example `npm run verify -- lifecycle --model openai-codex/gpt-5.5:minimal --keep-artifacts --verbose --timeout-ms 600000`). | **Current for 0.28.0:** pass on 2026-06-18 as part of `npm run verify -- release`; managed browser session continuity and persisted full output verified before cleanup. |
-| Quick isolated Pi smoke | `pi --approve --no-extensions --no-skills -e . --tools agent_browser` from trusted repo root; native `agent_browser` only. | **Historical / pending refresh:** pass on 2026-06-11 via tmux with `pi --approve --no-extensions --no-skills -e .`; native `agent_browser` only. Covered `qa` with `sessionMode: "fresh"` against `https://example.com`, `open` and compact `snapshot -i` on `https://react.dev`, `semanticAction` link click to `https://react.dev/learn`, screenshot artifact verification at `/tmp/piab-release-smoke-react.png`, and `close`; explicit screenshot and temporary session artifacts were removed after evidence capture. Broader historical coverage also includes version/help/skills, eval stdin, batch stdin, explicit session, network requests, console/errors, diff snapshot, stream status/disable, dashboard start/stop, and chat credential-failure pass-through during RQ-0055. Not rerun for 0.28.0 unless noted in release evidence. |
+| Real upstream contract | `npm run verify -- real-upstream` runs the localhost fixture matrix against the real installed `agent-browser` matching the baseline. | **Current for 0.29.1:** pass on 2026-06-21 (`npm run verify -- real-upstream`; localhost fixture matrix and plugin list probe passed against installed `agent-browser 0.29.1`). |
+| Packaged Pi smoke | `npm run verify -- package-pi` validates package contents, loads the packaged `agent_browser` tool without requiring optional Brave config, and executes fake-upstream `--version`. | **Current for 0.29.1:** pass on 2026-06-21 as part of `npm run verify -- release` (`verify-package.mjs --smoke-pi`; packaged `agent_browser --version` invocation passed). |
+| Startup profile | `npm run verify -- startup-profile --samples <n>` clean-builds generated `dist/`, records direct package entrypoint import/factory timing in fresh Node processes, and writes `.artifacts/startup-profile/latest.json`. It must not launch Pi, tmux, mise, npm, browsers, or `agent-browser`; full Pi TUI ready-prompt profiling is intentionally excluded after it proved too invasive for routine verification. Run this opt-in evidence when package layout, the compiled entrypoint, top-level imports, schema registration, or prompt/config startup logic changes. | **Current for compiled entrypoint:** pass on 2026-06-21 (`npm run verify -- startup-profile --samples 3`; direct compiled entrypoint import+factory median 47.3 ms, below the 250 ms budget). Full-Pi startup numbers from the unsafe tmux profiler are not accepted as ongoing release evidence. |
+| Deterministic dogfood smoke | `npm run verify -- dogfood` (`scripts/verify-agent-browser-dogfood.ts`) drives the native wrapper against a local file fixture through top-level `qa`, `semanticAction`, constrained `job`, screenshot artifact verification, and session close with the real `agent-browser` on `PATH`. | **Current for 0.29.1:** pass on 2026-06-21 (`npm run verify -- dogfood`; `qa-url`, fresh/current opens, semantic click, job screenshot artifact verification, and close all passed). |
+| Efficiency benchmark | `npm run verify -- benchmark` runs deterministic browser workflow accounting plus focused benchmark tests, including JSONL sampling fixtures and job/qa/sourceLookup/networkSourceLookup/Electron scenario coverage. | **Current:** pass on 2026-06-21 (`npm run verify -- benchmark`; 13/13 deterministic scenarios passed). |
+| Crabbox platform smoke | `npm run check:platform-smoke` syntax-checks the harness and cheap invariants. `npm run smoke:platform:ubuntu-image` builds the project-owned Linux image, `npm run smoke:platform:doctor` checks Crabbox 0.26.0+ and local target readiness, and `npm run smoke:platform:all` runs doctor first, then fast target-local `platform-build` (`npm run verify -- platform-target`, pack, clean Pi install) plus `browser-dogfood-smoke` on Crabbox `macos`, `ubuntu`, and `windows-native`; see [`platform-smoke.md`](platform-smoke.md). Target artifacts include Crabbox/provider/work-root metadata, and release review also checks provider-specific `crabbox list` commands for leftover leases/clones. | **Current for 0.29.1:** pass on 2026-06-21 inside `npm run verify -- release`; rebuilt Ubuntu image `pi-agent-browser-native-platform:node24-agent-browser0.29.1`, refreshed the Windows `crabbox-ready` template snapshot to `agent-browser 0.29.1`, doctor passed, then Crabbox platform smoke passed for macOS, Ubuntu, and native Windows. |
+| `verify -- release` / `prepublishOnly` | `npm run verify -- release` chains the default gate with the configured-source lifecycle harness, packaged Pi smoke, and the release-blocking Crabbox platform matrix (`verifySteps` `release` in [`scripts/project.mjs`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/scripts/project.mjs)). `package.json` `prepublishOnly` runs that compose before `npm pack --dry-run` during `npm publish`. It intentionally omits standalone real-upstream, host-only dogfood, and benchmark modes—see [`RELEASE.md`](RELEASE.md#pre-release-checks). | **Current for 0.29.1 / Pi 0.79.9:** pass on 2026-06-21 (`npm run verify -- release`), including default unit/fake gate, generated docs checks, live command-reference sampling, lifecycle harness, packaged Pi smoke, and macOS/Ubuntu/native-Windows Crabbox platform smoke. |
+| Configured-source lifecycle | `npm run verify -- lifecycle` (`scripts/verify-lifecycle.mjs`) drives `/reload`, closes and relaunches Pi with the same exact `--session-id`, checks the JSONL session header id, session continuity, slash-command sentinel tokens (`v1` before reload and `v2` after full relaunch because compiled JS package modules are process-cached), persisted spill reachability, and real Pi `tool_result` failure-patch semantics for a QA reclassification with a fake upstream on `PATH`. Default Pi model is `zai/glm-5.2`; default per-step wait is **180000 ms** (`DEFAULT_TIMEOUT_MS`); override model with `--model <id>` and waits with `--timeout-ms <ms>`. Passthrough flags in [`scripts/project.mjs`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/scripts/project.mjs): `--keep-artifacts`, `--model`, `--verbose`, and `--timeout-ms` plus a value (for example `npm run verify -- lifecycle --model openai-codex/gpt-5.5:minimal --keep-artifacts --verbose --timeout-ms 600000`). | **Current for 0.29.1:** pass on 2026-06-21 as part of `npm run verify -- release`; managed browser session continuity and persisted full output verified before cleanup. |
+| Quick isolated Pi smoke | `pi --approve --no-extensions --no-skills -e . --tools agent_browser` from trusted repo root; native `agent_browser` only. | **Current for 0.29.1 / Pi 0.79.9:** pass on 2026-06-21 via tmux with `pi --approve --no-extensions --no-skills -e . --model openai-codex/gpt-5.5:minimal --tools agent_browser`. Covered the public Sauce Demo checkout-overview flow with clean context, native sorting/click/fill flow, screenshot and recording evidence, console/page-error/network diagnostics, and no order placement. A one-line screenshot-plus-recording close-guard smoke on `https://example.com` passed after rebuilding `dist/`, proving close succeeds after both artifact paths are verified. Temp artifacts and tmux sessions were cleaned after evidence capture. |
 Runtime floor note: package metadata keeps the Pi core package peer ranges as wildcards, per the installed Pi package docs, but `pi-agent-browser-doctor` / `npm run doctor` treats a `pi --version` below 0.79.0 as a setup failure. This keeps the package dependency shape aligned with Pi package loading while still making unsupported host Pi versions a release and first-run blocker.
@@ -69,11 +70,11 @@ Runtime floor note: package metadata keeps Pi core package peer ranges wildcard
 | Baseline section | Baseline items | Documentation | Runtime handling | Test coverage | Validation status |
 | --- | --- | --- | --- | --- | --- |
-| Built-in skills | 13 canonical tokens from baseline section `skills`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#built-in-skills). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#built-in-skills), generated baseline block, README proof section, release docs. | `needsManagedSession` keeps read-only skills inspection sessionless while preserving thin upstream passthrough. | Runtime and extension-validation skills/provider matrix; real-upstream inspection/skills group. | Supported. |
+| Built-in skills | 15 canonical tokens from baseline section `skills`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#built-in-skills). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#built-in-skills), generated baseline block, README proof section, release docs. | `needsManagedSession` keeps read-only skills inspection sessionless while preserving thin upstream passthrough; upstream `@agent-browser/sandbox` remains external package guidance, not a bundled wrapper dependency. | Runtime and extension-validation skills/provider matrix; real-upstream inspection/skills group. | Supported. |
 | Core page, element, navigation, and extraction commands | 74 canonical tokens from baseline section `core-commands`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#core-page-and-element-commands). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#core-page-and-element-commands), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md), README quick start. | Thin passthrough with wrapper-owned JSON/session planning, ref guidance, artifact verification, page-change summaries, click-dispatch diagnostics, no-op scroll/focus diagnostics, shorthand compilers, and redaction. | Real-upstream core matrix plus fake core matrix for passthrough, ordering, diagnostics, and compiler validation. | Supported. Upstream semantics remain upstream-owned. |
 | Sessions, state, tabs, frames, dialogs, and windows | 20 canonical tokens from baseline section `state-tabs-frames-dialogs`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#session-state-frames-dialogs-windows-and-inspection-commands). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#session-state-frames-dialogs-windows-and-inspection-commands), stateful workflow notes, [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details). | Stateful summaries/redaction, state artifact handling, sessionless local command planning, managed-session restore, tab target pinning, and close alias cleanup. | Extension-validation stateful matrix, runtime session/resume tests, presentation redaction tests, lifecycle harness. | Supported. External profile/auth state remains operator-owned. |
-| Network, storage, artifacts, diagnostics, and performance | 42 canonical tokens from baseline section `network-storage-artifacts-diagnostics`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#page-state-finding-mouse-settings-network-and-storage). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#page-state-finding-mouse-settings-network-and-storage), diagnostic sections, [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details). | Thin passthrough plus compact diagnostics, route-mock warnings, useful-but-redacted storage output, stream idempotency normalization, artifact metadata, missing-ffmpeg warnings, sensitive-data redaction, timeout bounds, and cleanup-pair guidance. | Fake non-core matrix and safe real-upstream coverage for network/HAR, diff, trace/profiler, console/errors/highlight, stream, vitals, and React missing-renderer. | Supported. Environment-sensitive operations need suitable local/browser state. |
-| Batch, auth, confirmations, setup, dashboard, devices, and AI commands | 30 canonical tokens from baseline section `batch-auth-setup-ai`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#batch-auth-confirmations-sessions-chat-dashboard-devices-and-setup). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#batch-auth-confirmations-sessions-chat-dashboard-devices-and-setup), README security notes, release docs. | Native-tool batch stdin, generated `job`/`qa`/lookup batch plans, auth/confirmation redaction, sessionless local auth/setup/dashboard/doctor/plugin planning, plugin list/show JSON envelope normalization, bare-`mcp` validation with `mcp --help` preserved, timeout/cleanup guidance. | Parser/runtime plugin and MCP unit coverage; fake-upstream plugin list/show and MCP help/blocking coverage; real-upstream plugin list shape probe; structured input-mode tests; efficiency benchmark scenarios. | Supported. Interactive side-effecting setup/auth/chat remains upstream-owned. `plugin` is local/sessionless; `mcp` is external-client-only except help; `auth login --credential-provider` resolves credentials via a plugin. |
+| Network, storage, artifacts, diagnostics, and performance | 43 canonical tokens from baseline section `network-storage-artifacts-diagnostics`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#page-state-finding-mouse-settings-network-and-storage). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#page-state-finding-mouse-settings-network-and-storage), diagnostic sections, [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details). | Thin passthrough plus compact diagnostics, route-mock warnings, useful-but-redacted storage output, stream idempotency normalization, artifact metadata, missing-ffmpeg warnings, sensitive-data redaction, timeout bounds, and cleanup-pair guidance. | Fake non-core matrix and safe real-upstream coverage for network/HAR, diff, trace/profiler, console/errors/highlight, stream, vitals, and React missing-renderer. | Supported. Environment-sensitive operations need suitable local/browser state. |
+| Batch, auth, confirmations, setup, dashboard, devices, and AI commands | 31 canonical tokens from baseline section `batch-auth-setup-ai`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#batch-auth-confirmations-sessions-chat-dashboard-devices-and-setup). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#batch-auth-confirmations-sessions-chat-dashboard-devices-and-setup), README security notes, release docs. | Native-tool batch stdin, generated `job`/`qa`/lookup batch plans, auth/confirmation redaction, sessionless local auth/setup/dashboard/doctor/plugin planning, plugin list/show JSON envelope normalization, bare-`mcp` validation with `mcp --help` preserved, timeout/cleanup guidance. | Parser/runtime plugin and MCP unit coverage; fake-upstream plugin list/show and MCP help/blocking coverage; real-upstream plugin list shape probe; structured input-mode tests; efficiency benchmark scenarios. | Supported. Interactive side-effecting setup/auth/chat remains upstream-owned. `plugin` is local/sessionless; `mcp` is external-client-only except help; `auth login --credential-provider` resolves credentials via a plugin; `install --with-deps` failures remain upstream-owned. |
 | Global flags, config, providers, policy, and environment | 121 canonical tokens from baseline section `options-and-env`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#important-global-flags-config-and-environment). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#important-global-flags-config-and-environment), README provider/setup notes, [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#sessionmode), architecture/runtime docs. | Runtime handles command discovery, value-flag prevalidation, launch-scoped flags, redacted echoes, fresh-session recovery hints, explicit sessions, provider/device launch-scoping, parent env forwarding with wrapper overrides, subprocess completion, and package-owned Pi-scoped config for optional companion features. | Runtime tests for flags/planning/redaction/session behavior; process tests for env and stdio-linger completion; config/web-search/CLI tests; fake provider/specialized-skill matrix; package doctor. | Supported. Provider clouds, iOS/Appium, proxies, profiles, and credentials require external setup. |
 ## Follow-up decision after closure

package/docs/TOOL_CONTRACT.md CHANGED Viewed

@@ -157,9 +157,9 @@ The extension always plans normal browser commands with `--json` prepended in `e
 - For batch chains that touch cookies, storage, auth, or other secret-bearing commands, use details.batchSteps for per-step artifacts, categories, spill paths, and full structured errors; top-level details.data on batch is only a compact redacted step matrix (success, argv-redacted command, redacted result or scrubbed error text) built from the same presentation rules as standalone calls.
 - For non-core families, pass current upstream commands through the native tool directly: network route/requests/har (including request filters like --type/--method/--status), diff snapshot/screenshot/url with scoped/baseline options, trace/profiler/record, console/errors/highlight/inspect/clipboard, stream enable/disable/status, dashboard start/stop, device list for iOS simulator inventory, and chat. For compact network requests output, prefer details.nextActions for request detail, route-mock diagnostics, actionable failed-request networkSourceLookup, filtering, clearing the aggregate buffer before repro, or HAR capture follow-ups instead of guessing request-id syntax. Artifact-producing commands report details.artifacts and verification state; long-running starts such as stream, dashboard, trace/profiler, and record should be paired with the matching stop/disable command when the task is done; stream enable already-enabled outcomes are treated as idempotent success with status/disable follow-ups.
 - For Electron desktop apps, prefer top-level electron for wrapper-owned discovery, isolated launch, status, compact probe, and cleanup: list first, treat likely-sensitive annotations as hints rather than enforcement, launch with the default snapshot handoff unless handoff: "tabs" is the safer diagnostic starting point, use electron.probe or snapshot -i/qa.attached for current-session state, and always cleanup the returned launchId when done. electron.launch uses an isolated temporary profile; it does not reuse the app's normal signed-in profile or attach to an already-running authenticated app. For signed-in local app state, host-launch the normal app with --remote-debugging-port when appropriate, then use raw args connect <port|url>; after connect, inspect tab list, select the stable tab id such as tab t2, then run a condition wait or snapshot -i before using refs. close commands (`close`, `quit`, or `exit`) only close the browser/CDP session; leave manually launched app shutdown, profile cleanup, and explicit artifacts to the host owner.
-- For provider or specialized app workflows, load version-matched upstream guidance with skills get agentcore|electron|slack|dogfood|vercel-sandbox through the native tool; add --full when you need references/templates, and use skills get --all only for broad skill audits. Provider launches such as -p ios, --provider browserbase/kernel/browseruse/browserless/agentcore, and iOS --device are upstream-owned setup paths; use sessionMode fresh when switching providers and expect external credentials or local Appium/Xcode setup to be required.
+- For provider or specialized app workflows, load version-matched upstream guidance with skills get agentcore|electron|slack|dogfood|vercel-sandbox through the native tool; add --full when you need references/templates, and use skills get --all only for broad skill audits. Hosted sandbox workflows should use upstream @agent-browser/sandbox helpers outside this wrapper. Provider launches such as -p ios, --provider browserbase/kernel/browseruse/browserless/agentcore, and iOS --device are upstream-owned setup paths; use sessionMode fresh when switching providers and expect external credentials or local Appium/Xcode setup to be required.
 - For dialogs and frames, use dialog status/accept/dismiss and frame <selector|main> through native args; dialog commands and eval snippets that look like alert/confirm/prompt/dialog triggers are shorter-bounded than normal browser calls, and timed-out dialog-like interactions may add inspect-dialog-after-timeout, dismiss-dialog-after-timeout, or recover-fresh-session-after-dialog-timeout nextActions. When --confirm-actions produces a pending confirmation, use details.nextActions or exact confirm <id> / deny <id> calls instead of inventing ids.
-- If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. For headed demos, put --headed on the first launch with sessionMode=fresh and verify with screenshot/tab/get-url evidence because tool success cannot prove the OS window is visible to the user. For desktop readiness, prefer real conditions first: wait --text, wait --url, wait --fn, wait --load <state>, wait --download, or qa.attached; for disappearance checks in agent-browser 0.27.3, use wait --fn predicates instead of stale upstream-help examples like wait <selector> --state hidden. Use electron.probe/status for wrapper-owned launch health or target mismatch. Fixed waits are a last resort: use explicit --timeout or top-level timeoutMs for legitimately slow waits, and treat a successful payload like "waited":"timeout" as elapsed time only—verify completion with an observed condition, fresh snapshot, or screenshot.
+- If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. For headed demos, put --headed on the first launch with sessionMode=fresh and verify with screenshot/tab/get-url evidence because tool success cannot prove the OS window is visible to the user. For desktop readiness, prefer real conditions first: wait --text, wait --url, wait --fn, wait --load <state>, wait --download, or qa.attached; for disappearance checks in agent-browser 0.29.1, use wait --fn predicates instead of stale upstream-help examples like wait <selector> --state hidden. Use electron.probe/status for wrapper-owned launch health or target mismatch. Fixed waits are a last resort: use explicit --timeout or top-level timeoutMs for legitimately slow waits, and treat a successful payload like "waited":"timeout" as elapsed time only—verify completion with an observed condition, fresh snapshot, or screenshot.
 - For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.
 - For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.
 - For downloads, prefer download <selector> <path> when an element click should save a file; simple loopback anchor downloads are saved to the requested path when the wrapper can resolve an HTTP(S) href. Do not rely on click alone when you need the downloaded file on disk.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pi-agent-browser-native",
-  "version": "0.2.54",
+  "version": "0.2.56",
   "description": "pi extension that exposes agent-browser as a native tool for browser automation",
   "type": "module",
   "author": "Mitch Fultz (https://github.com/fitchmultz)",
@@ -63,16 +63,18 @@
     "typebox": "*"
   },
   "devDependencies": {
-    "@earendil-works/pi-ai": "0.79.4",
-    "@earendil-works/pi-coding-agent": "0.79.4",
-    "@earendil-works/pi-tui": "0.79.4",
+    "@earendil-works/pi-ai": "^0.79.9",
+    "@earendil-works/pi-coding-agent": "^0.79.9",
+    "@earendil-works/pi-tui": "^0.79.9",
     "@types/node": "^25.9.3",
     "tsx": "^4.21.0",
     "typebox": "^1.1.38",
     "typescript": "^6.0.3"
   },
   "overrides": {
-    "basic-ftp": "6.0.1"
+    "basic-ftp": "6.0.1",
+    "esbuild": "0.28.1",
+    "protobufjs": "7.6.4"
   },
   "scripts": {
     "docs": "node ./scripts/project.mjs docs",
@@ -95,5 +97,19 @@
     "prepack": "npm run build",
     "prepare": "node ./scripts/prepare.mjs"
   },
-  "packageManager": "npm@11.14.0"
+  "packageManager": "npm@11.14.0",
+  "peerDependenciesMeta": {
+    "@earendil-works/pi-ai": {
+      "optional": true
+    },
+    "@earendil-works/pi-coding-agent": {
+      "optional": true
+    },
+    "@earendil-works/pi-tui": {
+      "optional": true
+    },
+    "typebox": {
+      "optional": true
+    }
+  }
 }

package/scripts/agent-browser-capability-baseline.mjs CHANGED Viewed

@@ -14,8 +14,8 @@ export const COMMAND_REFERENCE_BASELINE_BLOCK_IDS = Object.freeze(["upstream-bas
 const sourceEvidence = Object.freeze({
   repository: "vercel-labs/agent-browser",
-  upstreamHead: "6323df571ffd17d14e60ec19fcb56cc1caf498ab",
-  upstreamPackageVersion: "0.28.0",
+  upstreamHead: "4572acf0d71c0086009206c9c1e2136fc54ec9e5",
+  upstreamPackageVersion: "0.29.1",
   inspectedSources: Object.freeze([
     "agent-browser --version",
     "agent-browser --help",
@@ -27,6 +27,10 @@ const sourceEvidence = Object.freeze({
     "agent-browser.schema.json",
     "cli/src/commands.rs",
     "cli/src/flags.rs",
+    "packages/@agent-browser/sandbox/README.md",
+    "packages/@agent-browser/sandbox/src/shared.ts",
+    "packages/@agent-browser/sandbox/src/vercel.ts",
+    "packages/@agent-browser/sandbox/src/eve.ts",
   ]),
 });
@@ -46,6 +50,7 @@ const helpCommands = Object.freeze([
   helpCommand("skills help", ["skills", "--help"]),
   helpCommand("skills list", ["skills", "list"]),
   helpCommand("core skill full", ["skills", "get", "core", "--full"]),
+  helpCommand("vercel sandbox skill full", ["skills", "get", "vercel-sandbox", "--full"]),
   helpCommand("open help", ["open", "--help"]),
   helpCommand("click help", ["click", "--help"]),
   helpCommand("key help", ["key", "--help"]),
@@ -113,6 +118,8 @@ const inventorySections = Object.freeze([
       "skills get dogfood",
       "skills get vercel-sandbox",
       "skills get agentcore",
+      "@agent-browser/sandbox",
+      "installSystemDependencies: false",
       "skills path [name]",
       "AGENT_BROWSER_SKILLS_DIR",
     ],
@@ -127,6 +134,8 @@ const inventorySections = Object.freeze([
       ["skills list", "dogfood"],
       ["skills list", "vercel-sandbox"],
       ["skills list", "agentcore"],
+      ["vercel sandbox skill full", "@agent-browser/sandbox"],
+      ["vercel sandbox skill full", "installSystemDependencies: false"],
       ["core skill full", "agent-browser frame @e3"],
       ["core skill full", "agent-browser dialog accept"],
       ["core skill full", "agent-browser state save ./auth.json"],
@@ -459,6 +468,7 @@ const inventorySections = Object.freeze([
       "device list",
       "install",
       "install --with-deps",
+      "install --with-deps exits nonzero",
       "upgrade",
       "doctor [--fix]",
       "doctor --offline --quick",
@@ -481,6 +491,7 @@ const inventorySections = Object.freeze([
       root("dashboard start --port <n>"),
       ["device help", "device list"],
       root("install --with-deps"),
+      ["install help", "fails if deps fail"],
       root("upgrade"),
       root("doctor [--fix]"),
       root("profiles"),
@@ -731,7 +742,7 @@ const inventorySections = Object.freeze([
 ]);
 export const CAPABILITY_BASELINE = Object.freeze({
-  targetVersion: "0.28.0",
+  targetVersion: "0.29.1",
   sourceEvidence,
   helpCommands,
   inventorySections,

package/scripts/platform-smoke.mjs CHANGED Viewed

@@ -62,7 +62,7 @@ Environment:
   PLATFORM_SMOKE_MAC_USER             macOS SSH user; default $USER
   PLATFORM_SMOKE_MAC_WORK_ROOT        macOS Crabbox work root
   PLATFORM_SMOKE_MAC_PORT             macOS SSH port; default 22
-  PLATFORM_SMOKE_UBUNTU_IMAGE         Ubuntu local-container image; default pi-agent-browser-native-platform:node24-agent-browser0.27.3
+  PLATFORM_SMOKE_UBUNTU_IMAGE         Ubuntu local-container image; default ${config?.ubuntuContainerImage ?? "pi-agent-browser-native-platform:node24-agent-browser<target>"}
   PLATFORM_SMOKE_WINDOWS_VM           Parallels Windows template VM
   PLATFORM_SMOKE_WINDOWS_SNAPSHOT     Parallels snapshot name
   PLATFORM_SMOKE_WINDOWS_USER         Windows SSH user