pi-agent-browser-native 0.2.54 → 0.2.56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,35 @@
2
2
 
3
3
  ## Unreleased
4
4
 
5
+ ## 0.2.56 - 2026-06-21
6
+
7
+ ### Fixed
8
+
9
+ - Corrected the local Pi development baseline to `@earendil-works/*` `0.79.9`, matching the installed `pi 0.79.9` runtime used for release validation.
10
+
11
+ ### Validation
12
+
13
+ - Re-ran `npm install` and `npm audit --json`; dependency install completed and audit reported zero vulnerabilities.
14
+
15
+ ## 0.2.55 - 2026-06-21
16
+
17
+ ### Changed
18
+
19
+ - Rebaselined upstream capability metadata, command reference, support docs, playbook guidance, platform smoke image tag, and real-upstream output-shape metadata for `agent-browser` `0.29.1` / vercel-labs/agent-browser@4572acf0d71c0086009206c9c1e2136fc54ec9e5.
20
+ - Documented the new upstream `@agent-browser/sandbox` package guidance, `installSystemDependencies: false`, and stricter `install --with-deps` nonzero behavior while keeping sandbox support outside this thin Pi wrapper.
21
+ - Updated local Pi development dependencies to `@earendil-works/*` `0.79.8`, kept Pi core package peers host-provided, and marked those peers optional to avoid install-time peer noise for package consumers.
22
+
23
+ ### Fixed
24
+
25
+ - Kept optional recording paths from being misclassified as required screenshots when release-smoke prompts are collapsed into one line for tmux automation.
26
+ - Added npm overrides for vulnerable transitive dev dependencies so `npm audit` reports zero vulnerabilities without adding runtime dependencies.
27
+
28
+ ### Validation
29
+
30
+ - Ran `npm run verify -- release` against `agent-browser` `0.29.1`; after rebuilding the Ubuntu image and refreshing the Windows `crabbox-ready` snapshot, the gate passed default verification, command-reference checks, build, lifecycle verification, packaged Pi smoke, and macOS/Ubuntu/Windows-native platform smoke.
31
+ - Ran `npm run verify -- real-upstream`, `npm run verify -- dogfood`, `npm run verify -- benchmark`, `npm run verify -- startup-profile --samples 3`, `npm run docs`, `npm run doctor`, `npm audit --json`, `npm run check:platform-smoke`, `npm run smoke:platform:ubuntu-image`, `npm run smoke:platform:doctor`, focused prompt-guard tests, and `git diff --check`.
32
+ - Ran tmux-driven Pi checkout dogfood with `pi --approve --no-extensions --no-skills -e .`, covering the public Sauce Demo checkout-overview flow with screenshot/recording evidence and no order placement; then verified the collapsed one-line screenshot-plus-recording close guard on `https://example.com` after rebuilding `dist/`.
33
+
5
34
  ## 0.2.54 - 2026-06-19
6
35
 
7
36
  ### Fixed
@@ -24,11 +24,12 @@ import { join, resolve } from "node:path";
24
24
  /** @typedef {{ cwd?: string; env?: NodeJS.ProcessEnv; includeProjectConfig?: boolean }} AgentBrowserConfigLoadOptions */
25
25
  /** @typedef {{ browserDefaultProfile?: Required<BrowserDefaultProfileConfig>; browserDefaultProfileScope?: ConfigLayerScope; browserExecutablePath?: string; browserExecutablePathScope?: ConfigLayerScope; trustedBrowserDefaultProfile?: Required<BrowserDefaultProfileConfig>; trustedBrowserDefaultProfileScope?: ConfigLayerScope; trustedBrowserExecutablePath?: string; trustedBrowserExecutablePathScope?: ConfigLayerScope; config: AgentBrowserConfig; webSearchCredentialSources: Partial<Record<WebSearchProvider, CredentialSource>>; webSearchEnabled: boolean; webSearchPreferredProvider: WebSearchProvider; errors: string[]; layers: ConfigLayer[]; paths: AgentBrowserConfigPaths; projectConfigIncluded: boolean; warnings: string[] }} AgentBrowserConfigState */
26
26
  /** @typedef {{ scope: string; path: string; exists: boolean }} ConfigFileSummary */
27
+ const CONFIG_DIR_NAME = ".pi";
27
28
  export const AGENT_BROWSER_CONFIG_ENV = "PI_AGENT_BROWSER_CONFIG";
28
29
  export const BRAVE_API_KEY_ENV = "BRAVE_API_KEY";
29
30
  export const EXA_API_KEY_ENV = "EXA_API_KEY";
30
- export const CONFIG_RELATIVE_PATH = /** @type {const} */ ([".pi", "config", "pi-agent-browser-native", "config.json"]);
31
- export const GLOBAL_CONFIG_RELATIVE_PATH = /** @type {const} */ ([".pi", "config", "pi-agent-browser-native", "config.json"]);
31
+ export const CONFIG_RELATIVE_PATH = /** @type {const} */ ([CONFIG_DIR_NAME, "config", "pi-agent-browser-native", "config.json"]);
32
+ export const GLOBAL_CONFIG_RELATIVE_PATH = /** @type {const} */ ([CONFIG_DIR_NAME, "config", "pi-agent-browser-native", "config.json"]);
32
33
  export const SECRET_COMMAND_TIMEOUT_MS = 15_000;
33
34
  /** @type {Readonly<Record<WebSearchProvider, WebSearchProviderDescriptor>>} */
34
35
  export const WEB_SEARCH_PROVIDER_DESCRIPTORS = Object.freeze({
@@ -43,9 +43,9 @@ export const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
43
43
  "For batch chains that touch cookies, storage, auth, or other secret-bearing commands, use details.batchSteps for per-step artifacts, categories, spill paths, and full structured errors; top-level details.data on batch is only a compact redacted step matrix (success, argv-redacted command, redacted result or scrubbed error text) built from the same presentation rules as standalone calls.",
44
44
  "For non-core families, pass current upstream commands through the native tool directly: network route/requests/har (including request filters like --type/--method/--status), diff snapshot/screenshot/url with scoped/baseline options, trace/profiler/record, console/errors/highlight/inspect/clipboard, stream enable/disable/status, dashboard start/stop, device list for iOS simulator inventory, and chat. For compact network requests output, prefer details.nextActions for request detail, route-mock diagnostics, actionable failed-request networkSourceLookup, filtering, clearing the aggregate buffer before repro, or HAR capture follow-ups instead of guessing request-id syntax. Artifact-producing commands report details.artifacts and verification state; long-running starts such as stream, dashboard, trace/profiler, and record should be paired with the matching stop/disable command when the task is done; stream enable already-enabled outcomes are treated as idempotent success with status/disable follow-ups.",
45
45
  "For Electron desktop apps, prefer top-level electron for wrapper-owned discovery, isolated launch, status, compact probe, and cleanup: list first, treat likely-sensitive annotations as hints rather than enforcement, launch with the default snapshot handoff unless handoff: \"tabs\" is the safer diagnostic starting point, use electron.probe or snapshot -i/qa.attached for current-session state, and always cleanup the returned launchId when done. electron.launch uses an isolated temporary profile; it does not reuse the app's normal signed-in profile or attach to an already-running authenticated app. For signed-in local app state, host-launch the normal app with --remote-debugging-port when appropriate, then use raw args connect <port|url>; after connect, inspect tab list, select the stable tab id such as tab t2, then run a condition wait or snapshot -i before using refs. close commands (`close`, `quit`, or `exit`) only close the browser/CDP session; leave manually launched app shutdown, profile cleanup, and explicit artifacts to the host owner.",
46
- "For provider or specialized app workflows, load version-matched upstream guidance with skills get agentcore|electron|slack|dogfood|vercel-sandbox through the native tool; add --full when you need references/templates, and use skills get --all only for broad skill audits. Provider launches such as -p ios, --provider browserbase/kernel/browseruse/browserless/agentcore, and iOS --device are upstream-owned setup paths; use sessionMode fresh when switching providers and expect external credentials or local Appium/Xcode setup to be required.",
46
+ "For provider or specialized app workflows, load version-matched upstream guidance with skills get agentcore|electron|slack|dogfood|vercel-sandbox through the native tool; add --full when you need references/templates, and use skills get --all only for broad skill audits. Hosted sandbox workflows should use upstream @agent-browser/sandbox helpers outside this wrapper. Provider launches such as -p ios, --provider browserbase/kernel/browseruse/browserless/agentcore, and iOS --device are upstream-owned setup paths; use sessionMode fresh when switching providers and expect external credentials or local Appium/Xcode setup to be required.",
47
47
  "For dialogs and frames, use dialog status/accept/dismiss and frame <selector|main> through native args; dialog commands and eval snippets that look like alert/confirm/prompt/dialog triggers are shorter-bounded than normal browser calls, and timed-out dialog-like interactions may add inspect-dialog-after-timeout, dismiss-dialog-after-timeout, or recover-fresh-session-after-dialog-timeout nextActions. When --confirm-actions produces a pending confirmation, use details.nextActions or exact confirm <id> / deny <id> calls instead of inventing ids.",
48
- "If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. For headed demos, put --headed on the first launch with sessionMode=fresh and verify with screenshot/tab/get-url evidence because tool success cannot prove the OS window is visible to the user. For desktop readiness, prefer real conditions first: wait --text, wait --url, wait --fn, wait --load <state>, wait --download, or qa.attached; for disappearance checks in agent-browser 0.27.3, use wait --fn predicates instead of stale upstream-help examples like wait <selector> --state hidden. Use electron.probe/status for wrapper-owned launch health or target mismatch. Fixed waits are a last resort: use explicit --timeout or top-level timeoutMs for legitimately slow waits, and treat a successful payload like \"waited\":\"timeout\" as elapsed time only—verify completion with an observed condition, fresh snapshot, or screenshot.",
48
+ "If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. For headed demos, put --headed on the first launch with sessionMode=fresh and verify with screenshot/tab/get-url evidence because tool success cannot prove the OS window is visible to the user. For desktop readiness, prefer real conditions first: wait --text, wait --url, wait --fn, wait --load <state>, wait --download, or qa.attached; for disappearance checks in agent-browser 0.29.1, use wait --fn predicates instead of stale upstream-help examples like wait <selector> --state hidden. Use electron.probe/status for wrapper-owned launch health or target mismatch. Fixed waits are a last resort: use explicit --timeout or top-level timeoutMs for legitimately slow waits, and treat a successful payload like \"waited\":\"timeout\" as elapsed time only—verify completion with an observed condition, fresh snapshot, or screenshot.",
49
49
  "For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.",
50
50
  "For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.",
51
51
  "For downloads, prefer download <selector> <path> when an element click should save a file; simple loopback anchor downloads are saved to the requested path when the wrapper can resolve an HTTP(S) href. Do not rely on click alone when you need the downloaded file on disk.",
@@ -18,23 +18,31 @@ const LEGACY_BASH_ALLOW_PATTERNS = [
18
18
  /\bdebug(?:ging)?\b.*\b(?:agent[_ -]?browser|agent_browser|browser integration)\b/i,
19
19
  ];
20
20
  const PROMPT_ARTIFACT_PATH_PATTERN = /(?:^|[\s"'`(:])((?:\/[^\s"'`),;]+|[A-Za-z]:[\\/][^\s"'`),;]+|\.{1,2}[\\/][^\s"'`),;]+|[^\s"'`),;:\\/]+(?:[\\/][^\s"'`),;]+)+|[^\s"'`),;:\\/]+)\.(?:png|jpe?g|webp|gif|webm|mp4|har|pdf|trace|json))(?:[\s"'`),;.]|$)/gi;
21
+ function inferPromptArtifactKind(line, path) {
22
+ const lowerPath = path.toLowerCase();
23
+ if (/\.(?:webm|mp4)$/.test(lowerPath))
24
+ return "recording";
25
+ if (/\.(?:png|jpe?g|webp|gif)$/.test(lowerPath))
26
+ return "screenshot";
27
+ const lowerLine = line.toLowerCase();
28
+ if (lowerLine.includes("screenshot"))
29
+ return "screenshot";
30
+ if (/\b(?:screen\s+recording|recording|webm|video)\b/.test(lowerLine))
31
+ return "recording";
32
+ return undefined;
33
+ }
21
34
  function extractPromptRequestedArtifacts(prompt) {
22
35
  const artifacts = [];
23
36
  const seen = new Set();
24
37
  for (const line of prompt.split(/\r?\n/)) {
25
- const lowerLine = line.toLowerCase();
26
- const kind = lowerLine.includes("screenshot")
27
- ? "screenshot"
28
- : /\b(?:screen\s+recording|recording|webm|video)\b/.test(lowerLine)
29
- ? "recording"
30
- : undefined;
31
- if (!kind)
32
- continue;
33
38
  PROMPT_ARTIFACT_PATH_PATTERN.lastIndex = 0;
34
39
  for (const match of line.matchAll(PROMPT_ARTIFACT_PATH_PATTERN)) {
35
40
  const path = match[1]?.trim();
36
41
  if (!path)
37
42
  continue;
43
+ const kind = inferPromptArtifactKind(line, path);
44
+ if (!kind)
45
+ continue;
38
46
  const key = `${kind}:${path}`;
39
47
  if (seen.has(key))
40
48
  continue;
@@ -177,7 +177,7 @@ That failure should include a structured recovery hint pointing to `sessionMode:
177
177
  Implementation detail lives in `extensions/agent-browser/lib/launch-scoped-flags.ts` (canonical flag metadata shared with playbook/docs assertions), `extensions/agent-browser/lib/argv-descriptor.ts` and `extensions/agent-browser/lib/argv-grammar.ts` (command discovery, `VALUE_FLAGS`, `parseArgvDescriptor`) plus `extensions/agent-browser/lib/runtime.ts` (`getStartupScopedFlags`, `buildExecutionPlan`):
178
178
 
179
179
  - **Command discovery:** Leading argv is scanned with a value-taking allowlist so known global flags and documented command flags consume their values before the upstream command word is identified. Missing-value prevalidation is intentionally limited to upstream global value flags; command-scoped flags and literal text are left to upstream parsing so values like `fill #field --password` are not rejected by wrapper heuristics before the CLI sees them. When upstream adds new global flags that take values ahead of the command, extend both the command-discovery and prevalidation allowlists; when it adds command-specific flags, extend only command discovery/redaction as needed. A smaller set of global boolean flags may be followed by an optional `true`/`false` literal; when present, that literal is consumed as the flag value before command discovery continues.
180
- - **`--state` disambiguation:** Persisted browser `--state` before the command participates in launch-scoped validation and tab-correction hints. The same flag spelling after a `wait` command is excluded from startup-scoped detection so upstream help examples such as `wait @ref --state hidden` do not spuriously require `sessionMode: "fresh"` while an implicit session is active. As of upstream `agent-browser 0.27.3`, the parser still does not implement those `wait --state` examples as distinct wait modes, so agent-facing docs recommend `wait --fn` predicates for disappearance checks instead.
180
+ - **`--state` disambiguation:** Persisted browser `--state` before the command participates in launch-scoped validation and tab-correction hints. The same flag spelling after a `wait` command is excluded from startup-scoped detection so upstream help examples such as `wait @ref --state hidden` do not spuriously require `sessionMode: "fresh"` while an implicit session is active. As of upstream `agent-browser 0.29.1`, the parser still does not implement those `wait --state` examples as distinct wait modes, so agent-facing docs recommend `wait --fn` predicates for disappearance checks instead.
181
181
  - **`--auto-connect`:** Treated as launch-scoped only when enabled (`--auto-connect` bare or `true`). `--auto-connect false` is ignored for startup-scoped blocking so disabled attach hints do not force a fresh launch.
182
182
 
183
183
  **Sessionless inspection and local commands:** Plain-text global help and version probes (`--help`, `-h`, `--version`, `-V`) must never allocate or bind the extension-managed session. The same session-ownership rule applies to read-only upstream `skills list`, `skills get …`, and `skills path …`, local auth profile management (`auth save/list/show/delete/remove`), plus local/setup surfaces such as `profiles`, `dashboard start/stop`, `device list`, `doctor`, `install`, `upgrade`, `session list`, and targeted/all local saved-state maintenance (`state list/show`, `state clear --all`, `state clear -a`, `state clear <session-name>`, `state clean --older-than <days>`, `state rename`). Non-plain-text sessionless commands still run with `--json` for machine-readable output, but the planner does not prepend the implicit managed `--session`, so an agent can inspect local capabilities or start/stop the standalone dashboard without consuming the implicit session slot before a real `open`. Browser-backed, context-dependent, or incomplete commands such as root `session`, untargeted `state clear`, bare `state clean`, `auth login`, `state save`, and `state load` keep normal managed-session injection. Command-shape allowlisting lives in `extensions/agent-browser/lib/command-policy.ts` (`needsManagedSession`), while `extensions/agent-browser/lib/runtime.ts` (`isPlainTextInspectionArgs`, `buildExecutionPlan`) applies that decision to execution planning.
@@ -18,23 +18,22 @@ This project intentionally blocks normal `agent-browser` bash usage in most agen
18
18
 
19
19
  <!-- agent-browser-capability-baseline:start upstream-baseline -->
20
20
  <!-- Generated from scripts/agent-browser-capability-baseline.mjs. Run `npm run docs -- command-reference write` to update. Do not edit manually. -->
21
- This reference is baselined to the locally installed `agent-browser 0.28.0` command/help surface, audited against vercel-labs/agent-browser@6323df571ffd17d14e60ec19fcb56cc1caf498ab. Upstream `agent-browser` remains the source of truth for command semantics; this file is the local fallback for Pi agent sessions where direct binary help is blocked or discouraged.
21
+ This reference is baselined to the locally installed `agent-browser 0.29.1` command/help surface, audited against vercel-labs/agent-browser@4572acf0d71c0086009206c9c1e2136fc54ec9e5. Upstream `agent-browser` remains the source of truth for command semantics; this file is the local fallback for Pi agent sessions where direct binary help is blocked or discouraged.
22
22
 
23
23
  The lightweight drift check is `npm run verify -- command-reference`. Run it whenever the installed upstream `agent-browser` version changes or this reference is edited.
24
24
 
25
25
  Use `npm run benchmark:agent-browser` or `npm run verify -- benchmark` before and after agent-facing workflow abstractions to measure task success, tool calls, model-visible output size, stale-ref behavior, artifact success, failure-category coverage, and elapsed-time estimates.
26
26
  <!-- agent-browser-capability-baseline:end upstream-baseline -->
27
27
 
28
- ### Upstream 0.27.3 install-only rebaseline
28
+ ### Upstream 0.29.1 rebaseline
29
29
 
30
- The 0.27.3 rebaseline is an install-only compatibility update: upstream changed Windows ARM64 installation fallback behavior and did not change the CLI/help surface or browser-command semantics. This wrapper adds no compatibility shim for older upstream releases. The wrapper must still not hide these prior upstream fixes:
30
+ The 0.29.1 rebaseline adds no new core browser CLI commands. It captures upstream's new hosted-sandbox helper package and install behavior:
31
31
 
32
- - click reliability: upstream now scrolls off-viewport elements before coordinate resolution, handles JavaScript dialogs promptly, recovers mouse state after dialog-opening clicks, and reports overlay interception before dispatching input
33
- - frame-scoped CSS selectors and waits, including cross-process iframe click-coordinate translation
34
- - wait timeout handling: documented 25s default, honored `--timeout` across wait variants, and appropriate client read budgets for long waits; the native wrapper forwards explicit long waits and derives a subprocess watchdog when top-level `timeoutMs` is omitted
35
- - form commands: `find label` matches `aria-label` / `aria-labelledby`, `select` errors when no option matches, and `type` parses `--clear` / `--delay` instead of typing them as literal text
36
- - warm CLI command latency and batch daemon respawn/retry improvements
37
- - GNU Linux release artifacts pinned to glibc 2.28
32
+ - `@agent-browser/sandbox` is the upstream helper package for Eve and Vercel Sandbox workflows. It is not bundled by this Pi extension; load `skills get vercel-sandbox --full` when a task needs that hosted-sandbox guidance.
33
+ - Fresh Eve and Vercel Sandbox helpers install Chromium system dependencies by default; pass `installSystemDependencies: false` only when the sandbox image already has those libraries.
34
+ - `install --with-deps` now exits nonzero when the package manager cannot install required browser libraries (`install --with-deps exits nonzero`).
35
+
36
+ Runtime probes on 2026-06-21 confirm that the following old caveats still stand in `agent-browser 0.29.1`: `find ... uncheck` and `wait <selector> --state hidden|detached` remain advertised by help but fail at runtime, and `wait --url` glob behavior remains narrow. Keep the wrapper's direct `uncheck` passthrough, `wait --fn` disappearance guidance, and `job.assertUrl` glob workaround.
38
37
 
39
38
  ### Upstream 0.28.0 rebaseline
40
39
 
@@ -47,6 +46,17 @@ The 0.28.0 rebaseline tracks new local/infra upstream surfaces and does not chan
47
46
 
48
47
  The wrapper adds no compatibility shim for older upstream releases.
49
48
 
49
+ ### Upstream 0.27.3 install-only rebaseline
50
+
51
+ The 0.27.3 rebaseline is an install-only compatibility update: upstream changed Windows ARM64 installation fallback behavior and did not change the CLI/help surface or browser-command semantics. This wrapper adds no compatibility shim for older upstream releases. The wrapper must still not hide these prior upstream fixes:
52
+
53
+ - click reliability: upstream now scrolls off-viewport elements before coordinate resolution, handles JavaScript dialogs promptly, recovers mouse state after dialog-opening clicks, and reports overlay interception before dispatching input
54
+ - frame-scoped CSS selectors and waits, including cross-process iframe click-coordinate translation
55
+ - wait timeout handling: documented 25s default, honored `--timeout` across wait variants, and appropriate client read budgets for long waits; the native wrapper forwards explicit long waits and derives a subprocess watchdog when top-level `timeoutMs` is omitted
56
+ - form commands: `find label` matches `aria-label` / `aria-labelledby`, `select` errors when no option matches, and `type` parses `--clear` / `--delay` instead of typing them as literal text
57
+ - warm CLI command latency and batch daemon respawn/retry improvements
58
+ - GNU Linux release artifacts pinned to glibc 2.28
59
+
50
60
  ## Core mental model
51
61
 
52
62
  Input mode chooser (one per call): **`args`** for the default open → snapshot -i → click/fill `@refs` flow; **`semanticAction`** for stable role/text/label targets; **`job`** / **`qa`** for multi-step checks; **`electron`** for desktop apps only; **`sourceLookup`** / **`networkSourceLookup`** are **experimental candidates-only** helpers (not authoritative mappings). Do not pass `--json` in `args`—the wrapper injects it. Match link and button text to the latest snapshot (on `https://example.com/` the main link is `Learn more`, not legacy `More information...` copy). See [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#input-mode-chooser) for snapshot variants (`-i` vs `--compact` vs full) and batching three or more getters.
@@ -650,7 +660,7 @@ For dense pages, the wrapper also accepts `snapshot -i --search <text>` and `sna
650
660
  | `wait --download [path]` | Wait for a download started by a previous action and optionally save it to `path`; successful wrapper results include upstream-reported `savedFilePath`/`savedFile`, while `details.artifacts[].exists` is the wrapper's on-disk verification signal. |
651
661
  | `wait --download [path] --timeout <ms>` | Set download-start timeout in milliseconds. The native Pi wrapper forwards explicit wait timeouts and extends the subprocess watchdog unless the caller supplies top-level `timeoutMs`. |
652
662
 
653
- Current upstream source still does not parse `wait <selector> --state hidden` / `wait <selector> --state detached` as distinct wait modes even though upstream help mentions those examples. Use `wait --fn "!document.querySelector('#spinner')"` or another explicit JavaScript predicate for disappearance/detach checks until upstream parser support exists.
663
+ Current upstream 0.29.1 source still does not parse `wait <selector> --state hidden` / `wait <selector> --state detached` as distinct wait modes even though upstream help mentions those examples. Use `wait --fn "!document.querySelector('#spinner')"` or another explicit JavaScript predicate for disappearance/detach checks until upstream parser support exists.
654
664
 
655
665
  ### Diff, debug, and streaming
656
666
 
@@ -708,7 +718,7 @@ Long-running or lifecycle commands should be explicitly paired with cleanup call
708
718
  | `dashboard stop` | Stop the dashboard server. |
709
719
  | `device list` | List available iOS simulators. Use with `-p ios` when exercising iOS provider flows. |
710
720
  | `install` | Install browser binaries. |
711
- | `install --with-deps` | Install browser binaries plus Linux system dependencies. |
721
+ | `install --with-deps` | Install browser binaries plus Linux system dependencies; exits nonzero when required libraries cannot be installed. |
712
722
  | `upgrade` | Upgrade `agent-browser` to the latest version. |
713
723
  | `doctor [--fix]` | Diagnose install issues and optionally auto-clean stale files. Use `doctor --offline --quick` for a fast local-only check and `doctor --json` for structured output. |
714
724
  | `plugin add <ref>` | Add a plugin from npm or GitHub (`<owner>/<repo>` or `@scope/<name>`); writes `agent-browser.json`. Flags such as `--name`, `--capability`, `--global`, and `--no-manifest` shape discovery. |
@@ -879,14 +889,14 @@ Other useful environment variables include `AGENT_BROWSER_DEFAULT_TIMEOUT`, `AGE
879
889
  <!-- agent-browser-capability-baseline:start capability-token-baseline -->
880
890
  <!-- Generated from scripts/agent-browser-capability-baseline.mjs. Run `npm run docs -- command-reference write` to update. Do not edit manually. -->
881
891
  <details>
882
- <summary>Generated verifier capability baseline for agent-browser 0.28.0</summary>
892
+ <summary>Generated verifier capability baseline for agent-browser 0.29.1</summary>
883
893
 
884
894
  This generated block is review data for maintainers. The human-authored reference sections above remain the readable command guide.
885
895
 
886
896
  #### Source evidence
887
897
  - repository: `vercel-labs/agent-browser`
888
- - upstream HEAD: `6323df571ffd17d14e60ec19fcb56cc1caf498ab`
889
- - upstream package version: `0.28.0`
898
+ - upstream HEAD: `4572acf0d71c0086009206c9c1e2136fc54ec9e5`
899
+ - upstream package version: `0.29.1`
890
900
  - inspected: `agent-browser --version`
891
901
  - inspected: `agent-browser --help`
892
902
  - inspected: `selected agent-browser <command> --help output`
@@ -897,12 +907,17 @@ This generated block is review data for maintainers. The human-authored referenc
897
907
  - inspected: `agent-browser.schema.json`
898
908
  - inspected: `cli/src/commands.rs`
899
909
  - inspected: `cli/src/flags.rs`
910
+ - inspected: `packages/@agent-browser/sandbox/README.md`
911
+ - inspected: `packages/@agent-browser/sandbox/src/shared.ts`
912
+ - inspected: `packages/@agent-browser/sandbox/src/vercel.ts`
913
+ - inspected: `packages/@agent-browser/sandbox/src/eve.ts`
900
914
 
901
915
  #### Upstream help commands sampled
902
916
  - root help: `agent-browser --help`
903
917
  - skills help: `agent-browser skills --help`
904
918
  - skills list: `agent-browser skills list`
905
919
  - core skill full: `agent-browser skills get core --full`
920
+ - vercel sandbox skill full: `agent-browser skills get vercel-sandbox --full`
906
921
  - open help: `agent-browser open --help`
907
922
  - click help: `agent-browser click --help`
908
923
  - key help: `agent-browser key --help`
@@ -954,11 +969,11 @@ This generated block is review data for maintainers. The human-authored referenc
954
969
  - plugin help: `agent-browser plugin --help`
955
970
 
956
971
  #### Inventory sections
957
- - Built-in skills: 13 human-doc token(s), 13 upstream token(s)
972
+ - Built-in skills: 15 human-doc token(s), 15 upstream token(s)
958
973
  - Core page, element, navigation, and extraction commands: 74 human-doc token(s), 74 upstream token(s)
959
974
  - Sessions, state, tabs, frames, dialogs, and windows: 20 human-doc token(s), 16 upstream token(s)
960
975
  - Network, storage, artifacts, diagnostics, and performance: 43 human-doc token(s), 53 upstream token(s)
961
- - Batch, auth, confirmations, setup, dashboard, devices, and AI commands: 30 human-doc token(s), 34 upstream token(s)
976
+ - Batch, auth, confirmations, setup, dashboard, devices, and AI commands: 31 human-doc token(s), 35 upstream token(s)
962
977
  - Global flags, config, providers, policy, and environment: 121 human-doc token(s), 91 upstream token(s)
963
978
 
964
979
  #### Human-authored doc tokens required
@@ -974,6 +989,8 @@ This generated block is review data for maintainers. The human-authored referenc
974
989
  - `skills get dogfood`
975
990
  - `skills get vercel-sandbox`
976
991
  - `skills get agentcore`
992
+ - `@agent-browser/sandbox`
993
+ - `installSystemDependencies: false`
977
994
  - `skills path [name]`
978
995
  - `AGENT_BROWSER_SKILLS_DIR`
979
996
 
@@ -1140,6 +1157,7 @@ This generated block is review data for maintainers. The human-authored referenc
1140
1157
  - `device list`
1141
1158
  - `install`
1142
1159
  - `install --with-deps`
1160
+ - `install --with-deps exits nonzero`
1143
1161
  - `upgrade`
1144
1162
  - `doctor [--fix]`
1145
1163
  - `doctor --offline --quick`
@@ -1287,6 +1305,8 @@ This generated block is review data for maintainers. The human-authored referenc
1287
1305
  - skills list: `dogfood`
1288
1306
  - skills list: `vercel-sandbox`
1289
1307
  - skills list: `agentcore`
1308
+ - vercel sandbox skill full: `@agent-browser/sandbox`
1309
+ - vercel sandbox skill full: `installSystemDependencies: false`
1290
1310
  - core skill full: `agent-browser frame @e3`
1291
1311
  - core skill full: `agent-browser dialog accept`
1292
1312
  - core skill full: `agent-browser state save ./auth.json`
@@ -1450,6 +1470,7 @@ This generated block is review data for maintainers. The human-authored referenc
1450
1470
  - root help: `dashboard start --port <n>`
1451
1471
  - device help: `device list`
1452
1472
  - root help: `install --with-deps`
1473
+ - install help: `fails if deps fail`
1453
1474
  - root help: `upgrade`
1454
1475
  - root help: `doctor [--fix]`
1455
1476
  - root help: `profiles`
@@ -26,10 +26,10 @@ When upstream ships a new `agent-browser` or the inventory changes:
26
26
 
27
27
  ## Audit result
28
28
 
29
- - Target upstream: `agent-browser 0.28.0` (must match `CAPABILITY_BASELINE.targetVersion` in [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs)).
29
+ - Target upstream: `agent-browser 0.29.1` (must match `CAPABILITY_BASELINE.targetVersion` in [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs)).
30
30
  - Source of truth: `CAPABILITY_BASELINE.inventorySections` in the same file (stable `id` keys: `skills`, `core-commands`, `state-tabs-frames-dialogs`, `network-storage-artifacts-diagnostics`, `batch-auth-setup-ai`, `options-and-env`).
31
- - Status: supported for the current wrapper contract after the 2026-05-26 all-command audit.
32
- - High-priority support gaps: 2026-05-26 audit found sessionless local commands and command-scoped value flags needed sharper wrapper handling; runtime/tests/docs now cover those paths. The 0.28.0 rebaseline adds local `mcp` and `plugin` surfaces plus plugin-backed credential login; wrapper docs/tests mark `mcp` and known `plugin` commands sessionless, with no compatibility shim for older upstream releases. The prior rebaseline preserves thin support for upstream click reliability, frame-scoped selectors/waits, form-command fixes, daemon retry improvements, and glibc-pinned release artifacts; wrapper wait planning forwards explicit long `wait <ms>` / `wait --timeout <ms>` calls instead of rejecting them before spawn. Remaining upstream-owned caveat: current help mentions `wait <selector> --state hidden`, but source parsing does not implement that distinct wait mode, so wrapper docs steer agents to `wait --fn` predicates.
31
+ - Status: command-surface supported for the current wrapper contract after the 2026-06-21 0.29.1 audit; release-gate evidence below was refreshed against `agent-browser 0.29.1`.
32
+ - High-priority support gaps: the 2026-05-26 audit found sessionless local commands and command-scoped value flags needed sharper wrapper handling; runtime/tests/docs now cover those paths. The 0.28.0 rebaseline added local `mcp` and `plugin` surfaces plus plugin-backed credential login; wrapper docs/tests mark `mcp` and known `plugin` commands sessionless, with no compatibility shim for older upstream releases. The 0.29.1 rebaseline adds upstream `@agent-browser/sandbox` helper-package guidance and stricter `install --with-deps` failure semantics; no new wrapper runtime mode or bundled dependency is required. Prior upstream improvements to click reliability, frame-scoped selectors/waits, form commands, daemon retries, and glibc-pinned release artifacts remain thin passthrough. Remaining upstream-owned caveat: current help still mentions `wait <selector> --state hidden` / `detached` and `find ... uncheck`, but runtime probes show those advertised shapes still fail, so wrapper docs keep steering agents to `wait --fn` predicates and direct `uncheck` passthrough.
33
33
  - Post-`v0.2.29` review state: commits `eb55320` through `86abbfb` add browser guidance/smoke coverage plus `RQ-0086` click-probe reduction, `RQ-0087` same-snapshot form fill batching, `RQ-0088` current-ref fallback on locator misses, `RQ-0089` direct-upstream click mutation investigation, and `RQ-0090` stop-boundary/artifact-path guidance. Verification gates below were rerun on 2026-05-18 after those tasks landed. Constrained `job` (`RQ-0064`), the lightweight `qa` preset (`RQ-0065`), the experimental `sourceLookup` helper (`RQ-0066`), the experimental `networkSourceLookup` helper (`RQ-0067`), optional Exa/Brave-backed `agent_browser_web_search` with Pi-scoped package config (`RQ-0121`), and agent recovery for search/profile configuration failures (`RQ-0122`) are implemented; see [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#job), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#qa), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#sourcelookup), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#networksourcelookup), and [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#optional-companion-web-search). Reusable browser recipes (`RQ-0068`) are intentionally not adopted as a runtime surface; see [`ARCHITECTURE.md`](ARCHITECTURE.md#no-reusable-recipe-layer-yet).
34
34
 
35
35
  ## Open UX/reliability follow-ups from 2026-05-29 agent feedback
@@ -44,24 +44,25 @@ Current summary:
44
44
  | RQ-0123–RQ-0127 | Stress-report wrapper fixes shipped; prompt-derived business-action blocking remains intentionally out of scope. | [`docs/support-notes.md`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/docs/support-notes.md) |
45
45
  | RQ-0101 | Upstream `agent-browser 0.27.2` rebaseline shipped. | [`docs/support-notes.md`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/docs/support-notes.md) |
46
46
  | RQ-0128 | Upstream `agent-browser 0.27.3` install-only rebaseline shipped; no new wrapper capability adopted. | [`docs/support-notes.md`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/docs/support-notes.md) |
47
+ | RQ-0129 | Upstream `agent-browser 0.29.1` rebaseline shipped; sandbox helpers are documented upstream package guidance, not a wrapper runtime. | [`docs/support-notes.md`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/docs/support-notes.md) |
47
48
 
48
49
  ## Verification evidence
49
50
 
50
- Re-run the gates below before each release; this table records what the closure audit exercised. Rows marked **Current for 0.28.0** were rerun after the `agent-browser 0.28.0` rebaseline. Rows marked **Historical / pending refresh** are useful prior evidence but must not be treated as current release proof until rerun under the named condition.
51
+ Re-run the gates below before each release; this table records what the closure audit exercised. Rows whose status starts with **Current** were rerun on 2026-06-21 against `agent-browser 0.29.1`.
51
52
 
52
53
  | Gate | Evidence | Status |
53
54
  | --- | --- | --- |
54
- | Default local gate | `npm run verify` checks generated playbook drift, clean-builds generated `dist/`, runs `tsc --noEmit`, unit/fake tests, generated command-reference blocks, and live command-reference sampling. | **Current for 0.28.0:** pass on 2026-06-18 as part of `npm run verify -- release` (clean build, TypeScript, 571 passed, 1 skipped, generated docs check, and live command-reference sampling passed with `agent-browser 0.28.0` on `PATH`). |
55
+ | Default local gate | `npm run verify` checks generated playbook drift, clean-builds generated `dist/`, runs `tsc --noEmit`, unit/fake tests, generated command-reference blocks, and live command-reference sampling. | **Current for 0.29.1:** pass on 2026-06-21 inside `npm run verify -- release`; command-reference verification also passed standalone after the rebaseline. |
55
56
  | Pre-PR local gate | `npm run verify -- pre-pr` composes the default gate with package-content verification. Use before larger local handoffs or PR-ready claims when lifecycle/platform/live dogfood cost is not warranted. | Added 2026-06-10; orchestration is locked by `test/project-verify.test.ts` and does not change release mode. |
56
- | Real upstream contract | `npm run verify -- real-upstream` runs the localhost fixture matrix against the real installed `agent-browser` matching the baseline. | **Current for 0.28.0:** pass on 2026-06-18 (`npm run verify -- real-upstream`; localhost fixture matrix passed against installed `agent-browser 0.28.0`). |
57
- | Packaged Pi smoke | `npm run verify -- package-pi` validates package contents, loads the packaged `agent_browser` tool without requiring optional Brave config, and executes fake-upstream `--version`. | **Current for 0.28.0:** pass on 2026-06-18 as part of `npm run verify -- release` (`verify-package.mjs --smoke-pi`; packed 118 files, packaged `agent_browser --version` invocation passed). |
58
- | Startup profile | `npm run verify -- startup-profile --samples <n>` clean-builds generated `dist/`, records direct package entrypoint import/factory timing in fresh Node processes, and writes `.artifacts/startup-profile/latest.json`. It must not launch Pi, tmux, mise, npm, browsers, or `agent-browser`; full Pi TUI ready-prompt profiling is intentionally excluded after it proved too invasive for routine verification. Run this opt-in evidence when package layout, the compiled entrypoint, top-level imports, schema registration, or prompt/config startup logic changes. | **Current for compiled entrypoint:** pass on 2026-06-11 with direct compiled entrypoint import+factory median 47.136 ms in earlier samples, below the 250 ms direct-import guard and below the prior ~96 ms TypeScript-entrypoint baseline. Full-Pi startup numbers from the unsafe tmux profiler are not accepted as ongoing release evidence. |
59
- | Deterministic dogfood smoke | `npm run verify -- dogfood` (`scripts/verify-agent-browser-dogfood.ts`) drives the native wrapper against a local file fixture through top-level `qa`, `semanticAction`, constrained `job`, screenshot artifact verification, and session close with the real `agent-browser` on `PATH`. | **Current for 0.28.0:** pass on 2026-06-18 (`npm run verify -- dogfood`; `qa-url`, fresh/current opens, semantic click, job screenshot artifact verification, and close all passed). |
60
- | Efficiency benchmark | `npm run verify -- benchmark` runs deterministic browser workflow accounting plus focused benchmark tests, including JSONL sampling fixtures and job/qa/sourceLookup/networkSourceLookup/Electron scenario coverage. | **Historical / pending refresh:** pass on 2026-05-29 (`npm run verify -- benchmark`). This deterministic gate is not upstream-version-specific, but rerun before claiming current benchmark evidence after benchmark or workflow-scenario edits. |
61
- | Crabbox platform smoke | `npm run check:platform-smoke` syntax-checks the harness and cheap invariants. `npm run smoke:platform:ubuntu-image` builds the project-owned Linux image, `npm run smoke:platform:doctor` checks Crabbox 0.26.0+ and local target readiness, and `npm run smoke:platform:all` runs doctor first, then fast target-local `platform-build` (`npm run verify -- platform-target`, pack, clean Pi install) plus `browser-dogfood-smoke` on Crabbox `macos`, `ubuntu`, and `windows-native`; see [`platform-smoke.md`](platform-smoke.md). Target artifacts include Crabbox/provider/work-root metadata, and release review also checks provider-specific `crabbox list` commands for leftover leases/clones. | **Current for 0.28.0:** pass on 2026-06-18 inside `npm run verify -- release`; rebuilt Ubuntu image `pi-agent-browser-native-platform:node24-agent-browser0.28.0`, refreshed the Windows `crabbox-ready` template snapshot to `agent-browser 0.28.0`, doctor passed, then Crabbox platform smoke passed for macOS, Ubuntu, and native Windows. |
62
- | `verify -- release` / `prepublishOnly` | `npm run verify -- release` chains the default gate with the configured-source lifecycle harness, packaged Pi smoke, and the release-blocking Crabbox platform matrix (`verifySteps` `release` in [`scripts/project.mjs`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/scripts/project.mjs)). `package.json` `prepublishOnly` runs that compose before `npm pack --dry-run` during `npm publish`. It intentionally omits standalone real-upstream, host-only dogfood, and benchmark modes—see [`RELEASE.md`](RELEASE.md#pre-release-checks). | **Current for 0.28.0:** pass on 2026-06-18 (`npm run verify -- release`), including default unit/fake gate, generated docs checks, live command-reference sampling, lifecycle harness, packaged Pi smoke, and macOS/Ubuntu/native-Windows Crabbox platform smoke. |
63
- | Configured-source lifecycle | `npm run verify -- lifecycle` (`scripts/verify-lifecycle.mjs`) drives `/reload`, closes and relaunches Pi with the same exact `--session-id`, checks the JSONL session header id, session continuity, slash-command sentinel tokens (`v1` before reload and `v2` after full relaunch because compiled JS package modules are process-cached), persisted spill reachability, and real Pi `tool_result` failure-patch semantics for a QA reclassification with a fake upstream on `PATH`. Default Pi model is `zai/glm-5.2`; default per-step wait is **180000 ms** (`DEFAULT_TIMEOUT_MS`); override model with `--model <id>` and waits with `--timeout-ms <ms>`. Passthrough flags in [`scripts/project.mjs`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/scripts/project.mjs): `--keep-artifacts`, `--model`, `--verbose`, and `--timeout-ms` plus a value (for example `npm run verify -- lifecycle --model openai-codex/gpt-5.5:minimal --keep-artifacts --verbose --timeout-ms 600000`). | **Current for 0.28.0:** pass on 2026-06-18 as part of `npm run verify -- release`; managed browser session continuity and persisted full output verified before cleanup. |
64
- | Quick isolated Pi smoke | `pi --approve --no-extensions --no-skills -e . --tools agent_browser` from trusted repo root; native `agent_browser` only. | **Historical / pending refresh:** pass on 2026-06-11 via tmux with `pi --approve --no-extensions --no-skills -e .`; native `agent_browser` only. Covered `qa` with `sessionMode: "fresh"` against `https://example.com`, `open` and compact `snapshot -i` on `https://react.dev`, `semanticAction` link click to `https://react.dev/learn`, screenshot artifact verification at `/tmp/piab-release-smoke-react.png`, and `close`; explicit screenshot and temporary session artifacts were removed after evidence capture. Broader historical coverage also includes version/help/skills, eval stdin, batch stdin, explicit session, network requests, console/errors, diff snapshot, stream status/disable, dashboard start/stop, and chat credential-failure pass-through during RQ-0055. Not rerun for 0.28.0 unless noted in release evidence. |
57
+ | Real upstream contract | `npm run verify -- real-upstream` runs the localhost fixture matrix against the real installed `agent-browser` matching the baseline. | **Current for 0.29.1:** pass on 2026-06-21 (`npm run verify -- real-upstream`; localhost fixture matrix and plugin list probe passed against installed `agent-browser 0.29.1`). |
58
+ | Packaged Pi smoke | `npm run verify -- package-pi` validates package contents, loads the packaged `agent_browser` tool without requiring optional Brave config, and executes fake-upstream `--version`. | **Current for 0.29.1:** pass on 2026-06-21 as part of `npm run verify -- release` (`verify-package.mjs --smoke-pi`; packaged `agent_browser --version` invocation passed). |
59
+ | Startup profile | `npm run verify -- startup-profile --samples <n>` clean-builds generated `dist/`, records direct package entrypoint import/factory timing in fresh Node processes, and writes `.artifacts/startup-profile/latest.json`. It must not launch Pi, tmux, mise, npm, browsers, or `agent-browser`; full Pi TUI ready-prompt profiling is intentionally excluded after it proved too invasive for routine verification. Run this opt-in evidence when package layout, the compiled entrypoint, top-level imports, schema registration, or prompt/config startup logic changes. | **Current for compiled entrypoint:** pass on 2026-06-21 (`npm run verify -- startup-profile --samples 3`; direct compiled entrypoint import+factory median 47.3 ms, below the 250 ms budget). Full-Pi startup numbers from the unsafe tmux profiler are not accepted as ongoing release evidence. |
60
+ | Deterministic dogfood smoke | `npm run verify -- dogfood` (`scripts/verify-agent-browser-dogfood.ts`) drives the native wrapper against a local file fixture through top-level `qa`, `semanticAction`, constrained `job`, screenshot artifact verification, and session close with the real `agent-browser` on `PATH`. | **Current for 0.29.1:** pass on 2026-06-21 (`npm run verify -- dogfood`; `qa-url`, fresh/current opens, semantic click, job screenshot artifact verification, and close all passed). |
61
+ | Efficiency benchmark | `npm run verify -- benchmark` runs deterministic browser workflow accounting plus focused benchmark tests, including JSONL sampling fixtures and job/qa/sourceLookup/networkSourceLookup/Electron scenario coverage. | **Current:** pass on 2026-06-21 (`npm run verify -- benchmark`; 13/13 deterministic scenarios passed). |
62
+ | Crabbox platform smoke | `npm run check:platform-smoke` syntax-checks the harness and cheap invariants. `npm run smoke:platform:ubuntu-image` builds the project-owned Linux image, `npm run smoke:platform:doctor` checks Crabbox 0.26.0+ and local target readiness, and `npm run smoke:platform:all` runs doctor first, then fast target-local `platform-build` (`npm run verify -- platform-target`, pack, clean Pi install) plus `browser-dogfood-smoke` on Crabbox `macos`, `ubuntu`, and `windows-native`; see [`platform-smoke.md`](platform-smoke.md). Target artifacts include Crabbox/provider/work-root metadata, and release review also checks provider-specific `crabbox list` commands for leftover leases/clones. | **Current for 0.29.1:** pass on 2026-06-21 inside `npm run verify -- release`; rebuilt Ubuntu image `pi-agent-browser-native-platform:node24-agent-browser0.29.1`, refreshed the Windows `crabbox-ready` template snapshot to `agent-browser 0.29.1`, doctor passed, then Crabbox platform smoke passed for macOS, Ubuntu, and native Windows. |
63
+ | `verify -- release` / `prepublishOnly` | `npm run verify -- release` chains the default gate with the configured-source lifecycle harness, packaged Pi smoke, and the release-blocking Crabbox platform matrix (`verifySteps` `release` in [`scripts/project.mjs`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/scripts/project.mjs)). `package.json` `prepublishOnly` runs that compose before `npm pack --dry-run` during `npm publish`. It intentionally omits standalone real-upstream, host-only dogfood, and benchmark modes—see [`RELEASE.md`](RELEASE.md#pre-release-checks). | **Current for 0.29.1 / Pi 0.79.9:** pass on 2026-06-21 (`npm run verify -- release`), including default unit/fake gate, generated docs checks, live command-reference sampling, lifecycle harness, packaged Pi smoke, and macOS/Ubuntu/native-Windows Crabbox platform smoke. |
64
+ | Configured-source lifecycle | `npm run verify -- lifecycle` (`scripts/verify-lifecycle.mjs`) drives `/reload`, closes and relaunches Pi with the same exact `--session-id`, checks the JSONL session header id, session continuity, slash-command sentinel tokens (`v1` before reload and `v2` after full relaunch because compiled JS package modules are process-cached), persisted spill reachability, and real Pi `tool_result` failure-patch semantics for a QA reclassification with a fake upstream on `PATH`. Default Pi model is `zai/glm-5.2`; default per-step wait is **180000 ms** (`DEFAULT_TIMEOUT_MS`); override model with `--model <id>` and waits with `--timeout-ms <ms>`. Passthrough flags in [`scripts/project.mjs`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/scripts/project.mjs): `--keep-artifacts`, `--model`, `--verbose`, and `--timeout-ms` plus a value (for example `npm run verify -- lifecycle --model openai-codex/gpt-5.5:minimal --keep-artifacts --verbose --timeout-ms 600000`). | **Current for 0.29.1:** pass on 2026-06-21 as part of `npm run verify -- release`; managed browser session continuity and persisted full output verified before cleanup. |
65
+ | Quick isolated Pi smoke | `pi --approve --no-extensions --no-skills -e . --tools agent_browser` from trusted repo root; native `agent_browser` only. | **Current for 0.29.1 / Pi 0.79.9:** pass on 2026-06-21 via tmux with `pi --approve --no-extensions --no-skills -e . --model openai-codex/gpt-5.5:minimal --tools agent_browser`. Covered the public Sauce Demo checkout-overview flow with a clean context, native sorting/click/fill steps, screenshot and recording evidence, console/page-error/network diagnostics, and no order placed. A one-line screenshot-plus-recording close-guard smoke on `https://example.com` passed after rebuilding `dist/`, confirming that `close` succeeds once both artifact paths are verified. Temp artifacts and tmux sessions were cleaned up after evidence capture. |
65
66
 
66
67
  Runtime floor note: package metadata keeps Pi core package peer ranges wildcard per installed Pi package docs, but `pi-agent-browser-doctor` / `npm run doctor` treats `pi --version` below 0.79.0 as a setup failure. This keeps package dependency shape aligned with Pi package loading while still making unsupported host Pi versions a release and first-run blocker.
67
68
 
@@ -69,11 +70,11 @@ Runtime floor note: package metadata keeps Pi core package peer ranges wildcard
69
70
 
70
71
  | Baseline section | Baseline items | Documentation | Runtime handling | Test coverage | Validation status |
71
72
  | --- | --- | --- | --- | --- | --- |
72
- | Built-in skills | 13 canonical tokens from baseline section `skills`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#built-in-skills). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#built-in-skills), generated baseline block, README proof section, release docs. | `needsManagedSession` keeps read-only skills inspection sessionless while preserving thin upstream passthrough. | Runtime and extension-validation skills/provider matrix; real-upstream inspection/skills group. | Supported. |
73
+ | Built-in skills | 15 canonical tokens from baseline section `skills`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#built-in-skills). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#built-in-skills), generated baseline block, README proof section, release docs. | `needsManagedSession` keeps read-only skills inspection sessionless while preserving thin upstream passthrough; upstream `@agent-browser/sandbox` remains external package guidance, not a bundled wrapper dependency. | Runtime and extension-validation skills/provider matrix; real-upstream inspection/skills group. | Supported. |
73
74
  | Core page, element, navigation, and extraction commands | 74 canonical tokens from baseline section `core-commands`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#core-page-and-element-commands). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#core-page-and-element-commands), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md), README quick start. | Thin passthrough with wrapper-owned JSON/session planning, ref guidance, artifact verification, page-change summaries, click-dispatch diagnostics, no-op scroll/focus diagnostics, shorthand compilers, and redaction. | Real-upstream core matrix plus fake core matrix for passthrough, ordering, diagnostics, and compiler validation. | Supported. Upstream semantics remain upstream-owned. |
74
75
  | Sessions, state, tabs, frames, dialogs, and windows | 20 canonical tokens from baseline section `state-tabs-frames-dialogs`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#session-state-frames-dialogs-windows-and-inspection-commands). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#session-state-frames-dialogs-windows-and-inspection-commands), stateful workflow notes, [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details). | Stateful summaries/redaction, state artifact handling, sessionless local command planning, managed-session restore, tab target pinning, and close alias cleanup. | Extension-validation stateful matrix, runtime session/resume tests, presentation redaction tests, lifecycle harness. | Supported. External profile/auth state remains operator-owned. |
75
- | Network, storage, artifacts, diagnostics, and performance | 42 canonical tokens from baseline section `network-storage-artifacts-diagnostics`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#page-state-finding-mouse-settings-network-and-storage). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#page-state-finding-mouse-settings-network-and-storage), diagnostic sections, [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details). | Thin passthrough plus compact diagnostics, route-mock warnings, useful-but-redacted storage output, stream idempotency normalization, artifact metadata, missing-ffmpeg warnings, sensitive-data redaction, timeout bounds, and cleanup-pair guidance. | Fake non-core matrix and safe real-upstream coverage for network/HAR, diff, trace/profiler, console/errors/highlight, stream, vitals, and React missing-renderer. | Supported. Environment-sensitive operations need suitable local/browser state. |
76
- | Batch, auth, confirmations, setup, dashboard, devices, and AI commands | 30 canonical tokens from baseline section `batch-auth-setup-ai`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#batch-auth-confirmations-sessions-chat-dashboard-devices-and-setup). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#batch-auth-confirmations-sessions-chat-dashboard-devices-and-setup), README security notes, release docs. | Native-tool batch stdin, generated `job`/`qa`/lookup batch plans, auth/confirmation redaction, sessionless local auth/setup/dashboard/doctor/plugin planning, plugin list/show JSON envelope normalization, bare-`mcp` validation with `mcp --help` preserved, timeout/cleanup guidance. | Parser/runtime plugin and MCP unit coverage; fake-upstream plugin list/show and MCP help/blocking coverage; real-upstream plugin list shape probe; structured input-mode tests; efficiency benchmark scenarios. | Supported. Interactive side-effecting setup/auth/chat remains upstream-owned. `plugin` is local/sessionless; `mcp` is external-client-only except help; `auth login --credential-provider` resolves credentials via a plugin. |
76
+ | Network, storage, artifacts, diagnostics, and performance | 43 canonical tokens from baseline section `network-storage-artifacts-diagnostics`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#page-state-finding-mouse-settings-network-and-storage). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#page-state-finding-mouse-settings-network-and-storage), diagnostic sections, [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details). | Thin passthrough plus compact diagnostics, route-mock warnings, useful-but-redacted storage output, stream idempotency normalization, artifact metadata, missing-ffmpeg warnings, sensitive-data redaction, timeout bounds, and cleanup-pair guidance. | Fake non-core matrix and safe real-upstream coverage for network/HAR, diff, trace/profiler, console/errors/highlight, stream, vitals, and React missing-renderer. | Supported. Environment-sensitive operations need suitable local/browser state. |
77
+ | Batch, auth, confirmations, setup, dashboard, devices, and AI commands | 31 canonical tokens from baseline section `batch-auth-setup-ai`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#batch-auth-confirmations-sessions-chat-dashboard-devices-and-setup). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#batch-auth-confirmations-sessions-chat-dashboard-devices-and-setup), README security notes, release docs. | Native-tool batch stdin, generated `job`/`qa`/lookup batch plans, auth/confirmation redaction, sessionless local auth/setup/dashboard/doctor/plugin planning, plugin list/show JSON envelope normalization, bare-`mcp` validation with `mcp --help` preserved, timeout/cleanup guidance. | Parser/runtime plugin and MCP unit coverage; fake-upstream plugin list/show and MCP help/blocking coverage; real-upstream plugin list shape probe; structured input-mode tests; efficiency benchmark scenarios. | Supported. Interactive side-effecting setup/auth/chat remains upstream-owned. `plugin` is local/sessionless; `mcp` is external-client-only except help; `auth login --credential-provider` resolves credentials via a plugin; `install --with-deps` failures remain upstream-owned. |
77
78
  | Global flags, config, providers, policy, and environment | 121 canonical tokens from baseline section `options-and-env`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#important-global-flags-config-and-environment). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#important-global-flags-config-and-environment), README provider/setup notes, [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#sessionmode), architecture/runtime docs. | Runtime handles command discovery, value-flag prevalidation, launch-scoped flags, redacted echoes, fresh-session recovery hints, explicit sessions, provider/device launch-scoping, parent env forwarding with wrapper overrides, subprocess completion, and package-owned Pi-scoped config for optional companion features. | Runtime tests for flags/planning/redaction/session behavior; process tests for env and stdio-linger completion; config/web-search/CLI tests; fake provider/specialized-skill matrix; package doctor. | Supported. Provider clouds, iOS/Appium, proxies, profiles, and credentials require external setup. |
78
79
 
79
80
  ## Follow-up decision after closure
@@ -157,9 +157,9 @@ The extension always plans normal browser commands with `--json` prepended in `e
157
157
  - For batch chains that touch cookies, storage, auth, or other secret-bearing commands, use details.batchSteps for per-step artifacts, categories, spill paths, and full structured errors; top-level details.data on batch is only a compact redacted step matrix (success, argv-redacted command, redacted result or scrubbed error text) built from the same presentation rules as standalone calls.
158
158
  - For non-core families, pass current upstream commands through the native tool directly: network route/requests/har (including request filters like --type/--method/--status), diff snapshot/screenshot/url with scoped/baseline options, trace/profiler/record, console/errors/highlight/inspect/clipboard, stream enable/disable/status, dashboard start/stop, device list for iOS simulator inventory, and chat. For compact network requests output, prefer details.nextActions for request detail, route-mock diagnostics, actionable failed-request networkSourceLookup, filtering, clearing the aggregate buffer before repro, or HAR capture follow-ups instead of guessing request-id syntax. Artifact-producing commands report details.artifacts and verification state; long-running starts such as stream, dashboard, trace/profiler, and record should be paired with the matching stop/disable command when the task is done; stream enable already-enabled outcomes are treated as idempotent success with status/disable follow-ups.
159
159
  - For Electron desktop apps, prefer top-level electron for wrapper-owned discovery, isolated launch, status, compact probe, and cleanup: list first, treat likely-sensitive annotations as hints rather than enforcement, launch with the default snapshot handoff unless handoff: "tabs" is the safer diagnostic starting point, use electron.probe or snapshot -i/qa.attached for current-session state, and always cleanup the returned launchId when done. electron.launch uses an isolated temporary profile; it does not reuse the app's normal signed-in profile or attach to an already-running authenticated app. For signed-in local app state, host-launch the normal app with --remote-debugging-port when appropriate, then use raw args connect <port|url>; after connect, inspect tab list, select the stable tab id such as tab t2, then run a condition wait or snapshot -i before using refs. close commands (`close`, `quit`, or `exit`) only close the browser/CDP session; leave manually launched app shutdown, profile cleanup, and explicit artifacts to the host owner.
160
- - For provider or specialized app workflows, load version-matched upstream guidance with skills get agentcore|electron|slack|dogfood|vercel-sandbox through the native tool; add --full when you need references/templates, and use skills get --all only for broad skill audits. Provider launches such as -p ios, --provider browserbase/kernel/browseruse/browserless/agentcore, and iOS --device are upstream-owned setup paths; use sessionMode fresh when switching providers and expect external credentials or local Appium/Xcode setup to be required.
160
+ - For provider or specialized app workflows, load version-matched upstream guidance with skills get agentcore|electron|slack|dogfood|vercel-sandbox through the native tool; add --full when you need references/templates, and use skills get --all only for broad skill audits. Hosted sandbox workflows should use upstream @agent-browser/sandbox helpers outside this wrapper. Provider launches such as -p ios, --provider browserbase/kernel/browseruse/browserless/agentcore, and iOS --device are upstream-owned setup paths; use sessionMode fresh when switching providers and expect external credentials or local Appium/Xcode setup to be required.
161
161
  - For dialogs and frames, use dialog status/accept/dismiss and frame <selector|main> through native args; dialog commands and eval snippets that look like alert/confirm/prompt/dialog triggers are shorter-bounded than normal browser calls, and timed-out dialog-like interactions may add inspect-dialog-after-timeout, dismiss-dialog-after-timeout, or recover-fresh-session-after-dialog-timeout nextActions. When --confirm-actions produces a pending confirmation, use details.nextActions or exact confirm <id> / deny <id> calls instead of inventing ids.
162
- - If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. For headed demos, put --headed on the first launch with sessionMode=fresh and verify with screenshot/tab/get-url evidence because tool success cannot prove the OS window is visible to the user. For desktop readiness, prefer real conditions first: wait --text, wait --url, wait --fn, wait --load <state>, wait --download, or qa.attached; for disappearance checks in agent-browser 0.27.3, use wait --fn predicates instead of stale upstream-help examples like wait <selector> --state hidden. Use electron.probe/status for wrapper-owned launch health or target mismatch. Fixed waits are a last resort: use explicit --timeout or top-level timeoutMs for legitimately slow waits, and treat a successful payload like "waited":"timeout" as elapsed time only—verify completion with an observed condition, fresh snapshot, or screenshot.
162
+ - If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. For headed demos, put --headed on the first launch with sessionMode=fresh and verify with screenshot/tab/get-url evidence because tool success cannot prove the OS window is visible to the user. For desktop readiness, prefer real conditions first: wait --text, wait --url, wait --fn, wait --load <state>, wait --download, or qa.attached; for disappearance checks in agent-browser 0.29.1, use wait --fn predicates instead of stale upstream-help examples like wait <selector> --state hidden. Use electron.probe/status for wrapper-owned launch health or target mismatch. Fixed waits are a last resort: use explicit --timeout or top-level timeoutMs for legitimately slow waits, and treat a successful payload like "waited":"timeout" as elapsed time only—verify completion with an observed condition, fresh snapshot, or screenshot.
163
163
  - For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.
164
164
  - For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.
165
165
  - For downloads, prefer download <selector> <path> when an element click should save a file; simple loopback anchor downloads are saved to the requested path when the wrapper can resolve an HTTP(S) href. Do not rely on click alone when you need the downloaded file on disk.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-agent-browser-native",
3
- "version": "0.2.54",
3
+ "version": "0.2.56",
4
4
  "description": "pi extension that exposes agent-browser as a native tool for browser automation",
5
5
  "type": "module",
6
6
  "author": "Mitch Fultz (https://github.com/fitchmultz)",
@@ -63,16 +63,18 @@
63
63
  "typebox": "*"
64
64
  },
65
65
  "devDependencies": {
66
- "@earendil-works/pi-ai": "0.79.4",
67
- "@earendil-works/pi-coding-agent": "0.79.4",
68
- "@earendil-works/pi-tui": "0.79.4",
66
+ "@earendil-works/pi-ai": "^0.79.9",
67
+ "@earendil-works/pi-coding-agent": "^0.79.9",
68
+ "@earendil-works/pi-tui": "^0.79.9",
69
69
  "@types/node": "^25.9.3",
70
70
  "tsx": "^4.21.0",
71
71
  "typebox": "^1.1.38",
72
72
  "typescript": "^6.0.3"
73
73
  },
74
74
  "overrides": {
75
- "basic-ftp": "6.0.1"
75
+ "basic-ftp": "6.0.1",
76
+ "esbuild": "0.28.1",
77
+ "protobufjs": "7.6.4"
76
78
  },
77
79
  "scripts": {
78
80
  "docs": "node ./scripts/project.mjs docs",
@@ -95,5 +97,19 @@
95
97
  "prepack": "npm run build",
96
98
  "prepare": "node ./scripts/prepare.mjs"
97
99
  },
98
- "packageManager": "npm@11.14.0"
100
+ "packageManager": "npm@11.14.0",
101
+ "peerDependenciesMeta": {
102
+ "@earendil-works/pi-ai": {
103
+ "optional": true
104
+ },
105
+ "@earendil-works/pi-coding-agent": {
106
+ "optional": true
107
+ },
108
+ "@earendil-works/pi-tui": {
109
+ "optional": true
110
+ },
111
+ "typebox": {
112
+ "optional": true
113
+ }
114
+ }
99
115
  }
@@ -14,8 +14,8 @@ export const COMMAND_REFERENCE_BASELINE_BLOCK_IDS = Object.freeze(["upstream-bas
14
14
 
15
15
  const sourceEvidence = Object.freeze({
16
16
  repository: "vercel-labs/agent-browser",
17
- upstreamHead: "6323df571ffd17d14e60ec19fcb56cc1caf498ab",
18
- upstreamPackageVersion: "0.28.0",
17
+ upstreamHead: "4572acf0d71c0086009206c9c1e2136fc54ec9e5",
18
+ upstreamPackageVersion: "0.29.1",
19
19
  inspectedSources: Object.freeze([
20
20
  "agent-browser --version",
21
21
  "agent-browser --help",
@@ -27,6 +27,10 @@ const sourceEvidence = Object.freeze({
27
27
  "agent-browser.schema.json",
28
28
  "cli/src/commands.rs",
29
29
  "cli/src/flags.rs",
30
+ "packages/@agent-browser/sandbox/README.md",
31
+ "packages/@agent-browser/sandbox/src/shared.ts",
32
+ "packages/@agent-browser/sandbox/src/vercel.ts",
33
+ "packages/@agent-browser/sandbox/src/eve.ts",
30
34
  ]),
31
35
  });
32
36
 
@@ -46,6 +50,7 @@ const helpCommands = Object.freeze([
46
50
  helpCommand("skills help", ["skills", "--help"]),
47
51
  helpCommand("skills list", ["skills", "list"]),
48
52
  helpCommand("core skill full", ["skills", "get", "core", "--full"]),
53
+ helpCommand("vercel sandbox skill full", ["skills", "get", "vercel-sandbox", "--full"]),
49
54
  helpCommand("open help", ["open", "--help"]),
50
55
  helpCommand("click help", ["click", "--help"]),
51
56
  helpCommand("key help", ["key", "--help"]),
@@ -113,6 +118,8 @@ const inventorySections = Object.freeze([
113
118
  "skills get dogfood",
114
119
  "skills get vercel-sandbox",
115
120
  "skills get agentcore",
121
+ "@agent-browser/sandbox",
122
+ "installSystemDependencies: false",
116
123
  "skills path [name]",
117
124
  "AGENT_BROWSER_SKILLS_DIR",
118
125
  ],
@@ -127,6 +134,8 @@ const inventorySections = Object.freeze([
127
134
  ["skills list", "dogfood"],
128
135
  ["skills list", "vercel-sandbox"],
129
136
  ["skills list", "agentcore"],
137
+ ["vercel sandbox skill full", "@agent-browser/sandbox"],
138
+ ["vercel sandbox skill full", "installSystemDependencies: false"],
130
139
  ["core skill full", "agent-browser frame @e3"],
131
140
  ["core skill full", "agent-browser dialog accept"],
132
141
  ["core skill full", "agent-browser state save ./auth.json"],
@@ -459,6 +468,7 @@ const inventorySections = Object.freeze([
459
468
  "device list",
460
469
  "install",
461
470
  "install --with-deps",
471
+ "install --with-deps exits nonzero",
462
472
  "upgrade",
463
473
  "doctor [--fix]",
464
474
  "doctor --offline --quick",
@@ -481,6 +491,7 @@ const inventorySections = Object.freeze([
481
491
  root("dashboard start --port <n>"),
482
492
  ["device help", "device list"],
483
493
  root("install --with-deps"),
494
+ ["install help", "fails if deps fail"],
484
495
  root("upgrade"),
485
496
  root("doctor [--fix]"),
486
497
  root("profiles"),
@@ -731,7 +742,7 @@ const inventorySections = Object.freeze([
731
742
  ]);
732
743
 
733
744
  export const CAPABILITY_BASELINE = Object.freeze({
734
- targetVersion: "0.28.0",
745
+ targetVersion: "0.29.1",
735
746
  sourceEvidence,
736
747
  helpCommands,
737
748
  inventorySections,
@@ -62,7 +62,7 @@ Environment:
62
62
  PLATFORM_SMOKE_MAC_USER macOS SSH user; default $USER
63
63
  PLATFORM_SMOKE_MAC_WORK_ROOT macOS Crabbox work root
64
64
  PLATFORM_SMOKE_MAC_PORT macOS SSH port; default 22
65
- PLATFORM_SMOKE_UBUNTU_IMAGE Ubuntu local-container image; default pi-agent-browser-native-platform:node24-agent-browser0.27.3
65
+ PLATFORM_SMOKE_UBUNTU_IMAGE Ubuntu local-container image; default ${config?.ubuntuContainerImage ?? "pi-agent-browser-native-platform:node24-agent-browser<target>"}
66
66
  PLATFORM_SMOKE_WINDOWS_VM Parallels Windows template VM
67
67
  PLATFORM_SMOKE_WINDOWS_SNAPSHOT Parallels snapshot name
68
68
  PLATFORM_SMOKE_WINDOWS_USER Windows SSH user