pi-agent-browser-native 0.2.53 → 0.2.55
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +31 -0
- package/README.md +1 -1
- package/dist/extensions/agent-browser/lib/config-policy.js +3 -2
- package/dist/extensions/agent-browser/lib/playbook.js +2 -2
- package/dist/extensions/agent-browser/lib/prompt-policy.js +16 -8
- package/dist/extensions/agent-browser/lib/results/envelope.js +7 -0
- package/dist/extensions/agent-browser/lib/runtime.js +9 -1
- package/docs/ARCHITECTURE.md +1 -1
- package/docs/COMMAND_REFERENCE.md +42 -20
- package/docs/SUPPORT_MATRIX.md +18 -17
- package/docs/TOOL_CONTRACT.md +4 -4
- package/package.json +22 -6
- package/scripts/agent-browser-capability-baseline.mjs +14 -3
- package/scripts/platform-smoke.mjs +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,37 @@
|
|
|
2
2
|
|
|
3
3
|
## Unreleased
|
|
4
4
|
|
|
5
|
+
## 0.2.55 - 2026-06-21
|
|
6
|
+
|
|
7
|
+
### Changed
|
|
8
|
+
|
|
9
|
+
- Rebaselined upstream capability metadata, command reference, support docs, playbook guidance, platform smoke image tag, and real-upstream output-shape metadata for `agent-browser` `0.29.1` / vercel-labs/agent-browser@4572acf0d71c0086009206c9c1e2136fc54ec9e5.
|
|
10
|
+
- Documented the new upstream `@agent-browser/sandbox` package guidance, `installSystemDependencies: false`, and stricter `install --with-deps` nonzero behavior while keeping sandbox support outside this thin Pi wrapper.
|
|
11
|
+
- Updated local Pi development dependencies to `@earendil-works/*` `0.79.8`, kept Pi core package peers host-provided, and marked those peers optional to avoid install-time peer noise for package consumers.
|
|
12
|
+
|
|
13
|
+
### Fixed
|
|
14
|
+
|
|
15
|
+
- Kept optional recording paths from being misclassified as required screenshots when release-smoke prompts are collapsed into one line for tmux automation.
|
|
16
|
+
- Added npm overrides for vulnerable transitive dev dependencies so `npm audit` reports zero vulnerabilities without adding runtime dependencies.
|
|
17
|
+
|
|
18
|
+
### Validation
|
|
19
|
+
|
|
20
|
+
- Ran `npm run verify -- release` against `agent-browser` `0.29.1`; after rebuilding the Ubuntu image and refreshing the Windows `crabbox-ready` snapshot, the gate passed default verification, command-reference checks, build, lifecycle verification, packaged Pi smoke, and macOS/Ubuntu/Windows-native platform smoke.
|
|
21
|
+
- Ran `npm run verify -- real-upstream`, `npm run verify -- dogfood`, `npm run verify -- benchmark`, `npm run verify -- startup-profile --samples 3`, `npm run docs`, `npm run doctor`, `npm audit --json`, `npm run check:platform-smoke`, `npm run smoke:platform:ubuntu-image`, `npm run smoke:platform:doctor`, focused prompt-guard tests, and `git diff --check`.
|
|
22
|
+
- Ran tmux-driven Pi checkout dogfood with `pi --approve --no-extensions --no-skills -e .`, covering the public Sauce Demo checkout-overview flow with screenshot/recording evidence and no order placement; then verified the collapsed one-line screenshot-plus-recording close guard on `https://example.com` after rebuilding `dist/`.
|
|
23
|
+
|
|
24
|
+
## 0.2.54 - 2026-06-19
|
|
25
|
+
|
|
26
|
+
### Fixed
|
|
27
|
+
|
|
28
|
+
- Accepted upstream `plugin list` / `plugin show` JSON and blocked bare `mcp` native-tool calls while preserving `mcp --help`.
|
|
29
|
+
|
|
30
|
+
### Validation
|
|
31
|
+
|
|
32
|
+
- Ran `npm run verify -- release` against `agent-browser` `0.28.0`; the gate passed default verification, command-reference checks, build, lifecycle verification, packaged Pi smoke, and macOS/Ubuntu/Windows-native platform smoke.
|
|
33
|
+
- Ran `npm run verify -- real-upstream`, `npm run docs`, `npm run doctor`, `npm run check:platform-smoke`, `npm run smoke:platform:ubuntu-image`, `npm run smoke:platform:doctor`, and `git diff --check`.
|
|
34
|
+
- Ran a tmux-driven Pi checkout dogfood with `pi --approve --no-extensions --no-skills -e .`, covering `--version`, `mcp --help`, `plugin list`, fresh `example.com` open plus `snapshot -i`, `qa` on `react.dev`, and browser close.
|
|
35
|
+
|
|
5
36
|
## 0.2.53 - 2026-06-18
|
|
6
37
|
|
|
7
38
|
### Changed
|
package/README.md
CHANGED
|
@@ -85,7 +85,7 @@ The result is optimized for agent work:
|
|
|
85
85
|
| Recording workflows fail late when `ffmpeg` is missing | After successful `record start` / `record restart`, warns when `ffmpeg` is not on `PATH` so agents can install or fix PATH before `record stop` | [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md#details), [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md#diff-debug-and-streaming), `test/agent-browser.extension-validation.test.ts` |
|
|
86
86
|
| Direct binary help may be blocked in agent sessions | Publishes a repo-readable command reference and verifies it against the target upstream version | `npm run verify` |
|
|
87
87
|
| Desktop Electron apps need discovery, CDP attach, and safe teardown | Top-level `electron` runs host `list` / isolated `launch` (temp profile, OS-chosen debug port) / `status` / `probe` / `cleanup`, merges `launchId` plus managed `sessionName`, supports `handoff` `snapshot` / `tabs` / `connect`, and surfaces mismatch and post-command health guidance; wrapper cleanup applies only to launches it created | `extensions/agent-browser/lib/electron/discovery.ts`, `launch.ts`, `cleanup.ts`, [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md#electron), [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md#electron-desktop-apps) |
|
|
88
|
-
| Agents need bundled `skills` text and local setup/status commands without touching the live session | Treats `skills list`, `skills get …`, `skills path …`, local auth profile management (`auth save/list/show/delete/remove`), `profiles`, `dashboard`, `device list`, `doctor`, `install`, `upgrade`, `session list`, and targeted/all saved-state maintenance (`state clear --all`, `state clear -a`, named clear, or `state clean --older-than <days>`) as sessionless reads/actions: no implicit managed `--session` under default `sessionMode: "auto"` (same session-ownership goal as plain-text `--help` / `--version`), while
|
|
88
|
+
| Agents need bundled `skills` text and local setup/status commands without touching the live session | Treats `skills list`, `skills get …`, `skills path …`, local auth profile management (`auth save/list/show/delete/remove`), `profiles`, `dashboard`, `device list`, `doctor`, `install`, `upgrade`, `session list`, `plugin add/list/show/run`, `mcp --help`, and targeted/all saved-state maintenance (`state clear --all`, `state clear -a`, named clear, or `state clean --older-than <days>`) as sessionless reads/actions: no implicit managed `--session` under default `sessionMode: "auto"` (same session-ownership goal as plain-text `--help` / `--version`), while bare `mcp` server calls are blocked and provider/browser-backed workflows stay thin passthroughs that require upstream setup and credentials | [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md#built-in-skills), `extensions/agent-browser/lib/command-policy.ts`, `extensions/agent-browser/lib/runtime.ts` |
|
|
89
89
|
|
|
90
90
|
## Fastest way to try it
|
|
91
91
|
|
|
@@ -24,11 +24,12 @@ import { join, resolve } from "node:path";
|
|
|
24
24
|
/** @typedef {{ cwd?: string; env?: NodeJS.ProcessEnv; includeProjectConfig?: boolean }} AgentBrowserConfigLoadOptions */
|
|
25
25
|
/** @typedef {{ browserDefaultProfile?: Required<BrowserDefaultProfileConfig>; browserDefaultProfileScope?: ConfigLayerScope; browserExecutablePath?: string; browserExecutablePathScope?: ConfigLayerScope; trustedBrowserDefaultProfile?: Required<BrowserDefaultProfileConfig>; trustedBrowserDefaultProfileScope?: ConfigLayerScope; trustedBrowserExecutablePath?: string; trustedBrowserExecutablePathScope?: ConfigLayerScope; config: AgentBrowserConfig; webSearchCredentialSources: Partial<Record<WebSearchProvider, CredentialSource>>; webSearchEnabled: boolean; webSearchPreferredProvider: WebSearchProvider; errors: string[]; layers: ConfigLayer[]; paths: AgentBrowserConfigPaths; projectConfigIncluded: boolean; warnings: string[] }} AgentBrowserConfigState */
|
|
26
26
|
/** @typedef {{ scope: string; path: string; exists: boolean }} ConfigFileSummary */
|
|
27
|
+
const CONFIG_DIR_NAME = ".pi";
|
|
27
28
|
export const AGENT_BROWSER_CONFIG_ENV = "PI_AGENT_BROWSER_CONFIG";
|
|
28
29
|
export const BRAVE_API_KEY_ENV = "BRAVE_API_KEY";
|
|
29
30
|
export const EXA_API_KEY_ENV = "EXA_API_KEY";
|
|
30
|
-
export const CONFIG_RELATIVE_PATH = /** @type {const} */ ([
|
|
31
|
-
export const GLOBAL_CONFIG_RELATIVE_PATH = /** @type {const} */ ([
|
|
31
|
+
export const CONFIG_RELATIVE_PATH = /** @type {const} */ ([CONFIG_DIR_NAME, "config", "pi-agent-browser-native", "config.json"]);
|
|
32
|
+
export const GLOBAL_CONFIG_RELATIVE_PATH = /** @type {const} */ ([CONFIG_DIR_NAME, "config", "pi-agent-browser-native", "config.json"]);
|
|
32
33
|
export const SECRET_COMMAND_TIMEOUT_MS = 15_000;
|
|
33
34
|
/** @type {Readonly<Record<WebSearchProvider, WebSearchProviderDescriptor>>} */
|
|
34
35
|
export const WEB_SEARCH_PROVIDER_DESCRIPTORS = Object.freeze({
|
|
@@ -43,9 +43,9 @@ export const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
|
|
|
43
43
|
"For batch chains that touch cookies, storage, auth, or other secret-bearing commands, use details.batchSteps for per-step artifacts, categories, spill paths, and full structured errors; top-level details.data on batch is only a compact redacted step matrix (success, argv-redacted command, redacted result or scrubbed error text) built from the same presentation rules as standalone calls.",
|
|
44
44
|
"For non-core families, pass current upstream commands through the native tool directly: network route/requests/har (including request filters like --type/--method/--status), diff snapshot/screenshot/url with scoped/baseline options, trace/profiler/record, console/errors/highlight/inspect/clipboard, stream enable/disable/status, dashboard start/stop, device list for iOS simulator inventory, and chat. For compact network requests output, prefer details.nextActions for request detail, route-mock diagnostics, actionable failed-request networkSourceLookup, filtering, clearing the aggregate buffer before repro, or HAR capture follow-ups instead of guessing request-id syntax. Artifact-producing commands report details.artifacts and verification state; long-running starts such as stream, dashboard, trace/profiler, and record should be paired with the matching stop/disable command when the task is done; stream enable already-enabled outcomes are treated as idempotent success with status/disable follow-ups.",
|
|
45
45
|
"For Electron desktop apps, prefer top-level electron for wrapper-owned discovery, isolated launch, status, compact probe, and cleanup: list first, treat likely-sensitive annotations as hints rather than enforcement, launch with the default snapshot handoff unless handoff: \"tabs\" is the safer diagnostic starting point, use electron.probe or snapshot -i/qa.attached for current-session state, and always cleanup the returned launchId when done. electron.launch uses an isolated temporary profile; it does not reuse the app's normal signed-in profile or attach to an already-running authenticated app. For signed-in local app state, host-launch the normal app with --remote-debugging-port when appropriate, then use raw args connect <port|url>; after connect, inspect tab list, select the stable tab id such as tab t2, then run a condition wait or snapshot -i before using refs. close commands (`close`, `quit`, or `exit`) only close the browser/CDP session; leave manually launched app shutdown, profile cleanup, and explicit artifacts to the host owner.",
|
|
46
|
-
"For provider or specialized app workflows, load version-matched upstream guidance with skills get agentcore|electron|slack|dogfood|vercel-sandbox through the native tool; add --full when you need references/templates, and use skills get --all only for broad skill audits. Provider launches such as -p ios, --provider browserbase/kernel/browseruse/browserless/agentcore, and iOS --device are upstream-owned setup paths; use sessionMode fresh when switching providers and expect external credentials or local Appium/Xcode setup to be required.",
|
|
46
|
+
"For provider or specialized app workflows, load version-matched upstream guidance with skills get agentcore|electron|slack|dogfood|vercel-sandbox through the native tool; add --full when you need references/templates, and use skills get --all only for broad skill audits. Hosted sandbox workflows should use upstream @agent-browser/sandbox helpers outside this wrapper. Provider launches such as -p ios, --provider browserbase/kernel/browseruse/browserless/agentcore, and iOS --device are upstream-owned setup paths; use sessionMode fresh when switching providers and expect external credentials or local Appium/Xcode setup to be required.",
|
|
47
47
|
"For dialogs and frames, use dialog status/accept/dismiss and frame <selector|main> through native args; dialog commands and eval snippets that look like alert/confirm/prompt/dialog triggers are shorter-bounded than normal browser calls, and timed-out dialog-like interactions may add inspect-dialog-after-timeout, dismiss-dialog-after-timeout, or recover-fresh-session-after-dialog-timeout nextActions. When --confirm-actions produces a pending confirmation, use details.nextActions or exact confirm <id> / deny <id> calls instead of inventing ids.",
|
|
48
|
-
"If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. For headed demos, put --headed on the first launch with sessionMode=fresh and verify with screenshot/tab/get-url evidence because tool success cannot prove the OS window is visible to the user. For desktop readiness, prefer real conditions first: wait --text, wait --url, wait --fn, wait --load <state>, wait --download, or qa.attached; for disappearance checks in agent-browser 0.
|
|
48
|
+
"If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. For headed demos, put --headed on the first launch with sessionMode=fresh and verify with screenshot/tab/get-url evidence because tool success cannot prove the OS window is visible to the user. For desktop readiness, prefer real conditions first: wait --text, wait --url, wait --fn, wait --load <state>, wait --download, or qa.attached; for disappearance checks in agent-browser 0.29.1, use wait --fn predicates instead of stale upstream-help examples like wait <selector> --state hidden. Use electron.probe/status for wrapper-owned launch health or target mismatch. Fixed waits are a last resort: use explicit --timeout or top-level timeoutMs for legitimately slow waits, and treat a successful payload like \"waited\":\"timeout\" as elapsed time only—verify completion with an observed condition, fresh snapshot, or screenshot.",
|
|
49
49
|
"For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.",
|
|
50
50
|
"For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.",
|
|
51
51
|
"For downloads, prefer download <selector> <path> when an element click should save a file; simple loopback anchor downloads are saved to the requested path when the wrapper can resolve an HTTP(S) href. Do not rely on click alone when you need the downloaded file on disk.",
|
|
@@ -18,23 +18,31 @@ const LEGACY_BASH_ALLOW_PATTERNS = [
|
|
|
18
18
|
/\bdebug(?:ging)?\b.*\b(?:agent[_ -]?browser|agent_browser|browser integration)\b/i,
|
|
19
19
|
];
|
|
20
20
|
const PROMPT_ARTIFACT_PATH_PATTERN = /(?:^|[\s"'`(:])((?:\/[^\s"'`),;]+|[A-Za-z]:[\\/][^\s"'`),;]+|\.{1,2}[\\/][^\s"'`),;]+|[^\s"'`),;:\\/]+(?:[\\/][^\s"'`),;]+)+|[^\s"'`),;:\\/]+)\.(?:png|jpe?g|webp|gif|webm|mp4|har|pdf|trace|json))(?:[\s"'`),;.]|$)/gi;
|
|
21
|
+
/**
 * Decide whether an artifact path mentioned in a prompt line is a screenshot
 * or a recording.
 *
 * The path's own file extension is consulted first, so a video path and an
 * image path that share one prompt line are each classified by extension.
 * Only when the extension is not a known video/image type does the function
 * fall back to keywords found anywhere in the line.
 *
 * @param {string} line - The prompt line the path was extracted from.
 * @param {string} path - The artifact path found on that line.
 * @returns {"screenshot" | "recording" | undefined} The inferred kind, or
 *   `undefined` when neither the extension nor the line indicates one.
 */
function inferPromptArtifactKind(line, path) {
    const normalizedPath = path.toLowerCase();
    // Ordered extension rules: video extensions are tested before image ones.
    const extensionRules = [
        [/\.(?:webm|mp4)$/, "recording"],
        [/\.(?:png|jpe?g|webp|gif)$/, "screenshot"],
    ];
    for (const [extensionPattern, artifactKind] of extensionRules) {
        if (extensionPattern.test(normalizedPath)) {
            return artifactKind;
        }
    }

    // Extension was inconclusive: use keywords from the whole line, with
    // "screenshot" taking precedence over the recording keywords.
    const normalizedLine = line.toLowerCase();
    if (normalizedLine.includes("screenshot")) {
        return "screenshot";
    }
    const mentionsRecording = /\b(?:screen\s+recording|recording|webm|video)\b/.test(normalizedLine);
    return mentionsRecording ? "recording" : undefined;
}
|
|
21
34
|
function extractPromptRequestedArtifacts(prompt) {
|
|
22
35
|
const artifacts = [];
|
|
23
36
|
const seen = new Set();
|
|
24
37
|
for (const line of prompt.split(/\r?\n/)) {
|
|
25
|
-
const lowerLine = line.toLowerCase();
|
|
26
|
-
const kind = lowerLine.includes("screenshot")
|
|
27
|
-
? "screenshot"
|
|
28
|
-
: /\b(?:screen\s+recording|recording|webm|video)\b/.test(lowerLine)
|
|
29
|
-
? "recording"
|
|
30
|
-
: undefined;
|
|
31
|
-
if (!kind)
|
|
32
|
-
continue;
|
|
33
38
|
PROMPT_ARTIFACT_PATH_PATTERN.lastIndex = 0;
|
|
34
39
|
for (const match of line.matchAll(PROMPT_ARTIFACT_PATH_PATTERN)) {
|
|
35
40
|
const path = match[1]?.trim();
|
|
36
41
|
if (!path)
|
|
37
42
|
continue;
|
|
43
|
+
const kind = inferPromptArtifactKind(line, path);
|
|
44
|
+
if (!kind)
|
|
45
|
+
continue;
|
|
38
46
|
const key = `${kind}:${path}`;
|
|
39
47
|
if (seen.has(key))
|
|
40
48
|
continue;
|
|
@@ -66,6 +66,13 @@ export async function parseAgentBrowserEnvelope(options) {
|
|
|
66
66
|
if (!isRecord(parsed)) {
|
|
67
67
|
return { parseError: "agent-browser returned JSON, but it was not an object envelope." };
|
|
68
68
|
}
|
|
69
|
+
const keys = Object.keys(parsed);
|
|
70
|
+
if (keys.length === 1 && keys[0] === "plugins" && Array.isArray(parsed.plugins)) {
|
|
71
|
+
return { envelope: { success: true, data: { plugins: parsed.plugins } } };
|
|
72
|
+
}
|
|
73
|
+
if (keys.length === 1 && keys[0] === "plugin" && isRecord(parsed.plugin) && !Array.isArray(parsed.plugin)) {
|
|
74
|
+
return { envelope: { success: true, data: { plugin: parsed.plugin } } };
|
|
75
|
+
}
|
|
69
76
|
if (!("success" in parsed)) {
|
|
70
77
|
return { parseError: "agent-browser returned an invalid JSON envelope: missing boolean success field." };
|
|
71
78
|
}
|
|
@@ -533,6 +533,14 @@ function getSingleKeyCommandValidationError(args) {
|
|
|
533
533
|
const label = command === "key" ? "key/press" : command;
|
|
534
534
|
return `agent-browser ${label} accepts exactly one key argument. Do not pass a selector or ref to ${label}; focus or click the target first, then run ${command} <key> (for example: focus @e1, then press Enter).`;
|
|
535
535
|
}
|
|
536
|
+
/**
 * Build the validation error for an `mcp` command invoked without a help flag.
 *
 * @param {string[]} args - Raw argv tokens passed to the native tool.
 * @returns {string | undefined} An explanatory error message when the parsed
 *   command is `mcp` and its command tokens contain neither `--help` nor `-h`;
 *   otherwise `undefined`.
 */
function getBareMcpValidationError(args) {
    const descriptor = parseArgvDescriptor(args);
    if (descriptor.commandInfo.command !== "mcp") {
        return undefined;
    }
    // Help requests are let through; only the help-less form is rejected.
    const asksForHelp = descriptor.commandTokens.some((token) => token === "--help" || token === "-h");
    if (asksForHelp) {
        return undefined;
    }
    return "agent-browser mcp starts a stdio MCP server for external MCP clients, not a one-shot native agent_browser tool workflow. Use the native agent_browser tool modes directly, or configure an MCP client to launch `agent-browser mcp`. Use `mcp --help` for help.";
}
|
|
536
544
|
export function validateToolArgs(args) {
|
|
537
545
|
if (args.length === 0) {
|
|
538
546
|
return "`args` must contain at least one agent-browser command token.";
|
|
@@ -545,7 +553,7 @@ export function validateToolArgs(args) {
|
|
|
545
553
|
if (sessionModeArg) {
|
|
546
554
|
return "Do not pass `--session-mode` in args. Use the top-level agent_browser `sessionMode` field instead, for example { args: [\"--profile\", \"Default\", \"open\", \"https://example.com\"], sessionMode: \"fresh\" }.";
|
|
547
555
|
}
|
|
548
|
-
return getSingleKeyCommandValidationError(args);
|
|
556
|
+
return getBareMcpValidationError(args) ?? getSingleKeyCommandValidationError(args);
|
|
549
557
|
}
|
|
550
558
|
function getInvalidValueFlagDetails(args) {
|
|
551
559
|
for (let index = 0; index < args.length; index += 1) {
|
package/docs/ARCHITECTURE.md
CHANGED
|
@@ -177,7 +177,7 @@ That failure should include a structured recovery hint pointing to `sessionMode:
|
|
|
177
177
|
Implementation detail lives in `extensions/agent-browser/lib/launch-scoped-flags.ts` (canonical flag metadata shared with playbook/docs assertions), `extensions/agent-browser/lib/argv-descriptor.ts` and `extensions/agent-browser/lib/argv-grammar.ts` (command discovery, `VALUE_FLAGS`, `parseArgvDescriptor`) plus `extensions/agent-browser/lib/runtime.ts` (`getStartupScopedFlags`, `buildExecutionPlan`):
|
|
178
178
|
|
|
179
179
|
- **Command discovery:** Leading argv is scanned with a value-taking allowlist so known global flags and documented command flags consume their values before the upstream command word is identified. Missing-value prevalidation is intentionally limited to upstream global value flags; command-scoped flags and literal text are left to upstream parsing so values like `fill #field --password` are not rejected by wrapper heuristics before the CLI sees them. When upstream adds new global flags that take values ahead of the command, extend both the command-discovery and prevalidation allowlists; when it adds command-specific flags, extend only command discovery/redaction as needed. A smaller set of global boolean flags may be followed by an optional `true`/`false` literal; when present, that literal is consumed as the flag value before command discovery continues.
|
|
180
|
-
- **`--state` disambiguation:** Persisted browser `--state` before the command participates in launch-scoped validation and tab-correction hints. The same flag spelling after a `wait` command is excluded from startup-scoped detection so upstream help examples such as `wait @ref --state hidden` do not spuriously require `sessionMode: "fresh"` while an implicit session is active. As of upstream `agent-browser 0.
|
|
180
|
+
- **`--state` disambiguation:** Persisted browser `--state` before the command participates in launch-scoped validation and tab-correction hints. The same flag spelling after a `wait` command is excluded from startup-scoped detection so upstream help examples such as `wait @ref --state hidden` do not spuriously require `sessionMode: "fresh"` while an implicit session is active. As of upstream `agent-browser 0.29.1`, the parser still does not implement those `wait --state` examples as distinct wait modes, so agent-facing docs recommend `wait --fn` predicates for disappearance checks instead.
|
|
181
181
|
- **`--auto-connect`:** Treated as launch-scoped only when enabled (`--auto-connect` bare or `true`). `--auto-connect false` is ignored for startup-scoped blocking so disabled attach hints do not force a fresh launch.
|
|
182
182
|
|
|
183
183
|
**Sessionless inspection and local commands:** Plain-text global help and version probes (`--help`, `-h`, `--version`, `-V`) must never allocate or bind the extension-managed session. The same session-ownership rule applies to read-only upstream `skills list`, `skills get …`, and `skills path …`, local auth profile management (`auth save/list/show/delete/remove`), plus local/setup surfaces such as `profiles`, `dashboard start/stop`, `device list`, `doctor`, `install`, `upgrade`, `session list`, and targeted/all local saved-state maintenance (`state list/show`, `state clear --all`, `state clear -a`, `state clear <session-name>`, `state clean --older-than <days>`, `state rename`). Non-plain-text sessionless commands still run with `--json` for machine-readable output, but the planner does not prepend the implicit managed `--session`, so an agent can inspect local capabilities or start/stop the standalone dashboard without consuming the implicit session slot before a real `open`. Browser-backed, context-dependent, or incomplete commands such as root `session`, untargeted `state clear`, bare `state clean`, `auth login`, `state save`, and `state load` keep normal managed-session injection. Command-shape allowlisting lives in `extensions/agent-browser/lib/command-policy.ts` (`needsManagedSession`), while `extensions/agent-browser/lib/runtime.ts` (`isPlainTextInspectionArgs`, `buildExecutionPlan`) applies that decision to execution planning.
|
|
@@ -18,23 +18,22 @@ This project intentionally blocks normal `agent-browser` bash usage in most agen
|
|
|
18
18
|
|
|
19
19
|
<!-- agent-browser-capability-baseline:start upstream-baseline -->
|
|
20
20
|
<!-- Generated from scripts/agent-browser-capability-baseline.mjs. Run `npm run docs -- command-reference write` to update. Do not edit manually. -->
|
|
21
|
-
This reference is baselined to the locally installed `agent-browser 0.
|
|
21
|
+
This reference is baselined to the locally installed `agent-browser 0.29.1` command/help surface, audited against vercel-labs/agent-browser@4572acf0d71c0086009206c9c1e2136fc54ec9e5. Upstream `agent-browser` remains the source of truth for command semantics; this file is the local fallback for Pi agent sessions where direct binary help is blocked or discouraged.
|
|
22
22
|
|
|
23
23
|
The lightweight drift check is `npm run verify -- command-reference`. Run it whenever the installed upstream `agent-browser` version changes or this reference is edited.
|
|
24
24
|
|
|
25
25
|
Use `npm run benchmark:agent-browser` or `npm run verify -- benchmark` before and after agent-facing workflow abstractions to measure task success, tool calls, model-visible output size, stale-ref behavior, artifact success, failure-category coverage, and elapsed-time estimates.
|
|
26
26
|
<!-- agent-browser-capability-baseline:end upstream-baseline -->
|
|
27
27
|
|
|
28
|
-
### Upstream 0.
|
|
28
|
+
### Upstream 0.29.1 rebaseline
|
|
29
29
|
|
|
30
|
-
The 0.
|
|
30
|
+
The 0.29.1 rebaseline adds no new core browser CLI commands. It captures upstream's new hosted-sandbox helper package and install behavior:
|
|
31
31
|
|
|
32
|
-
-
|
|
33
|
-
-
|
|
34
|
-
-
|
|
35
|
-
|
|
36
|
-
-
|
|
37
|
-
- GNU Linux release artifacts pinned to glibc 2.28
|
|
32
|
+
- `@agent-browser/sandbox` is the upstream helper package for Eve and Vercel Sandbox workflows. It is not bundled by this pi extension; load `skills get vercel-sandbox --full` when a task needs that hosted-sandbox guidance.
|
|
33
|
+
- Fresh Eve and Vercel Sandbox helpers install Chromium system dependencies by default; pass `installSystemDependencies: false` only when the sandbox image already has those libraries.
|
|
34
|
+
- `install --with-deps` now exits nonzero when the package manager cannot install required browser libraries (`install --with-deps exits nonzero`).
|
|
35
|
+
|
|
36
|
+
Runtime probes on 2026-06-21 confirm two old caveats still stand in `agent-browser 0.29.1`: `find ... uncheck` and `wait <selector> --state hidden|detached` remain advertised by help but fail at runtime, and `wait --url` glob behavior remains narrow. Keep the wrapper's direct `uncheck` passthrough, `wait --fn` disappearance guidance, and `job.assertUrl` glob workaround.
|
|
38
37
|
|
|
39
38
|
### Upstream 0.28.0 rebaseline
|
|
40
39
|
|
|
@@ -47,6 +46,17 @@ The 0.28.0 rebaseline tracks new local/infra upstream surfaces and does not chan
|
|
|
47
46
|
|
|
48
47
|
The wrapper adds no compatibility shim for older upstream releases.
|
|
49
48
|
|
|
49
|
+
### Upstream 0.27.3 install-only rebaseline
|
|
50
|
+
|
|
51
|
+
The 0.27.3 rebaseline is an install-only compatibility update: upstream changed Windows ARM64 installation fallback behavior and did not change the CLI/help surface or browser-command semantics. This wrapper adds no compatibility shim for older upstream releases. The wrapper must still not hide these prior upstream fixes:
|
|
52
|
+
|
|
53
|
+
- click reliability: upstream now scrolls off-viewport elements before coordinate resolution, handles JavaScript dialogs promptly, recovers mouse state after dialog-opening clicks, and reports overlay interception before dispatching input
|
|
54
|
+
- frame-scoped CSS selectors and waits, including cross-process iframe click-coordinate translation
|
|
55
|
+
- wait timeout handling: documented 25s default, honored `--timeout` across wait variants, and appropriate client read budgets for long waits; the native wrapper forwards explicit long waits and derives a subprocess watchdog when top-level `timeoutMs` is omitted
|
|
56
|
+
- form commands: `find label` matches `aria-label` / `aria-labelledby`, `select` errors when no option matches, and `type` parses `--clear` / `--delay` instead of typing them as literal text
|
|
57
|
+
- warm CLI command latency and batch daemon respawn/retry improvements
|
|
58
|
+
- GNU Linux release artifacts pinned to glibc 2.28
|
|
59
|
+
|
|
50
60
|
## Core mental model
|
|
51
61
|
|
|
52
62
|
Input mode chooser (one per call): **`args`** for the default open → snapshot -i → click/fill `@refs` flow; **`semanticAction`** for stable role/text/label targets; **`job`** / **`qa`** for multi-step checks; **`electron`** for desktop apps only; **`sourceLookup`** / **`networkSourceLookup`** are **experimental candidates-only** helpers (not authoritative mappings). Do not pass `--json` in `args`—the wrapper injects it. Match link and button text to the latest snapshot (on `https://example.com/` the main link is `Learn more`, not legacy `More information...` copy). See [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#input-mode-chooser) for snapshot variants (`-i` vs `--compact` vs full) and batching three or more getters.
|
|
@@ -650,7 +660,7 @@ For dense pages, the wrapper also accepts `snapshot -i --search <text>` and `sna
|
|
|
650
660
|
| `wait --download [path]` | Wait for a download started by a previous action and optionally save it to `path`; successful wrapper results include upstream-reported `savedFilePath`/`savedFile`, while `details.artifacts[].exists` is the wrapper's on-disk verification signal. |
|
|
651
661
|
| `wait --download [path] --timeout <ms>` | Set download-start timeout in milliseconds. The native Pi wrapper forwards explicit wait timeouts and extends the subprocess watchdog unless the caller supplies top-level `timeoutMs`. |
|
|
652
662
|
|
|
653
|
-
Current upstream source still does not parse `wait <selector> --state hidden` / `wait <selector> --state detached` as distinct wait modes even though upstream help mentions those examples. Use `wait --fn "!document.querySelector('#spinner')"` or another explicit JavaScript predicate for disappearance/detach checks until upstream parser support exists.
|
|
663
|
+
Current upstream 0.29.1 source still does not parse `wait <selector> --state hidden` / `wait <selector> --state detached` as distinct wait modes even though upstream help mentions those examples. Use `wait --fn "!document.querySelector('#spinner')"` or another explicit JavaScript predicate for disappearance/detach checks until upstream parser support exists.
|
|
654
664
|
|
|
655
665
|
### Diff, debug, and streaming
|
|
656
666
|
|
|
@@ -708,18 +718,19 @@ Long-running or lifecycle commands should be explicitly paired with cleanup call
|
|
|
708
718
|
| `dashboard stop` | Stop the dashboard server. |
|
|
709
719
|
| `device list` | List available iOS simulators. Use with `-p ios` when exercising iOS provider flows. |
|
|
710
720
|
| `install` | Install browser binaries. |
|
|
711
|
-
| `install --with-deps` | Install browser binaries plus Linux system dependencies. |
|
|
721
|
+
| `install --with-deps` | Install browser binaries plus Linux system dependencies; exits nonzero when required libraries cannot be installed. |
|
|
712
722
|
| `upgrade` | Upgrade `agent-browser` to the latest version. |
|
|
713
723
|
| `doctor [--fix]` | Diagnose install issues and optionally auto-clean stale files. Use `doctor --offline --quick` for a fast local-only check and `doctor --json` for structured output. |
|
|
714
724
|
| `plugin add <ref>` | Add a plugin from npm or GitHub (`<owner>/<repo>` or `@scope/<name>`); writes `agent-browser.json`. Flags such as `--name`, `--capability`, `--global`, and `--no-manifest` shape discovery. |
|
|
715
|
-
| `plugin [list]` | List configured plugins (default subcommand). |
|
|
716
|
-
| `plugin show <name>` | Show one configured plugin. |
|
|
725
|
+
| `plugin [list]` | List configured plugins (default subcommand); `{ "plugins": [...] }` is a successful sessionless result. |
|
|
726
|
+
| `plugin show <name>` | Show one configured plugin; `{ "plugin": {...} }` is a successful sessionless result. |
|
|
717
727
|
| `plugin run <name> <type>` | Run a `command.run` or custom plugin request over the agent-browser plugin stdio protocol. |
|
|
718
728
|
| `auth login <name> --credential-provider <plugin>` | Resolve credentials just-in-time from a configured credential plugin (e.g. a vault) instead of saved passwords; pair with `--item <ref>` and optional selector overrides. Credentials are not stored locally. |
|
|
719
|
-
| `mcp` |
|
|
729
|
+
| `mcp --help` | Show MCP server help through the native tool. |
|
|
730
|
+
| `mcp` | Start a local MCP stdio server for external MCP clients; bare native-tool calls are rejected before spawn. |
|
|
720
731
|
| `profiles` | List available Chrome profiles. |
|
|
721
732
|
|
|
722
|
-
When these commands are invoked through the native `agent_browser` tool, structured diagnostic/status outputs are rendered as compact summaries. Local inspection/setup calls (`auth save/list/show/delete/remove`, `dashboard start/stop`, `device list`, `doctor`, `install`, `upgrade`, `profiles`, `session list`, `plugin add/list/show/run`, `
|
|
733
|
+
When these commands are invoked through the native `agent_browser` tool, structured diagnostic/status outputs are rendered as compact summaries. Local inspection/setup calls (`auth save/list/show/delete/remove`, `dashboard start/stop`, `device list`, `doctor`, `install`, `upgrade`, `profiles`, `session list`, `plugin add/list/show/run`, `state list/show/rename`, `state clean --older-than <days>`, `state clear --all`, `state clear -a`, and `state clear <session-name>`) are sessionless unless you explicitly pass `--session`; bare `mcp` server calls are rejected before spawn, and only `mcp --help` is allowed through the native tool. Context-dependent calls such as root `session`, untargeted `state clear`, `auth login`, `chat`, and `state save/load` keep normal session behavior. List-like outputs such as sessions, Chrome profiles, auth profiles, network requests, console messages, and page errors include counts and key fields; large outputs are previewed with a `Full output path:` spill file instead of dumping the entire payload into context. For `network requests`, the wrapper shows a failed-request summary split into actionable versus benign low-impact rows, then status, method, URL, resource/mime type, request id, and, when the installed upstream output includes body-like fields, bounded redacted payload, response, and failure/error snippets. Safe request IDs also produce `details.nextActions` for exact request details, actionable failed-request source lookup candidates, filtered request lists, or starting HAR capture before a repro. If the same session has active wrapper-observed network routes, failed/pending/CORS-looking matched request rows add `details.networkRouteDiagnostics` and executable route-mock next actions before the generic request actions. `data:image` artifact rows are omitted from compact request previews but remain in raw `details.data.requests`. 
`network request <requestId>` can expose upstream full-detail body fields such as response bodies using the same bounded model-facing preview; its request URL stays diagnostic-only and does not overwrite `details.sessionTabTarget` for later ref guards. Clipboard failures that mention `NotAllowedError` or permission denial are usually browser/OS capability limits, not proof that a read, paste, or page mutation happened; prefer page-native reads (`snapshot -i`, `get text`, `eval --stdin`) or direct typing (`keyboard inserttext` / `keyboard type`) when the workflow allows it, and retry true clipboard flows only from an allowed profile/session on a normal `http(s)` page. Header, cookie, auth, token, and other secret-like fields are not expanded in model-facing text or `details.data`; low-risk primitive storage values may remain visible, while command echoes still redact `--body`, `--headers`, `--password`, proxy credentials, auth-bearing URLs, `clipboard write` text, cookie/storage set values, and bearer/basic credential text in positional arguments. Use upstream HAR or full raw details only when complete data is required.
|
|
723
734
|
|
|
724
735
|
## Optional package config and companion web search
|
|
725
736
|
|
|
@@ -878,14 +889,14 @@ Other useful environment variables include `AGENT_BROWSER_DEFAULT_TIMEOUT`, `AGE
|
|
|
878
889
|
<!-- agent-browser-capability-baseline:start capability-token-baseline -->
|
|
879
890
|
<!-- Generated from scripts/agent-browser-capability-baseline.mjs. Run `npm run docs -- command-reference write` to update. Do not edit manually. -->
|
|
880
891
|
<details>
|
|
881
|
-
<summary>Generated verifier capability baseline for agent-browser 0.
|
|
892
|
+
<summary>Generated verifier capability baseline for agent-browser 0.29.1</summary>
|
|
882
893
|
|
|
883
894
|
This generated block is review data for maintainers. The human-authored reference sections above remain the readable command guide.
|
|
884
895
|
|
|
885
896
|
#### Source evidence
|
|
886
897
|
- repository: `vercel-labs/agent-browser`
|
|
887
|
-
- upstream HEAD: `
|
|
888
|
-
- upstream package version: `0.
|
|
898
|
+
- upstream HEAD: `4572acf0d71c0086009206c9c1e2136fc54ec9e5`
|
|
899
|
+
- upstream package version: `0.29.1`
|
|
889
900
|
- inspected: `agent-browser --version`
|
|
890
901
|
- inspected: `agent-browser --help`
|
|
891
902
|
- inspected: `selected agent-browser <command> --help output`
|
|
@@ -896,12 +907,17 @@ This generated block is review data for maintainers. The human-authored referenc
|
|
|
896
907
|
- inspected: `agent-browser.schema.json`
|
|
897
908
|
- inspected: `cli/src/commands.rs`
|
|
898
909
|
- inspected: `cli/src/flags.rs`
|
|
910
|
+
- inspected: `packages/@agent-browser/sandbox/README.md`
|
|
911
|
+
- inspected: `packages/@agent-browser/sandbox/src/shared.ts`
|
|
912
|
+
- inspected: `packages/@agent-browser/sandbox/src/vercel.ts`
|
|
913
|
+
- inspected: `packages/@agent-browser/sandbox/src/eve.ts`
|
|
899
914
|
|
|
900
915
|
#### Upstream help commands sampled
|
|
901
916
|
- root help: `agent-browser --help`
|
|
902
917
|
- skills help: `agent-browser skills --help`
|
|
903
918
|
- skills list: `agent-browser skills list`
|
|
904
919
|
- core skill full: `agent-browser skills get core --full`
|
|
920
|
+
- vercel sandbox skill full: `agent-browser skills get vercel-sandbox --full`
|
|
905
921
|
- open help: `agent-browser open --help`
|
|
906
922
|
- click help: `agent-browser click --help`
|
|
907
923
|
- key help: `agent-browser key --help`
|
|
@@ -953,11 +969,11 @@ This generated block is review data for maintainers. The human-authored referenc
|
|
|
953
969
|
- plugin help: `agent-browser plugin --help`
|
|
954
970
|
|
|
955
971
|
#### Inventory sections
|
|
956
|
-
- Built-in skills:
|
|
972
|
+
- Built-in skills: 15 human-doc token(s), 15 upstream token(s)
|
|
957
973
|
- Core page, element, navigation, and extraction commands: 74 human-doc token(s), 74 upstream token(s)
|
|
958
974
|
- Sessions, state, tabs, frames, dialogs, and windows: 20 human-doc token(s), 16 upstream token(s)
|
|
959
975
|
- Network, storage, artifacts, diagnostics, and performance: 43 human-doc token(s), 53 upstream token(s)
|
|
960
|
-
- Batch, auth, confirmations, setup, dashboard, devices, and AI commands:
|
|
976
|
+
- Batch, auth, confirmations, setup, dashboard, devices, and AI commands: 31 human-doc token(s), 35 upstream token(s)
|
|
961
977
|
- Global flags, config, providers, policy, and environment: 121 human-doc token(s), 91 upstream token(s)
|
|
962
978
|
|
|
963
979
|
#### Human-authored doc tokens required
|
|
@@ -973,6 +989,8 @@ This generated block is review data for maintainers. The human-authored referenc
|
|
|
973
989
|
- `skills get dogfood`
|
|
974
990
|
- `skills get vercel-sandbox`
|
|
975
991
|
- `skills get agentcore`
|
|
992
|
+
- `@agent-browser/sandbox`
|
|
993
|
+
- `installSystemDependencies: false`
|
|
976
994
|
- `skills path [name]`
|
|
977
995
|
- `AGENT_BROWSER_SKILLS_DIR`
|
|
978
996
|
|
|
@@ -1139,6 +1157,7 @@ This generated block is review data for maintainers. The human-authored referenc
|
|
|
1139
1157
|
- `device list`
|
|
1140
1158
|
- `install`
|
|
1141
1159
|
- `install --with-deps`
|
|
1160
|
+
- `install --with-deps exits nonzero`
|
|
1142
1161
|
- `upgrade`
|
|
1143
1162
|
- `doctor [--fix]`
|
|
1144
1163
|
- `doctor --offline --quick`
|
|
@@ -1286,6 +1305,8 @@ This generated block is review data for maintainers. The human-authored referenc
|
|
|
1286
1305
|
- skills list: `dogfood`
|
|
1287
1306
|
- skills list: `vercel-sandbox`
|
|
1288
1307
|
- skills list: `agentcore`
|
|
1308
|
+
- vercel sandbox skill full: `@agent-browser/sandbox`
|
|
1309
|
+
- vercel sandbox skill full: `installSystemDependencies: false`
|
|
1289
1310
|
- core skill full: `agent-browser frame @e3`
|
|
1290
1311
|
- core skill full: `agent-browser dialog accept`
|
|
1291
1312
|
- core skill full: `agent-browser state save ./auth.json`
|
|
@@ -1449,6 +1470,7 @@ This generated block is review data for maintainers. The human-authored referenc
|
|
|
1449
1470
|
- root help: `dashboard start --port <n>`
|
|
1450
1471
|
- device help: `device list`
|
|
1451
1472
|
- root help: `install --with-deps`
|
|
1473
|
+
- install help: `fails if deps fail`
|
|
1452
1474
|
- root help: `upgrade`
|
|
1453
1475
|
- root help: `doctor [--fix]`
|
|
1454
1476
|
- root help: `profiles`
|
package/docs/SUPPORT_MATRIX.md
CHANGED
|
@@ -26,10 +26,10 @@ When upstream ships a new `agent-browser` or the inventory changes:
|
|
|
26
26
|
|
|
27
27
|
## Audit result
|
|
28
28
|
|
|
29
|
-
- Target upstream: `agent-browser 0.
|
|
29
|
+
- Target upstream: `agent-browser 0.29.1` (must match `CAPABILITY_BASELINE.targetVersion` in [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs)).
|
|
30
30
|
- Source of truth: `CAPABILITY_BASELINE.inventorySections` in the same file (stable `id` keys: `skills`, `core-commands`, `state-tabs-frames-dialogs`, `network-storage-artifacts-diagnostics`, `batch-auth-setup-ai`, `options-and-env`).
|
|
31
|
-
- Status: supported for the current wrapper contract after the 2026-
|
|
32
|
-
- High-priority support gaps: 2026-05-26 audit found sessionless local commands and command-scoped value flags needed sharper wrapper handling; runtime/tests/docs now cover those paths. The 0.28.0 rebaseline
|
|
31
|
+
- Status: command-surface supported for the current wrapper contract after the 2026-06-21 0.29.1 audit; release-gate evidence below was refreshed against `agent-browser 0.29.1`.
|
|
32
|
+
- High-priority support gaps: 2026-05-26 audit found sessionless local commands and command-scoped value flags needed sharper wrapper handling; runtime/tests/docs now cover those paths. The 0.28.0 rebaseline added local `mcp` and `plugin` surfaces plus plugin-backed credential login; wrapper docs/tests mark `mcp` and known `plugin` commands sessionless, with no compatibility shim for older upstream releases. The 0.29.1 rebaseline adds upstream `@agent-browser/sandbox` helper-package guidance and stricter `install --with-deps` failure semantics; no new wrapper runtime mode or bundled dependency is required. Prior upstream fixes for click reliability, frame-scoped selectors/waits, form-command fixes, daemon retry improvements, and glibc-pinned release artifacts remain thin passthrough. Remaining upstream-owned caveat: current help still mentions `wait <selector> --state hidden` / `detached` and `find ... uncheck`, but runtime probes show those advertised shapes still fail, so wrapper docs keep `wait --fn` predicates and direct `uncheck` passthrough guidance.
|
|
33
33
|
- Post-`v0.2.29` review state: commits `eb55320` through `86abbfb` add browser guidance/smoke coverage plus `RQ-0086` click-probe reduction, `RQ-0087` same-snapshot form fill batching, `RQ-0088` current-ref fallback on locator misses, `RQ-0089` direct-upstream click mutation investigation, and `RQ-0090` stop-boundary/artifact-path guidance. Verification gates below were rerun on 2026-05-18 after those tasks landed. Constrained `job` (`RQ-0064`), the lightweight `qa` preset (`RQ-0065`), the experimental `sourceLookup` helper (`RQ-0066`), the experimental `networkSourceLookup` helper (`RQ-0067`), optional Exa/Brave-backed `agent_browser_web_search` with Pi-scoped package config (`RQ-0121`), and agent recovery for search/profile configuration failures (`RQ-0122`) are implemented; see [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#job), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#qa), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#sourcelookup), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#networksourcelookup), and [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#optional-companion-web-search). Reusable browser recipes (`RQ-0068`) are intentionally not adopted as a runtime surface; see [`ARCHITECTURE.md`](ARCHITECTURE.md#no-reusable-recipe-layer-yet).
|
|
34
34
|
|
|
35
35
|
## Open UX/reliability follow-ups from 2026-05-29 agent feedback
|
|
@@ -44,24 +44,25 @@ Current summary:
|
|
|
44
44
|
| RQ-0123–RQ-0127 | Stress-report wrapper fixes shipped; prompt-derived business-action blocking remains intentionally out of scope. | [`docs/support-notes.md`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/docs/support-notes.md) |
|
|
45
45
|
| RQ-0101 | Upstream `agent-browser 0.27.2` rebaseline shipped. | [`docs/support-notes.md`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/docs/support-notes.md) |
|
|
46
46
|
| RQ-0128 | Upstream `agent-browser 0.27.3` install-only rebaseline shipped; no new wrapper capability adopted. | [`docs/support-notes.md`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/docs/support-notes.md) |
|
|
47
|
+
| RQ-0129 | Upstream `agent-browser 0.29.1` rebaseline shipped; sandbox helpers are documented upstream package guidance, not a wrapper runtime. | [`docs/support-notes.md`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/docs/support-notes.md) |
|
|
47
48
|
|
|
48
49
|
## Verification evidence
|
|
49
50
|
|
|
50
|
-
Re-run the gates below before each release; this table records what the closure audit exercised. Rows marked
|
|
51
|
+
Re-run the gates below before each release; this table records what the closure audit exercised. Rows marked **Current** were rerun under `agent-browser 0.29.1` on 2026-06-21.
|
|
51
52
|
|
|
52
53
|
| Gate | Evidence | Status |
|
|
53
54
|
| --- | --- | --- |
|
|
54
|
-
| Default local gate | `npm run verify` checks generated playbook drift, clean-builds generated `dist/`, runs `tsc --noEmit`, unit/fake tests, generated command-reference blocks, and live command-reference sampling. | **Current for 0.
|
|
55
|
+
| Default local gate | `npm run verify` checks generated playbook drift, clean-builds generated `dist/`, runs `tsc --noEmit`, unit/fake tests, generated command-reference blocks, and live command-reference sampling. | **Current for 0.29.1:** pass on 2026-06-21 inside `npm run verify -- release`; command-reference verification also passed standalone after the rebaseline. |
|
|
55
56
|
| Pre-PR local gate | `npm run verify -- pre-pr` composes the default gate with package-content verification. Use before larger local handoffs or PR-ready claims when lifecycle/platform/live dogfood cost is not warranted. | Added 2026-06-10; orchestration is locked by `test/project-verify.test.ts` and does not change release mode. |
|
|
56
|
-
| Real upstream contract | `npm run verify -- real-upstream` runs the localhost fixture matrix against the real installed `agent-browser` matching the baseline. | **Current for 0.
|
|
57
|
-
| Packaged Pi smoke | `npm run verify -- package-pi` validates package contents, loads the packaged `agent_browser` tool without requiring optional Brave config, and executes fake-upstream `--version`. | **Current for 0.
|
|
58
|
-
| Startup profile | `npm run verify -- startup-profile --samples <n>` clean-builds generated `dist/`, records direct package entrypoint import/factory timing in fresh Node processes, and writes `.artifacts/startup-profile/latest.json`. It must not launch Pi, tmux, mise, npm, browsers, or `agent-browser`; full Pi TUI ready-prompt profiling is intentionally excluded after it proved too invasive for routine verification. Run this opt-in evidence when package layout, the compiled entrypoint, top-level imports, schema registration, or prompt/config startup logic changes. | **Current for compiled entrypoint:** pass on 2026-06-
|
|
59
|
-
| Deterministic dogfood smoke | `npm run verify -- dogfood` (`scripts/verify-agent-browser-dogfood.ts`) drives the native wrapper against a local file fixture through top-level `qa`, `semanticAction`, constrained `job`, screenshot artifact verification, and session close with the real `agent-browser` on `PATH`. | **Current for 0.
|
|
60
|
-
| Efficiency benchmark | `npm run verify -- benchmark` runs deterministic browser workflow accounting plus focused benchmark tests, including JSONL sampling fixtures and job/qa/sourceLookup/networkSourceLookup/Electron scenario coverage. | **
|
|
61
|
-
| Crabbox platform smoke | `npm run check:platform-smoke` syntax-checks the harness and cheap invariants. `npm run smoke:platform:ubuntu-image` builds the project-owned Linux image, `npm run smoke:platform:doctor` checks Crabbox 0.26.0+ and local target readiness, and `npm run smoke:platform:all` runs doctor first, then fast target-local `platform-build` (`npm run verify -- platform-target`, pack, clean Pi install) plus `browser-dogfood-smoke` on Crabbox `macos`, `ubuntu`, and `windows-native`; see [`platform-smoke.md`](platform-smoke.md). Target artifacts include Crabbox/provider/work-root metadata, and release review also checks provider-specific `crabbox list` commands for leftover leases/clones. | **Current for 0.
|
|
62
|
-
| `verify -- release` / `prepublishOnly` | `npm run verify -- release` chains the default gate with the configured-source lifecycle harness, packaged Pi smoke, and the release-blocking Crabbox platform matrix (`verifySteps` `release` in [`scripts/project.mjs`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/scripts/project.mjs)). `package.json` `prepublishOnly` runs that compose before `npm pack --dry-run` during `npm publish`. It intentionally omits standalone real-upstream, host-only dogfood, and benchmark modes—see [`RELEASE.md`](RELEASE.md#pre-release-checks). | **Current for 0.
|
|
63
|
-
| Configured-source lifecycle | `npm run verify -- lifecycle` (`scripts/verify-lifecycle.mjs`) drives `/reload`, closes and relaunches Pi with the same exact `--session-id`, checks the JSONL session header id, session continuity, slash-command sentinel tokens (`v1` before reload and `v2` after full relaunch because compiled JS package modules are process-cached), persisted spill reachability, and real Pi `tool_result` failure-patch semantics for a QA reclassification with a fake upstream on `PATH`. Default Pi model is `zai/glm-5.2`; default per-step wait is **180000 ms** (`DEFAULT_TIMEOUT_MS`); override model with `--model <id>` and waits with `--timeout-ms <ms>`. Passthrough flags in [`scripts/project.mjs`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/scripts/project.mjs): `--keep-artifacts`, `--model`, `--verbose`, and `--timeout-ms` plus a value (for example `npm run verify -- lifecycle --model openai-codex/gpt-5.5:minimal --keep-artifacts --verbose --timeout-ms 600000`). | **Current for 0.
|
|
64
|
-
| Quick isolated Pi smoke | `pi --approve --no-extensions --no-skills -e . --tools agent_browser` from trusted repo root; native `agent_browser` only. | **
|
|
57
|
+
| Real upstream contract | `npm run verify -- real-upstream` runs the localhost fixture matrix against the real installed `agent-browser` matching the baseline. | **Current for 0.29.1:** pass on 2026-06-21 (`npm run verify -- real-upstream`; localhost fixture matrix and plugin list probe passed against installed `agent-browser 0.29.1`). |
|
|
58
|
+
| Packaged Pi smoke | `npm run verify -- package-pi` validates package contents, loads the packaged `agent_browser` tool without requiring optional Brave config, and executes fake-upstream `--version`. | **Current for 0.29.1:** pass on 2026-06-21 as part of `npm run verify -- release` (`verify-package.mjs --smoke-pi`; packaged `agent_browser --version` invocation passed). |
|
|
59
|
+
| Startup profile | `npm run verify -- startup-profile --samples <n>` clean-builds generated `dist/`, records direct package entrypoint import/factory timing in fresh Node processes, and writes `.artifacts/startup-profile/latest.json`. It must not launch Pi, tmux, mise, npm, browsers, or `agent-browser`; full Pi TUI ready-prompt profiling is intentionally excluded after it proved too invasive for routine verification. Run this opt-in evidence when package layout, the compiled entrypoint, top-level imports, schema registration, or prompt/config startup logic changes. | **Current for compiled entrypoint:** pass on 2026-06-21 (`npm run verify -- startup-profile --samples 3`; direct compiled entrypoint import+factory median 47.3 ms, below the 250 ms budget). Full-Pi startup numbers from the unsafe tmux profiler are not accepted as ongoing release evidence. |
|
|
60
|
+
| Deterministic dogfood smoke | `npm run verify -- dogfood` (`scripts/verify-agent-browser-dogfood.ts`) drives the native wrapper against a local file fixture through top-level `qa`, `semanticAction`, constrained `job`, screenshot artifact verification, and session close with the real `agent-browser` on `PATH`. | **Current for 0.29.1:** pass on 2026-06-21 (`npm run verify -- dogfood`; `qa-url`, fresh/current opens, semantic click, job screenshot artifact verification, and close all passed). |
|
|
61
|
+
| Efficiency benchmark | `npm run verify -- benchmark` runs deterministic browser workflow accounting plus focused benchmark tests, including JSONL sampling fixtures and job/qa/sourceLookup/networkSourceLookup/Electron scenario coverage. | **Current:** pass on 2026-06-21 (`npm run verify -- benchmark`; 13/13 deterministic scenarios passed). |
|
|
62
|
+
| Crabbox platform smoke | `npm run check:platform-smoke` syntax-checks the harness and cheap invariants. `npm run smoke:platform:ubuntu-image` builds the project-owned Linux image, `npm run smoke:platform:doctor` checks Crabbox 0.26.0+ and local target readiness, and `npm run smoke:platform:all` runs doctor first, then fast target-local `platform-build` (`npm run verify -- platform-target`, pack, clean Pi install) plus `browser-dogfood-smoke` on Crabbox `macos`, `ubuntu`, and `windows-native`; see [`platform-smoke.md`](platform-smoke.md). Target artifacts include Crabbox/provider/work-root metadata, and release review also checks provider-specific `crabbox list` commands for leftover leases/clones. | **Current for 0.29.1:** pass on 2026-06-21 inside `npm run verify -- release`; rebuilt Ubuntu image `pi-agent-browser-native-platform:node24-agent-browser0.29.1`, refreshed the Windows `crabbox-ready` template snapshot to `agent-browser 0.29.1`, doctor passed, then Crabbox platform smoke passed for macOS, Ubuntu, and native Windows. |
|
|
63
|
+
| `verify -- release` / `prepublishOnly` | `npm run verify -- release` chains the default gate with the configured-source lifecycle harness, packaged Pi smoke, and the release-blocking Crabbox platform matrix (`verifySteps` `release` in [`scripts/project.mjs`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/scripts/project.mjs)). `package.json` `prepublishOnly` runs that compose before `npm pack --dry-run` during `npm publish`. It intentionally omits standalone real-upstream, host-only dogfood, and benchmark modes—see [`RELEASE.md`](RELEASE.md#pre-release-checks). | **Current for 0.29.1:** pass on 2026-06-21 (`npm run verify -- release`), including default unit/fake gate, generated docs checks, live command-reference sampling, lifecycle harness, packaged Pi smoke, and macOS/Ubuntu/native-Windows Crabbox platform smoke. |
|
|
64
|
+
| Configured-source lifecycle | `npm run verify -- lifecycle` (`scripts/verify-lifecycle.mjs`) drives `/reload`, closes and relaunches Pi with the same exact `--session-id`, checks the JSONL session header id, session continuity, slash-command sentinel tokens (`v1` before reload and `v2` after full relaunch because compiled JS package modules are process-cached), persisted spill reachability, and real Pi `tool_result` failure-patch semantics for a QA reclassification with a fake upstream on `PATH`. Default Pi model is `zai/glm-5.2`; default per-step wait is **180000 ms** (`DEFAULT_TIMEOUT_MS`); override model with `--model <id>` and waits with `--timeout-ms <ms>`. Passthrough flags in [`scripts/project.mjs`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/scripts/project.mjs): `--keep-artifacts`, `--model`, `--verbose`, and `--timeout-ms` plus a value (for example `npm run verify -- lifecycle --model openai-codex/gpt-5.5:minimal --keep-artifacts --verbose --timeout-ms 600000`). | **Current for 0.29.1:** pass on 2026-06-21 as part of `npm run verify -- release`; managed browser session continuity and persisted full output verified before cleanup. |
|
|
65
|
+
| Quick isolated Pi smoke | `pi --approve --no-extensions --no-skills -e . --tools agent_browser` from trusted repo root; native `agent_browser` only. | **Current for 0.29.1:** pass on 2026-06-21 via tmux with `pi --approve --no-extensions --no-skills -e . --model openai-codex/gpt-5.5:minimal --tools agent_browser`. Covered the public Sauce Demo checkout-overview flow with clean context, native sorting/click/fill flow, screenshot and recording evidence, console/page-error/network diagnostics, and no order placement. A one-line screenshot-plus-recording close-guard smoke on `https://example.com` passed after rebuilding `dist/`, proving close succeeds after both artifact paths are verified. Temp artifacts and tmux sessions were cleaned after evidence capture. |
|
|
65
66
|
|
|
66
67
|
Runtime floor note: package metadata keeps Pi core package peer ranges as wildcards, per the installed Pi package docs, but `pi-agent-browser-doctor` / `npm run doctor` treats `pi --version` below 0.79.0 as a setup failure. This keeps the package dependency shape aligned with Pi package loading while still making unsupported host Pi versions a release and first-run blocker.
|
|
67
68
|
|
|
@@ -69,11 +70,11 @@ Runtime floor note: package metadata keeps Pi core package peer ranges wildcard
|
|
|
69
70
|
|
|
70
71
|
| Baseline section | Baseline items | Documentation | Runtime handling | Test coverage | Validation status |
|
|
71
72
|
| --- | --- | --- | --- | --- | --- |
|
|
72
|
-
| Built-in skills |
|
|
73
|
+
| Built-in skills | 15 canonical tokens from baseline section `skills`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#built-in-skills). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#built-in-skills), generated baseline block, README proof section, release docs. | `needsManagedSession` keeps read-only skills inspection sessionless while preserving thin upstream passthrough; upstream `@agent-browser/sandbox` remains external package guidance, not a bundled wrapper dependency. | Runtime and extension-validation skills/provider matrix; real-upstream inspection/skills group. | Supported. |
|
|
73
74
|
| Core page, element, navigation, and extraction commands | 74 canonical tokens from baseline section `core-commands`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#core-page-and-element-commands). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#core-page-and-element-commands), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md), README quick start. | Thin passthrough with wrapper-owned JSON/session planning, ref guidance, artifact verification, page-change summaries, click-dispatch diagnostics, no-op scroll/focus diagnostics, shorthand compilers, and redaction. | Real-upstream core matrix plus fake core matrix for passthrough, ordering, diagnostics, and compiler validation. | Supported. Upstream semantics remain upstream-owned. |
|
|
74
75
|
| Sessions, state, tabs, frames, dialogs, and windows | 20 canonical tokens from baseline section `state-tabs-frames-dialogs`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#session-state-frames-dialogs-windows-and-inspection-commands). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#session-state-frames-dialogs-windows-and-inspection-commands), stateful workflow notes, [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details). | Stateful summaries/redaction, state artifact handling, sessionless local command planning, managed-session restore, tab target pinning, and close alias cleanup. | Extension-validation stateful matrix, runtime session/resume tests, presentation redaction tests, lifecycle harness. | Supported. External profile/auth state remains operator-owned. |
|
|
75
|
-
| Network, storage, artifacts, diagnostics, and performance |
|
|
76
|
-
| Batch, auth, confirmations, setup, dashboard, devices, and AI commands |
|
|
76
|
+
| Network, storage, artifacts, diagnostics, and performance | 43 canonical tokens from baseline section `network-storage-artifacts-diagnostics`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#page-state-finding-mouse-settings-network-and-storage). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#page-state-finding-mouse-settings-network-and-storage), diagnostic sections, [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details). | Thin passthrough plus compact diagnostics, route-mock warnings, useful-but-redacted storage output, stream idempotency normalization, artifact metadata, missing-ffmpeg warnings, sensitive-data redaction, timeout bounds, and cleanup-pair guidance. | Fake non-core matrix and safe real-upstream coverage for network/HAR, diff, trace/profiler, console/errors/highlight, stream, vitals, and React missing-renderer. | Supported. Environment-sensitive operations need suitable local/browser state. |
|
|
77
|
+
| Batch, auth, confirmations, setup, dashboard, devices, and AI commands | 31 canonical tokens from baseline section `batch-auth-setup-ai`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#batch-auth-confirmations-sessions-chat-dashboard-devices-and-setup). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#batch-auth-confirmations-sessions-chat-dashboard-devices-and-setup), README security notes, release docs. | Native-tool batch stdin, generated `job`/`qa`/lookup batch plans, auth/confirmation redaction, sessionless local auth/setup/dashboard/doctor/plugin planning, plugin list/show JSON envelope normalization, bare-`mcp` validation with `mcp --help` preserved, timeout/cleanup guidance. | Parser/runtime plugin and MCP unit coverage; fake-upstream plugin list/show and MCP help/blocking coverage; real-upstream plugin list shape probe; structured input-mode tests; efficiency benchmark scenarios. | Supported. Interactive side-effecting setup/auth/chat remains upstream-owned. `plugin` is local/sessionless; `mcp` is external-client-only except help; `auth login --credential-provider` resolves credentials via a plugin; `install --with-deps` failures remain upstream-owned. |
|
|
77
78
|
| Global flags, config, providers, policy, and environment | 121 canonical tokens from baseline section `options-and-env`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#important-global-flags-config-and-environment). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#important-global-flags-config-and-environment), README provider/setup notes, [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#sessionmode), architecture/runtime docs. | Runtime handles command discovery, value-flag prevalidation, launch-scoped flags, redacted echoes, fresh-session recovery hints, explicit sessions, provider/device launch-scoping, parent env forwarding with wrapper overrides, subprocess completion, and package-owned Pi-scoped config for optional companion features. | Runtime tests for flags/planning/redaction/session behavior; process tests for env and stdio-linger completion; config/web-search/CLI tests; fake provider/specialized-skill matrix; package doctor. | Supported. Provider clouds, iOS/Appium, proxies, profiles, and credentials require external setup. |
|
|
78
79
|
|
|
79
80
|
## Follow-up decision after closure
|
package/docs/TOOL_CONTRACT.md
CHANGED
|
@@ -126,7 +126,7 @@ For link and button text, use the **exact** visible label from the latest `snaps
|
|
|
126
126
|
|
|
127
127
|
## Wrapper `--json`
|
|
128
128
|
|
|
129
|
-
The extension always plans normal browser commands with `--json` prepended in `effectiveArgs` so upstream returns structured JSON for presentation and `details`. **Do not** include `--json` in caller `args`; it is unnecessary and can confuse planning or transcript hooks that treat caller-requested JSON differently. Plain-text inspection (`--help`, `--version`) keeps its own output shape. Read-only skills and local/setup commands such as `skills list` / `skills get` / `skills path`, local auth profile management (`auth save/list/show/delete/remove`), `profiles`, `dashboard`, `device list`, `doctor`, `install`, `upgrade`, `session list`, and targeted/all local saved-state maintenance including `state clear --all`, `state clear -a`, and named `state clear <session-name>` skip implicit session injection as documented under `sessionMode`.
|
|
129
|
+
The extension always plans normal browser commands with `--json` prepended in `effectiveArgs` so upstream returns structured JSON for presentation and `details`. **Do not** include `--json` in caller `args`; it is unnecessary and can confuse planning or transcript hooks that treat caller-requested JSON differently. Plain-text inspection (`--help`, `--version`) keeps its own output shape. Read-only skills and local/setup commands such as `skills list` / `skills get` / `skills path`, local auth profile management (`auth save/list/show/delete/remove`), `profiles`, `dashboard`, `device list`, `doctor`, `install`, `upgrade`, `session list`, `plugin add/list/show/run`, `mcp --help`, and targeted/all local saved-state maintenance including `state clear --all`, `state clear -a`, and named `state clear <session-name>` skip implicit session injection as documented under `sessionMode`; bare `mcp` server calls are rejected before spawn because they are for external MCP clients.
|
|
130
130
|
|
|
131
131
|
## Headed and local fixture limits
|
|
132
132
|
|
|
@@ -157,9 +157,9 @@ The extension always plans normal browser commands with `--json` prepended in `e
|
|
|
157
157
|
- For batch chains that touch cookies, storage, auth, or other secret-bearing commands, use details.batchSteps for per-step artifacts, categories, spill paths, and full structured errors; top-level details.data on batch is only a compact redacted step matrix (success, argv-redacted command, redacted result or scrubbed error text) built from the same presentation rules as standalone calls.
|
|
158
158
|
- For non-core families, pass current upstream commands through the native tool directly: network route/requests/har (including request filters like --type/--method/--status), diff snapshot/screenshot/url with scoped/baseline options, trace/profiler/record, console/errors/highlight/inspect/clipboard, stream enable/disable/status, dashboard start/stop, device list for iOS simulator inventory, and chat. For compact network requests output, prefer details.nextActions for request detail, route-mock diagnostics, actionable failed-request networkSourceLookup, filtering, clearing the aggregate buffer before repro, or HAR capture follow-ups instead of guessing request-id syntax. Artifact-producing commands report details.artifacts and verification state; long-running starts such as stream, dashboard, trace/profiler, and record should be paired with the matching stop/disable command when the task is done; stream enable already-enabled outcomes are treated as idempotent success with status/disable follow-ups.
|
|
159
159
|
- For Electron desktop apps, prefer top-level electron for wrapper-owned discovery, isolated launch, status, compact probe, and cleanup: list first, treat likely-sensitive annotations as hints rather than enforcement, launch with the default snapshot handoff unless handoff: "tabs" is the safer diagnostic starting point, use electron.probe or snapshot -i/qa.attached for current-session state, and always cleanup the returned launchId when done. electron.launch uses an isolated temporary profile; it does not reuse the app's normal signed-in profile or attach to an already-running authenticated app. For signed-in local app state, host-launch the normal app with --remote-debugging-port when appropriate, then use raw args connect <port|url>; after connect, inspect tab list, select the stable tab id such as tab t2, then run a condition wait or snapshot -i before using refs. close commands (`close`, `quit`, or `exit`) only close the browser/CDP session; leave manually launched app shutdown, profile cleanup, and explicit artifacts to the host owner.
|
|
160
|
-
- For provider or specialized app workflows, load version-matched upstream guidance with skills get agentcore|electron|slack|dogfood|vercel-sandbox through the native tool; add --full when you need references/templates, and use skills get --all only for broad skill audits. Provider launches such as -p ios, --provider browserbase/kernel/browseruse/browserless/agentcore, and iOS --device are upstream-owned setup paths; use sessionMode fresh when switching providers and expect external credentials or local Appium/Xcode setup to be required.
|
|
160
|
+
- For provider or specialized app workflows, load version-matched upstream guidance with skills get agentcore|electron|slack|dogfood|vercel-sandbox through the native tool; add --full when you need references/templates, and use skills get --all only for broad skill audits. Hosted sandbox workflows should use upstream @agent-browser/sandbox helpers outside this wrapper. Provider launches such as -p ios, --provider browserbase/kernel/browseruse/browserless/agentcore, and iOS --device are upstream-owned setup paths; use sessionMode fresh when switching providers and expect external credentials or local Appium/Xcode setup to be required.
|
|
161
161
|
- For dialogs and frames, use dialog status/accept/dismiss and frame <selector|main> through native args; dialog commands and eval snippets that look like alert/confirm/prompt/dialog triggers are shorter-bounded than normal browser calls, and timed-out dialog-like interactions may add inspect-dialog-after-timeout, dismiss-dialog-after-timeout, or recover-fresh-session-after-dialog-timeout nextActions. When --confirm-actions produces a pending confirmation, use details.nextActions or exact confirm <id> / deny <id> calls instead of inventing ids.
|
|
162
|
-
- If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. For headed demos, put --headed on the first launch with sessionMode=fresh and verify with screenshot/tab/get-url evidence because tool success cannot prove the OS window is visible to the user. For desktop readiness, prefer real conditions first: wait --text, wait --url, wait --fn, wait --load <state>, wait --download, or qa.attached; for disappearance checks in agent-browser 0.
|
|
162
|
+
- If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. For headed demos, put --headed on the first launch with sessionMode=fresh and verify with screenshot/tab/get-url evidence because tool success cannot prove the OS window is visible to the user. For desktop readiness, prefer real conditions first: wait --text, wait --url, wait --fn, wait --load <state>, wait --download, or qa.attached; for disappearance checks in agent-browser 0.29.1, use wait --fn predicates instead of stale upstream-help examples like wait <selector> --state hidden. Use electron.probe/status for wrapper-owned launch health or target mismatch. Fixed waits are a last resort: use explicit --timeout or top-level timeoutMs for legitimately slow waits, and treat a successful payload like "waited":"timeout" as elapsed time only—verify completion with an observed condition, fresh snapshot, or screenshot.
|
|
163
163
|
- For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.
|
|
164
164
|
- For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.
|
|
165
165
|
- For downloads, prefer download <selector> <path> when an element click should save a file; simple loopback anchor downloads are saved to the requested path when the wrapper can resolve an HTTP(S) href. Do not rely on click alone when you need the downloaded file on disk.
|
|
@@ -606,7 +606,7 @@ Behavior:
|
|
|
606
606
|
- if `args` already include `--session` (including argv compiled from optional `semanticAction.session`), upstream session choice wins
|
|
607
607
|
- `"auto"` prepends the current extension-managed active session when appropriate
|
|
608
608
|
- `"fresh"` rotates that managed session to a fresh upstream launch so startup-scoped flags like `--profile`, `--executable-path`, `--session-name`, `--cdp`, `--state`, `--auto-connect`, `--init-script`, `--enable`, `-p` / `--provider`, or iOS `--device` apply and later default calls follow the new browser
|
|
609
|
-
- sessionless paths skip that injection even under `"auto"`: plain-text `--help` / `-h` / `--version` / `-V` (see the generated inspection playbook fragment below), read-only `skills list`, `skills get …`, and `skills path …`, local auth profile management (`auth save/list/show/delete/remove`), local/setup commands (`profiles`, `dashboard start/stop`, `device list`, `doctor`, `install`, `upgrade`, `session list`), and targeted/all local saved-state maintenance (`state list/show`, `state clear --all`, `state clear -a`, `state clear <session-name>`, `state clean --older-than <days>`, `state rename`) keep `effectiveArgs` free of the implicit managed `--session` unless the caller supplied `--session` explicitly; successful results therefore omit `usedImplicitSession` and the extension-managed `sessionName` for those calls, while root `session`, untargeted `state clear`, bare `state clean`, browser-backed `auth login`, and `state save/load` keep normal managed-session injection (`extensions/agent-browser/lib/command-policy.ts`, `needsManagedSession`; `extensions/agent-browser/lib/runtime.ts`, `buildExecutionPlan`)
|
|
609
|
+
- sessionless paths skip that injection even under `"auto"`: plain-text `--help` / `-h` / `--version` / `-V` (see the generated inspection playbook fragment below), read-only `skills list`, `skills get …`, and `skills path …`, local auth profile management (`auth save/list/show/delete/remove`), local/setup commands (`profiles`, `dashboard start/stop`, `device list`, `doctor`, `install`, `upgrade`, `session list`, `plugin add/list/show/run`, `mcp --help`), and targeted/all local saved-state maintenance (`state list/show`, `state clear --all`, `state clear -a`, `state clear <session-name>`, `state clean --older-than <days>`, `state rename`) keep `effectiveArgs` free of the implicit managed `--session` unless the caller supplied `--session` explicitly; successful results therefore omit `usedImplicitSession` and the extension-managed `sessionName` for those calls, while bare `mcp` server calls are rejected before spawn, and root `session`, untargeted `state clear`, bare `state clean`, browser-backed `auth login`, and `state save/load` keep normal managed-session injection (`extensions/agent-browser/lib/command-policy.ts`, `needsManagedSession`; `extensions/agent-browser/lib/runtime.ts`, `buildExecutionPlan`)
|
|
610
610
|
|
|
611
611
|
Recommended use:
|
|
612
612
|
- use `"auto"` for the common browse/snapshot/click flow inside one `pi` session
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-agent-browser-native",
|
|
3
|
-
"version": "0.2.53",
|
|
3
|
+
"version": "0.2.55",
|
|
4
4
|
"description": "pi extension that exposes agent-browser as a native tool for browser automation",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"author": "Mitch Fultz (https://github.com/fitchmultz)",
|
|
@@ -63,16 +63,18 @@
|
|
|
63
63
|
"typebox": "*"
|
|
64
64
|
},
|
|
65
65
|
"devDependencies": {
|
|
66
|
-
"@earendil-works/pi-ai": "0.79.
|
|
67
|
-
"@earendil-works/pi-coding-agent": "0.79.
|
|
68
|
-
"@earendil-works/pi-tui": "0.79.
|
|
66
|
+
"@earendil-works/pi-ai": "^0.79.8",
|
|
67
|
+
"@earendil-works/pi-coding-agent": "^0.79.8",
|
|
68
|
+
"@earendil-works/pi-tui": "^0.79.8",
|
|
69
69
|
"@types/node": "^25.9.3",
|
|
70
70
|
"tsx": "^4.21.0",
|
|
71
71
|
"typebox": "^1.1.38",
|
|
72
72
|
"typescript": "^6.0.3"
|
|
73
73
|
},
|
|
74
74
|
"overrides": {
|
|
75
|
-
"basic-ftp": "6.0.1"
|
|
75
|
+
"basic-ftp": "6.0.1",
|
|
76
|
+
"esbuild": "0.28.1",
|
|
77
|
+
"protobufjs": "7.6.4"
|
|
76
78
|
},
|
|
77
79
|
"scripts": {
|
|
78
80
|
"docs": "node ./scripts/project.mjs docs",
|
|
@@ -95,5 +97,19 @@
|
|
|
95
97
|
"prepack": "npm run build",
|
|
96
98
|
"prepare": "node ./scripts/prepare.mjs"
|
|
97
99
|
},
|
|
98
|
-
"packageManager": "npm@11.14.0"
|
|
100
|
+
"packageManager": "npm@11.14.0",
|
|
101
|
+
"peerDependenciesMeta": {
|
|
102
|
+
"@earendil-works/pi-ai": {
|
|
103
|
+
"optional": true
|
|
104
|
+
},
|
|
105
|
+
"@earendil-works/pi-coding-agent": {
|
|
106
|
+
"optional": true
|
|
107
|
+
},
|
|
108
|
+
"@earendil-works/pi-tui": {
|
|
109
|
+
"optional": true
|
|
110
|
+
},
|
|
111
|
+
"typebox": {
|
|
112
|
+
"optional": true
|
|
113
|
+
}
|
|
114
|
+
}
|
|
99
115
|
}
|
|
@@ -14,8 +14,8 @@ export const COMMAND_REFERENCE_BASELINE_BLOCK_IDS = Object.freeze(["upstream-bas
|
|
|
14
14
|
|
|
15
15
|
const sourceEvidence = Object.freeze({
|
|
16
16
|
repository: "vercel-labs/agent-browser",
|
|
17
|
-
upstreamHead: "
|
|
18
|
-
upstreamPackageVersion: "0.
|
|
17
|
+
upstreamHead: "4572acf0d71c0086009206c9c1e2136fc54ec9e5",
|
|
18
|
+
upstreamPackageVersion: "0.29.1",
|
|
19
19
|
inspectedSources: Object.freeze([
|
|
20
20
|
"agent-browser --version",
|
|
21
21
|
"agent-browser --help",
|
|
@@ -27,6 +27,10 @@ const sourceEvidence = Object.freeze({
|
|
|
27
27
|
"agent-browser.schema.json",
|
|
28
28
|
"cli/src/commands.rs",
|
|
29
29
|
"cli/src/flags.rs",
|
|
30
|
+
"packages/@agent-browser/sandbox/README.md",
|
|
31
|
+
"packages/@agent-browser/sandbox/src/shared.ts",
|
|
32
|
+
"packages/@agent-browser/sandbox/src/vercel.ts",
|
|
33
|
+
"packages/@agent-browser/sandbox/src/eve.ts",
|
|
30
34
|
]),
|
|
31
35
|
});
|
|
32
36
|
|
|
@@ -46,6 +50,7 @@ const helpCommands = Object.freeze([
|
|
|
46
50
|
helpCommand("skills help", ["skills", "--help"]),
|
|
47
51
|
helpCommand("skills list", ["skills", "list"]),
|
|
48
52
|
helpCommand("core skill full", ["skills", "get", "core", "--full"]),
|
|
53
|
+
helpCommand("vercel sandbox skill full", ["skills", "get", "vercel-sandbox", "--full"]),
|
|
49
54
|
helpCommand("open help", ["open", "--help"]),
|
|
50
55
|
helpCommand("click help", ["click", "--help"]),
|
|
51
56
|
helpCommand("key help", ["key", "--help"]),
|
|
@@ -113,6 +118,8 @@ const inventorySections = Object.freeze([
|
|
|
113
118
|
"skills get dogfood",
|
|
114
119
|
"skills get vercel-sandbox",
|
|
115
120
|
"skills get agentcore",
|
|
121
|
+
"@agent-browser/sandbox",
|
|
122
|
+
"installSystemDependencies: false",
|
|
116
123
|
"skills path [name]",
|
|
117
124
|
"AGENT_BROWSER_SKILLS_DIR",
|
|
118
125
|
],
|
|
@@ -127,6 +134,8 @@ const inventorySections = Object.freeze([
|
|
|
127
134
|
["skills list", "dogfood"],
|
|
128
135
|
["skills list", "vercel-sandbox"],
|
|
129
136
|
["skills list", "agentcore"],
|
|
137
|
+
["vercel sandbox skill full", "@agent-browser/sandbox"],
|
|
138
|
+
["vercel sandbox skill full", "installSystemDependencies: false"],
|
|
130
139
|
["core skill full", "agent-browser frame @e3"],
|
|
131
140
|
["core skill full", "agent-browser dialog accept"],
|
|
132
141
|
["core skill full", "agent-browser state save ./auth.json"],
|
|
@@ -459,6 +468,7 @@ const inventorySections = Object.freeze([
|
|
|
459
468
|
"device list",
|
|
460
469
|
"install",
|
|
461
470
|
"install --with-deps",
|
|
471
|
+
"install --with-deps exits nonzero",
|
|
462
472
|
"upgrade",
|
|
463
473
|
"doctor [--fix]",
|
|
464
474
|
"doctor --offline --quick",
|
|
@@ -481,6 +491,7 @@ const inventorySections = Object.freeze([
|
|
|
481
491
|
root("dashboard start --port <n>"),
|
|
482
492
|
["device help", "device list"],
|
|
483
493
|
root("install --with-deps"),
|
|
494
|
+
["install help", "fails if deps fail"],
|
|
484
495
|
root("upgrade"),
|
|
485
496
|
root("doctor [--fix]"),
|
|
486
497
|
root("profiles"),
|
|
@@ -731,7 +742,7 @@ const inventorySections = Object.freeze([
|
|
|
731
742
|
]);
|
|
732
743
|
|
|
733
744
|
export const CAPABILITY_BASELINE = Object.freeze({
|
|
734
|
-
targetVersion: "0.
|
|
745
|
+
targetVersion: "0.29.1",
|
|
735
746
|
sourceEvidence,
|
|
736
747
|
helpCommands,
|
|
737
748
|
inventorySections,
|
|
@@ -62,7 +62,7 @@ Environment:
|
|
|
62
62
|
PLATFORM_SMOKE_MAC_USER macOS SSH user; default $USER
|
|
63
63
|
PLATFORM_SMOKE_MAC_WORK_ROOT macOS Crabbox work root
|
|
64
64
|
PLATFORM_SMOKE_MAC_PORT macOS SSH port; default 22
|
|
65
|
-
PLATFORM_SMOKE_UBUNTU_IMAGE Ubuntu local-container image; default pi-agent-browser-native-platform:node24-agent-
|
|
65
|
+
PLATFORM_SMOKE_UBUNTU_IMAGE Ubuntu local-container image; default ${config?.ubuntuContainerImage ?? "pi-agent-browser-native-platform:node24-agent-browser<target>"}
|
|
66
66
|
PLATFORM_SMOKE_WINDOWS_VM Parallels Windows template VM
|
|
67
67
|
PLATFORM_SMOKE_WINDOWS_SNAPSHOT Parallels snapshot name
|
|
68
68
|
PLATFORM_SMOKE_WINDOWS_USER Windows SSH user
|