pi-agent-browser-native 0.2.52 → 0.2.54

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,37 @@
2
2
 
3
3
  ## Unreleased
4
4
 
5
+ ## 0.2.54 - 2026-06-19
6
+
7
+ ### Fixed
8
+
9
+ - Accepted upstream `plugin list` / `plugin show` JSON and blocked bare `mcp` native-tool calls while preserving `mcp --help`.
10
+
11
+ ### Validation
12
+
13
+ - Ran `npm run verify -- release` against `agent-browser` `0.28.0`; the gate passed default verification, command-reference checks, build, lifecycle verification, packaged Pi smoke, and macOS/Ubuntu/Windows-native platform smoke.
14
+ - Ran `npm run verify -- real-upstream`, `npm run docs`, `npm run doctor`, `npm run check:platform-smoke`, `npm run smoke:platform:ubuntu-image`, `npm run smoke:platform:doctor`, and `git diff --check`.
15
+ - Ran a tmux-driven Pi checkout dogfood with `pi --approve --no-extensions --no-skills -e .`, covering `--version`, `mcp --help`, `plugin list`, fresh `example.com` open plus `snapshot -i`, `qa` on `react.dev`, and browser close.
16
+
17
+ ## 0.2.53 - 2026-06-18
18
+
19
+ ### Changed
20
+
21
+ - Rebaselined upstream capability metadata, command reference, support matrix, platform-smoke image tag, and real-upstream output-shape metadata for `agent-browser` `0.28.0` / vercel-labs/agent-browser@6323df571ffd17d14e60ec19fcb56cc1caf498ab.
22
+ - Documented upstream `mcp`, `plugin add/list/show/run`, plugin-backed `auth login --credential-provider`, and `AGENT_BROWSER_PLUGINS` surfaces while keeping the wrapper thin and compatibility-shim-free.
23
+ - Marked `mcp` and known `plugin` commands as sessionless wrapper calls so local/infra commands do not get an implicit managed browser session.
24
+ - Collapsed duplicated release/platform-smoke prose across README, release docs, and agent guidance in favor of `docs/platform-smoke.md` as the detailed source of truth.
25
+ - Simplified duplicate internal schema/job compiler plumbing without changing the public tool schema or generated argv behavior.
26
+
27
+ ### Fixed
28
+
29
+ - Retried the Windows platform dogfood smoke once after transient first browser-open failures, matching the existing Windows browser prewarm tolerance while preserving real dogfood failures.
30
+
31
+ ### Validation
32
+
33
+ - Ran `npm run verify -- release` against `agent-browser` `0.28.0`; the gate passed default verification, command-reference checks, build, lifecycle verification, packaged Pi smoke, and macOS/Ubuntu/Windows-native platform smoke after refreshing the Ubuntu image and Windows `crabbox-ready` snapshot.
34
+ - Ran `npm run verify -- real-upstream`, `npm run verify -- dogfood`, `npm run docs`, `npm run verify -- command-reference`, and `git diff --check`.
35
+
5
36
  ## 0.2.52 - 2026-06-15
6
37
 
7
38
  ### Changed
package/README.md CHANGED
@@ -85,7 +85,7 @@ The result is optimized for agent work:
85
85
  | Recording workflows fail late when `ffmpeg` is missing | After successful `record start` / `record restart`, warns when `ffmpeg` is not on `PATH` so agents can install or fix PATH before `record stop` | [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md#details), [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md#diff-debug-and-streaming), `test/agent-browser.extension-validation.test.ts` |
86
86
  | Direct binary help may be blocked in agent sessions | Publishes a repo-readable command reference and verifies it against the target upstream version | `npm run verify` |
87
87
  | Desktop Electron apps need discovery, CDP attach, and safe teardown | Top-level `electron` runs host `list` / isolated `launch` (temp profile, OS-chosen debug port) / `status` / `probe` / `cleanup`, merges `launchId` plus managed `sessionName`, supports `handoff` `snapshot` / `tabs` / `connect`, and surfaces mismatch and post-command health guidance; wrapper cleanup applies only to launches it created | `extensions/agent-browser/lib/electron/discovery.ts`, `launch.ts`, `cleanup.ts`, [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md#electron), [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md#electron-desktop-apps) |
88
- | Agents need bundled `skills` text and local setup/status commands without touching the live session | Treats `skills list`, `skills get …`, `skills path …`, local auth profile management (`auth save/list/show/delete/remove`), `profiles`, `dashboard`, `device list`, `doctor`, `install`, `upgrade`, `session list`, and targeted/all saved-state maintenance (`state clear --all`, `state clear -a`, named clear, or `state clean --older-than <days>`) as sessionless reads/actions: no implicit managed `--session` under default `sessionMode: "auto"` (same session-ownership goal as plain-text `--help` / `--version`), while provider and browser-backed workflows stay thin passthroughs that require upstream setup and credentials | [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md#built-in-skills), `extensions/agent-browser/lib/command-policy.ts`, `extensions/agent-browser/lib/runtime.ts` |
88
+ | Agents need bundled `skills` text and local setup/status commands without touching the live session | Treats `skills list`, `skills get …`, `skills path …`, local auth profile management (`auth save/list/show/delete/remove`), `profiles`, `dashboard`, `device list`, `doctor`, `install`, `upgrade`, `session list`, `plugin add/list/show/run`, `mcp --help`, and targeted/all saved-state maintenance (`state clear --all`, `state clear -a`, named clear, or `state clean --older-than <days>`) as sessionless reads/actions: no implicit managed `--session` under default `sessionMode: "auto"` (same session-ownership goal as plain-text `--help` / `--version`), while bare `mcp` server calls are blocked and provider/browser-backed workflows stay thin passthroughs that require upstream setup and credentials | [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md#built-in-skills), `extensions/agent-browser/lib/command-policy.ts`, `extensions/agent-browser/lib/runtime.ts` |
89
89
 
90
90
  ## Fastest way to try it
91
91
 
@@ -449,7 +449,7 @@ For asynchronous exports, click first and then wait for the download:
449
449
  { "args": ["wait", "--download", "/tmp/report.csv"] }
450
450
  ```
451
451
 
452
- When a user gives exact artifact paths for screenshots, recordings, downloads, PDFs, traces, or HAR files, use those paths or explicitly report why the artifact was unavailable; do not silently substitute a different path in the final report. The wrapper creates missing parent directories for direct artifact paths such as `state save`, screenshots, PDFs, downloads, and `wait --download`. For simple loopback `download <selector> <path>` anchor links with HTTP(S) `href`, it can save the in-page response directly to the requested path before falling back to upstream click/download behavior; non-loopback/profile downloads stay upstream-owned. With upstream `agent-browser 0.27.3`, treat `details.savedFilePath` as upstream-reported metadata and confirm `details.artifacts[].exists` / `details.artifactVerification.verified` before relying on the requested `wait --download <path>` file being present on disk; non-file download payloads such as `data:` URLs are not verified local artifacts.
452
+ When a user gives exact artifact paths for screenshots, recordings, downloads, PDFs, traces, or HAR files, use those paths or explicitly report why the artifact was unavailable; do not silently substitute a different path in the final report. The wrapper creates missing parent directories for direct artifact paths such as `state save`, screenshots, PDFs, downloads, and `wait --download`. For simple loopback `download <selector> <path>` anchor links with HTTP(S) `href`, it can save the in-page response directly to the requested path before falling back to upstream click/download behavior; non-loopback/profile downloads stay upstream-owned. With current upstream `agent-browser`, treat `details.savedFilePath` as upstream-reported metadata and confirm `details.artifacts[].exists` / `details.artifactVerification.verified` before relying on the requested `wait --download <path>` file being present on disk; non-file download payloads such as `data:` URLs are not verified local artifacts.
453
453
 
454
454
  For evidence-only screenshots or QA captures, branch on `details.artifactVerification` and `details.artifacts` before reporting PASS/FAIL; inline image attachments are optional when size limits allow—do not require vision review unless the user asked for visual inspection. If the latest prompt names exact required artifact paths, browser close can be blocked with `details.promptGuard` until those artifacts are saved and verified.
455
455
 
@@ -613,18 +613,7 @@ npm run verify -- dogfood
613
613
 
614
614
  That mode drives the native wrapper through top-level `qa`, `semanticAction`, constrained `job`, screenshot artifact verification, and session close against a deterministic local fixture. It complements, but does not replace, the interactive Pi/tmux release dogfood in [`docs/RELEASE.md`](docs/RELEASE.md#pre-release-checks).
615
615
 
616
- Cross-platform release coverage uses Crabbox to run macOS, Ubuntu Linux, and native Windows target suites:
617
-
618
- ```bash
619
- npm run check:platform-smoke
620
- npm run smoke:platform:ubuntu-image
621
- npm run smoke:platform:doctor
622
- npm run smoke:platform:all
623
- ```
624
-
625
- The required matrix is documented in [`docs/platform-smoke.md`](docs/platform-smoke.md). It runs `platform-build` (fast target-local verify, pack, clean packed Pi install with `--approve`, `pi list --approve`) and `browser-dogfood-smoke` (real `agent-browser`/browser wrapper smoke) on every target. Inspect `.artifacts/platform-smoke/` and check `crabbox list --provider local-container` plus `crabbox list --provider parallels` after release runs so cleanup proof is not chat-only.
626
-
627
- For package release confidence, follow [`docs/RELEASE.md`](docs/RELEASE.md). The release gate is:
616
+ Cross-platform release coverage uses Crabbox to run macOS, Ubuntu Linux, and native Windows target suites; see [`docs/platform-smoke.md`](docs/platform-smoke.md) for the required matrix, standalone coverage (`npm run smoke:platform:all` and per-target `smoke:platform:macos` / `:ubuntu` / `:windows-native`), and artifact/lease inspection. The release gate is:
628
617
 
629
618
  ```bash
630
619
  npm run doctor
@@ -634,7 +623,7 @@ npm run smoke:platform:doctor
634
623
  npm run verify -- release
635
624
  ```
636
625
 
637
- `npm run verify -- release` includes the default verification gate, packaged Pi smoke coverage, and the release-blocking Crabbox platform matrix. The package also has a `prepublishOnly` hook that runs the same release gate and `npm pack --dry-run` during `npm publish`.
626
+ `npm run verify -- release` includes the default verification gate, packaged Pi smoke coverage, and the release-blocking Crabbox platform matrix (the same matrix `npm run smoke:platform:all` runs standalone). For the full maintainer release flow, follow [`docs/RELEASE.md`](docs/RELEASE.md). The package also has a `prepublishOnly` hook that runs the same release gate and `npm pack --dry-run` during `npm publish`.
638
627
 
639
628
  ## How it works
640
629
 
@@ -5,6 +5,7 @@
5
5
  */
6
6
  import { hasOnlyBooleanFlags, hasOnlyOptionFlags, isNonFlagToken, stripSessionlessShapeGlobalFlags } from "./argv-grammar.js";
7
7
  const SESSIONLESS_AUTH_SUBCOMMANDS = new Set(["save", "list", "show", "delete", "remove"]);
8
+ const PLUGIN_SESSIONLESS_SUBCOMMANDS = new Set(["list", "show", "add", "run"]);
8
9
  const EMPTY_BOOLEAN_FLAGS = new Set();
9
10
  const JSON_BOOLEAN_FLAGS = new Set(["--json"]);
10
11
  const AUTH_SAVE_BOOLEAN_FLAGS = new Set(["--json", "--password-stdin"]);
@@ -57,6 +58,12 @@ function isSessionlessStateCommand(commandTokens) {
57
58
  return false;
58
59
  return secondArg === undefined || (secondArg === "--all" && rest.length === 0);
59
60
  }
61
+ function isSessionlessPluginCommand(commandTokens) {
62
+ const [, subcommand] = commandTokens;
63
+ if (subcommand === undefined)
64
+ return true;
65
+ return PLUGIN_SESSIONLESS_SUBCOMMANDS.has(subcommand);
66
+ }
60
67
  function isSessionlessCommand(commandTokens) {
61
68
  const normalizedTokens = stripSessionlessShapeGlobalFlags(commandTokens);
62
69
  const [command, subcommand] = normalizedTokens;
@@ -64,6 +71,10 @@ function isSessionlessCommand(commandTokens) {
64
71
  return ["list", "get", "path"].includes(subcommand ?? "");
65
72
  if (command === "auth")
66
73
  return isSessionlessAuthCommand(normalizedTokens);
74
+ if (command === "plugin")
75
+ return isSessionlessPluginCommand(normalizedTokens);
76
+ if (command === "mcp")
77
+ return true;
67
78
  if (command === "dashboard")
68
79
  return isSessionlessDashboardCommand(normalizedTokens);
69
80
  if (command === "device")
@@ -190,18 +190,21 @@ function compilePathArtifactJobStep(step, action) {
190
190
  return { error: result.error };
191
191
  return { args: action === "waitForDownload" ? ["wait", "--download", result.value] : ["screenshot", result.value] };
192
192
  }
193
- const JOB_STEP_DESCRIPTORS = {
194
- assertText: { allowedFields: JOB_STEP_ALLOWED_FIELDS.assertText, compile: compileAssertTextJobStep },
195
- assertUrl: { allowedFields: JOB_STEP_ALLOWED_FIELDS.assertUrl, compile: compileAssertUrlJobStep },
196
- click: { allowedFields: JOB_STEP_ALLOWED_FIELDS.click, compile: compileClickJobStep },
197
- fill: { allowedFields: JOB_STEP_ALLOWED_FIELDS.fill, compile: compileFillJobStep },
198
- open: { allowedFields: JOB_STEP_ALLOWED_FIELDS.open, compile: compileOpenJobStep },
199
- screenshot: { allowedFields: JOB_STEP_ALLOWED_FIELDS.screenshot, compile: (step) => compilePathArtifactJobStep(step, "screenshot") },
200
- select: { allowedFields: JOB_STEP_ALLOWED_FIELDS.select, compile: compileSelectJobStep },
201
- snapshot: { allowedFields: JOB_STEP_ALLOWED_FIELDS.snapshot, compile: () => ({ args: ["snapshot", "-i"] }) },
202
- type: { allowedFields: JOB_STEP_ALLOWED_FIELDS.type, compile: compileTypeJobStep },
203
- wait: { allowedFields: JOB_STEP_ALLOWED_FIELDS.wait, compile: compileWaitJobStep },
204
- waitForDownload: { allowedFields: JOB_STEP_ALLOWED_FIELDS.waitForDownload, compile: (step) => compilePathArtifactJobStep(step, "waitForDownload") },
193
+ // ponytail: allowedFields for each action live in JOB_STEP_ALLOWED_FIELDS (same key
194
+ // alignment enforced by Record<AgentBrowserJobStepAction, …>), so the compiler map no
195
+ // longer mirrors that set per entry; the call site looks it up by action.
196
+ const JOB_STEP_COMPILERS = {
197
+ assertText: compileAssertTextJobStep,
198
+ assertUrl: compileAssertUrlJobStep,
199
+ click: compileClickJobStep,
200
+ fill: compileFillJobStep,
201
+ open: compileOpenJobStep,
202
+ screenshot: (step) => compilePathArtifactJobStep(step, "screenshot"),
203
+ select: compileSelectJobStep,
204
+ snapshot: () => ({ args: ["snapshot", "-i"] }),
205
+ type: compileTypeJobStep,
206
+ wait: compileWaitJobStep,
207
+ waitForDownload: (step) => compilePathArtifactJobStep(step, "waitForDownload"),
205
208
  };
206
209
  export function compileAgentBrowserJob(input) {
207
210
  if (!isRecord(input)) {
@@ -226,11 +229,11 @@ export function compileAgentBrowserJob(input) {
226
229
  return { error: `job.steps[${index}].action must be one of: ${AGENT_BROWSER_JOB_STEP_ACTIONS.join(", ")}.` };
227
230
  }
228
231
  const jobAction = action;
229
- const descriptor = JOB_STEP_DESCRIPTORS[jobAction];
230
- const unsupportedFieldError = getUnsupportedJobStepFieldError(rawStep, jobAction, descriptor.allowedFields);
232
+ const compile = JOB_STEP_COMPILERS[jobAction];
233
+ const unsupportedFieldError = getUnsupportedJobStepFieldError(rawStep, jobAction, JOB_STEP_ALLOWED_FIELDS[jobAction]);
231
234
  if (unsupportedFieldError)
232
235
  return { error: `job.steps[${index}]: ${unsupportedFieldError}` };
233
- const compiledStep = descriptor.compile(rawStep, index);
236
+ const compiledStep = compile(rawStep, index);
234
237
  if (compiledStep.error)
235
238
  return { error: compiledStep.error.startsWith(`job.steps[${index}]`) ? compiledStep.error : `job.steps[${index}]: ${compiledStep.error}` };
236
239
  steps.push({ action: jobAction, args: compiledStep.args, generatedFrom: compiledStep.generatedFrom }, ...(compiledStep.extraSteps ?? []));
@@ -7,6 +7,21 @@ import { JsonSchema } from "../json-schema.js";
7
7
  import { StringEnum as localStringEnum } from "../string-enum-schema.js";
8
8
  import { ELECTRON_DISCOVERY_DEFAULT_MAX_RESULTS, ELECTRON_DISCOVERY_MAX_RESULTS, } from "../electron/discovery.js";
9
9
  import { AGENT_BROWSER_ELECTRON_HANDOFFS, AGENT_BROWSER_ELECTRON_TARGET_TYPES, AGENT_BROWSER_JOB_STEP_ACTIONS, AGENT_BROWSER_JOB_TYPE_DELAYED_TEXT_MAX_CHARACTERS, AGENT_BROWSER_QA_LOAD_STATES, AGENT_BROWSER_SEMANTIC_ACTIONS, AGENT_BROWSER_SEMANTIC_LOCATORS, DEFAULT_SESSION_MODE, SOURCE_LOOKUP_MAX_WORKSPACE_FILES, } from "./types.js";
10
+ // ponytail: the four electron.launch variants differ only in their single target field
11
+ // (appPath/appName/bundleId/executablePath); the action literal and the shared optional
12
+ // launch fields are identical, so a helper keeps the duplicate schema blocks in sync.
13
+ function electronLaunchVariant(Type, StringEnum, targetField) {
14
+ return Type.Object({
15
+ action: StringEnum(["launch"], { description: "Launch an Electron app with an isolated wrapper-owned profile." }),
16
+ ...targetField,
17
+ appArgs: Type.Optional(Type.Array(Type.String({ description: "Argument passed to the Electron application.", minLength: 1 }), { description: "Optional Electron app argv. Wrapper-owned lifecycle/debug flags are rejected." })),
18
+ handoff: Type.Optional(StringEnum(AGENT_BROWSER_ELECTRON_HANDOFFS, { description: "Post-launch handoff depth. Defaults to snapshot." })),
19
+ targetType: Type.Optional(StringEnum(AGENT_BROWSER_ELECTRON_TARGET_TYPES, { description: "Preferred CDP target type. Defaults to page." })),
20
+ timeoutMs: Type.Optional(Type.Integer({ description: "Bounded launch timeout in milliseconds.", minimum: 1 })),
21
+ allow: Type.Optional(Type.Array(Type.String({ description: "App identifier allowed by the caller for electron.launch.", minLength: 1 }), { description: "Optional caller-owned allow list for electron.launch policy checks." })),
22
+ deny: Type.Optional(Type.Array(Type.String({ description: "App identifier denied by the caller for electron.launch.", minLength: 1 }), { description: "Optional caller-owned deny list for electron.launch policy checks; deny wins over allow." })),
23
+ }, { additionalProperties: false });
24
+ }
10
25
  export function createAgentBrowserParamsSchema(Type = JsonSchema, StringEnum = localStringEnum) {
11
26
  return Type.Object({
12
27
  args: Type.Optional(Type.Array(Type.String({ description: "Exact agent-browser CLI arguments, excluding the binary name. Do not pass --json; the wrapper injects it. First-call recipe: open → snapshot -i → click/fill @eN → snapshot -i." }), {
@@ -71,46 +86,10 @@ export function createAgentBrowserParamsSchema(Type = JsonSchema, StringEnum = l
71
86
  query: Type.Optional(Type.String({ description: "Optional case-insensitive substring filter for electron.list across app name, bundle id, desktop id, and paths.", minLength: 1 })),
72
87
  maxResults: Type.Optional(Type.Integer({ description: `Maximum electron.list apps to return. Defaults to ${ELECTRON_DISCOVERY_DEFAULT_MAX_RESULTS}; values above ${ELECTRON_DISCOVERY_MAX_RESULTS} are clamped.`, minimum: 1 })),
73
88
  }, { additionalProperties: false }),
74
- Type.Object({
75
- action: StringEnum(["launch"], { description: "Launch an Electron app with an isolated wrapper-owned profile." }),
76
- appPath: Type.String({ description: "Electron launch target: macOS .app bundle path. Exactly one launch target is required for electron.launch.", minLength: 1 }),
77
- appArgs: Type.Optional(Type.Array(Type.String({ description: "Argument passed to the Electron application.", minLength: 1 }), { description: "Optional Electron app argv. Wrapper-owned lifecycle/debug flags are rejected." })),
78
- handoff: Type.Optional(StringEnum(AGENT_BROWSER_ELECTRON_HANDOFFS, { description: "Post-launch handoff depth. Defaults to snapshot." })),
79
- targetType: Type.Optional(StringEnum(AGENT_BROWSER_ELECTRON_TARGET_TYPES, { description: "Preferred CDP target type. Defaults to page." })),
80
- timeoutMs: Type.Optional(Type.Integer({ description: "Bounded launch timeout in milliseconds.", minimum: 1 })),
81
- allow: Type.Optional(Type.Array(Type.String({ description: "App identifier allowed by the caller for electron.launch.", minLength: 1 }), { description: "Optional caller-owned allow list for electron.launch policy checks." })),
82
- deny: Type.Optional(Type.Array(Type.String({ description: "App identifier denied by the caller for electron.launch.", minLength: 1 }), { description: "Optional caller-owned deny list for electron.launch policy checks; deny wins over allow." })),
83
- }, { additionalProperties: false }),
84
- Type.Object({
85
- action: StringEnum(["launch"], { description: "Launch an Electron app with an isolated wrapper-owned profile." }),
86
- appName: Type.String({ description: "Electron launch target: app display name discovered by electron.list. Exactly one launch target is required for electron.launch.", minLength: 1 }),
87
- appArgs: Type.Optional(Type.Array(Type.String({ description: "Argument passed to the Electron application.", minLength: 1 }), { description: "Optional Electron app argv. Wrapper-owned lifecycle/debug flags are rejected." })),
88
- handoff: Type.Optional(StringEnum(AGENT_BROWSER_ELECTRON_HANDOFFS, { description: "Post-launch handoff depth. Defaults to snapshot." })),
89
- targetType: Type.Optional(StringEnum(AGENT_BROWSER_ELECTRON_TARGET_TYPES, { description: "Preferred CDP target type. Defaults to page." })),
90
- timeoutMs: Type.Optional(Type.Integer({ description: "Bounded launch timeout in milliseconds.", minimum: 1 })),
91
- allow: Type.Optional(Type.Array(Type.String({ description: "App identifier allowed by the caller for electron.launch.", minLength: 1 }), { description: "Optional caller-owned allow list for electron.launch policy checks." })),
92
- deny: Type.Optional(Type.Array(Type.String({ description: "App identifier denied by the caller for electron.launch.", minLength: 1 }), { description: "Optional caller-owned deny list for electron.launch policy checks; deny wins over allow." })),
93
- }, { additionalProperties: false }),
94
- Type.Object({
95
- action: StringEnum(["launch"], { description: "Launch an Electron app with an isolated wrapper-owned profile." }),
96
- bundleId: Type.String({ description: "Electron launch target: macOS bundle identifier discovered by electron.list. Exactly one launch target is required for electron.launch.", minLength: 1 }),
97
- appArgs: Type.Optional(Type.Array(Type.String({ description: "Argument passed to the Electron application.", minLength: 1 }), { description: "Optional Electron app argv. Wrapper-owned lifecycle/debug flags are rejected." })),
98
- handoff: Type.Optional(StringEnum(AGENT_BROWSER_ELECTRON_HANDOFFS, { description: "Post-launch handoff depth. Defaults to snapshot." })),
99
- targetType: Type.Optional(StringEnum(AGENT_BROWSER_ELECTRON_TARGET_TYPES, { description: "Preferred CDP target type. Defaults to page." })),
100
- timeoutMs: Type.Optional(Type.Integer({ description: "Bounded launch timeout in milliseconds.", minimum: 1 })),
101
- allow: Type.Optional(Type.Array(Type.String({ description: "App identifier allowed by the caller for electron.launch.", minLength: 1 }), { description: "Optional caller-owned allow list for electron.launch policy checks." })),
102
- deny: Type.Optional(Type.Array(Type.String({ description: "App identifier denied by the caller for electron.launch.", minLength: 1 }), { description: "Optional caller-owned deny list for electron.launch policy checks; deny wins over allow." })),
103
- }, { additionalProperties: false }),
104
- Type.Object({
105
- action: StringEnum(["launch"], { description: "Launch an Electron app with an isolated wrapper-owned profile." }),
106
- executablePath: Type.String({ description: "Electron launch target: executable path. Discovery is not required when this is provided. Exactly one launch target is required for electron.launch.", minLength: 1 }),
107
- appArgs: Type.Optional(Type.Array(Type.String({ description: "Argument passed to the Electron application.", minLength: 1 }), { description: "Optional Electron app argv. Wrapper-owned lifecycle/debug flags are rejected." })),
108
- handoff: Type.Optional(StringEnum(AGENT_BROWSER_ELECTRON_HANDOFFS, { description: "Post-launch handoff depth. Defaults to snapshot." })),
109
- targetType: Type.Optional(StringEnum(AGENT_BROWSER_ELECTRON_TARGET_TYPES, { description: "Preferred CDP target type. Defaults to page." })),
110
- timeoutMs: Type.Optional(Type.Integer({ description: "Bounded launch timeout in milliseconds.", minimum: 1 })),
111
- allow: Type.Optional(Type.Array(Type.String({ description: "App identifier allowed by the caller for electron.launch.", minLength: 1 }), { description: "Optional caller-owned allow list for electron.launch policy checks." })),
112
- deny: Type.Optional(Type.Array(Type.String({ description: "App identifier denied by the caller for electron.launch.", minLength: 1 }), { description: "Optional caller-owned deny list for electron.launch policy checks; deny wins over allow." })),
113
- }, { additionalProperties: false }),
89
+ electronLaunchVariant(Type, StringEnum, { appPath: Type.String({ description: "Electron launch target: macOS .app bundle path. Exactly one launch target is required for electron.launch.", minLength: 1 }) }),
90
+ electronLaunchVariant(Type, StringEnum, { appName: Type.String({ description: "Electron launch target: app display name discovered by electron.list. Exactly one launch target is required for electron.launch.", minLength: 1 }) }),
91
+ electronLaunchVariant(Type, StringEnum, { bundleId: Type.String({ description: "Electron launch target: macOS bundle identifier discovered by electron.list. Exactly one launch target is required for electron.launch.", minLength: 1 }) }),
92
+ electronLaunchVariant(Type, StringEnum, { executablePath: Type.String({ description: "Electron launch target: executable path. Discovery is not required when this is provided. Exactly one launch target is required for electron.launch.", minLength: 1 }) }),
114
93
  Type.Object({
115
94
  action: StringEnum(["status", "cleanup"], { description: "Inspect or cleanup one wrapper-tracked Electron launch by launchId." }),
116
95
  launchId: Type.String({ description: "Wrapper launch id for electron.status and electron.cleanup.", minLength: 1 }),
@@ -66,6 +66,13 @@ export async function parseAgentBrowserEnvelope(options) {
66
66
  if (!isRecord(parsed)) {
67
67
  return { parseError: "agent-browser returned JSON, but it was not an object envelope." };
68
68
  }
69
+ const keys = Object.keys(parsed);
70
+ if (keys.length === 1 && keys[0] === "plugins" && Array.isArray(parsed.plugins)) {
71
+ return { envelope: { success: true, data: { plugins: parsed.plugins } } };
72
+ }
73
+ if (keys.length === 1 && keys[0] === "plugin" && isRecord(parsed.plugin) && !Array.isArray(parsed.plugin)) {
74
+ return { envelope: { success: true, data: { plugin: parsed.plugin } } };
75
+ }
69
76
  if (!("success" in parsed)) {
70
77
  return { parseError: "agent-browser returned an invalid JSON envelope: missing boolean success field." };
71
78
  }
@@ -533,6 +533,14 @@ function getSingleKeyCommandValidationError(args) {
533
533
  const label = command === "key" ? "key/press" : command;
534
534
  return `agent-browser ${label} accepts exactly one key argument. Do not pass a selector or ref to ${label}; focus or click the target first, then run ${command} <key> (for example: focus @e1, then press Enter).`;
535
535
  }
536
+ function getBareMcpValidationError(args) {
537
+ const { commandInfo, commandTokens } = parseArgvDescriptor(args);
538
+ if (commandInfo.command !== "mcp")
539
+ return undefined;
540
+ if (commandTokens.includes("--help") || commandTokens.includes("-h"))
541
+ return undefined;
542
+ return "agent-browser mcp starts a stdio MCP server for external MCP clients, not a one-shot native agent_browser tool workflow. Use the native agent_browser tool modes directly, or configure an MCP client to launch `agent-browser mcp`. Use `mcp --help` for help.";
543
+ }
536
544
  export function validateToolArgs(args) {
537
545
  if (args.length === 0) {
538
546
  return "`args` must contain at least one agent-browser command token.";
@@ -545,7 +553,7 @@ export function validateToolArgs(args) {
545
553
  if (sessionModeArg) {
546
554
  return "Do not pass `--session-mode` in args. Use the top-level agent_browser `sessionMode` field instead, for example { args: [\"--profile\", \"Default\", \"open\", \"https://example.com\"], sessionMode: \"fresh\" }.";
547
555
  }
548
- return getSingleKeyCommandValidationError(args);
556
+ return getBareMcpValidationError(args) ?? getSingleKeyCommandValidationError(args);
549
557
  }
550
558
  function getInvalidValueFlagDetails(args) {
551
559
  for (let index = 0; index < args.length; index += 1) {
@@ -18,7 +18,7 @@ This project intentionally blocks normal `agent-browser` bash usage in most agen
18
18
 
19
19
  <!-- agent-browser-capability-baseline:start upstream-baseline -->
20
20
  <!-- Generated from scripts/agent-browser-capability-baseline.mjs. Run `npm run docs -- command-reference write` to update. Do not edit manually. -->
21
- This reference is baselined to the locally installed `agent-browser 0.27.3` command/help surface, audited against vercel-labs/agent-browser@2c7991c9eccca1c9db6eee1a26a713414778de5a. Upstream `agent-browser` remains the source of truth for command semantics; this file is the local fallback for Pi agent sessions where direct binary help is blocked or discouraged.
21
+ This reference is baselined to the locally installed `agent-browser 0.28.0` command/help surface, audited against vercel-labs/agent-browser@6323df571ffd17d14e60ec19fcb56cc1caf498ab. Upstream `agent-browser` remains the source of truth for command semantics; this file is the local fallback for Pi agent sessions where direct binary help is blocked or discouraged.
22
22
 
23
23
  The lightweight drift check is `npm run verify -- command-reference`. Run it whenever the installed upstream `agent-browser` version changes or this reference is edited.
24
24
 
@@ -36,6 +36,17 @@ The 0.27.3 rebaseline is an install-only compatibility update: upstream changed
36
36
  - warm CLI command latency and batch daemon respawn/retry improvements
37
37
  - GNU Linux release artifacts pinned to glibc 2.28
38
38
 
39
+ ### Upstream 0.28.0 rebaseline
40
+
41
+ The 0.28.0 rebaseline tracks new local/infra upstream surfaces and does not change core browser-command semantics. New agent-facing surface captured by the capability baseline:
42
+
43
+ - `mcp` starts a local MCP stdio server exposing agent-browser tools. It is intended for external MCP clients that spawn `agent-browser mcp` as a subprocess; an agent inside Pi would not normally invoke it. The wrapper treats it as sessionless (no managed browser session injected) and rejects bare `mcp` calls through the native tool, while `mcp --help` remains available.
44
+ - `plugin add <ref>`, `plugin [list]`, `plugin show <name>`, and `plugin run <name> <type>` manage configured plugins in `agent-browser.json` (added from npm or GitHub); all are sessionless in the wrapper.
45
+ - `auth login <name> --credential-provider <plugin>` resolves credentials just-in-time from a configured credential plugin (for example, a vault); credentials are not saved locally.
46
+ - `AGENT_BROWSER_PLUGINS` is a JSON plugin registry override.
47
+
48
+ The wrapper adds no compatibility shim for older upstream releases.
49
+
39
50
  ## Core mental model
40
51
 
41
52
  Input mode chooser (one per call): **`args`** for the default open → snapshot -i → click/fill `@refs` flow; **`semanticAction`** for stable role/text/label targets; **`job`** / **`qa`** for multi-step checks; **`electron`** for desktop apps only; **`sourceLookup`** / **`networkSourceLookup`** are **experimental candidates-only** helpers (not authoritative mappings). Do not pass `--json` in `args`—the wrapper injects it. Match link and button text to the latest snapshot (on `https://example.com/` the main link is `Learn more`, not legacy `More information...` copy). See [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#input-mode-chooser) for snapshot variants (`-i` vs `--compact` vs full) and batching three or more getters.
@@ -140,7 +151,7 @@ Use `vitals [url]` for Core Web Vitals plus React hydration timing when availabl
140
151
  { "args": ["pushstate", "/dashboard?tab=settings"] }
141
152
  ```
142
153
 
143
- For first-navigation setup, start on `about:blank`, then stage routes, cookies, or init scripts before navigating. The relevant v0.27.3 surfaces, unchanged from the prior baseline, are `network route <url> [--abort|--body <json>] [--resource-type <csv>]` and `cookies set --curl <file>`:
154
+ For first-navigation setup, start on `about:blank`, then stage routes, cookies, or init scripts before navigating. The relevant current upstream surfaces are `network route <url> [--abort|--body <json>] [--resource-type <csv>]` and `cookies set --curl <file>`:
144
155
 
145
156
  ```json
146
157
  { "args": ["open"], "sessionMode": "fresh" }
@@ -359,7 +370,7 @@ For one-call flows, put the click and wait in `batch`; the wait step keeps the s
359
370
  { "args": ["batch"], "stdin": "[[\"click\",\"@export\"],[\"wait\",\"--download\",\"/tmp/report.csv\"]]" }
360
371
  ```
361
372
 
362
- A successful wait-based download renders a readable summary such as `Download completed: /tmp/report.csv` and exposes top-level `details.savedFilePath` plus `details.savedFile` for non-batch calls. With the current upstream `agent-browser 0.27.3`, `wait --download <path>` may report the requested path before this environment can verify that the file was persisted there. Treat `details.savedFilePath` as upstream-reported metadata unless `details.artifacts[].exists` is true. Upstream tracking: [vercel-labs/agent-browser#1300](https://github.com/vercel-labs/agent-browser/issues/1300).
373
+ A successful wait-based download renders a readable summary such as `Download completed: /tmp/report.csv` and exposes top-level `details.savedFilePath` plus `details.savedFile` for non-batch calls. With current upstream `agent-browser`, `wait --download <path>` may report the requested path before this environment can verify that the file was persisted there. Treat `details.savedFilePath` as upstream-reported metadata unless `details.artifacts[].exists` is true. Upstream tracking: [vercel-labs/agent-browser#1300](https://github.com/vercel-labs/agent-browser/issues/1300).
363
374
 
364
375
  ### Download, screenshot, and PDF files
365
376
 
@@ -639,7 +650,7 @@ For dense pages, the wrapper also accepts `snapshot -i --search <text>` and `sna
639
650
  | `wait --download [path]` | Wait for a download started by a previous action and optionally save it to `path`; successful wrapper results include upstream-reported `savedFilePath`/`savedFile`, while `details.artifacts[].exists` is the wrapper's on-disk verification signal. |
640
651
  | `wait --download [path] --timeout <ms>` | Set download-start timeout in milliseconds. The native Pi wrapper forwards explicit wait timeouts and extends the subprocess watchdog unless the caller supplies top-level `timeoutMs`. |
641
652
 
642
- Current v0.27.3 source still does not parse `wait <selector> --state hidden` / `wait <selector> --state detached` as distinct wait modes even though upstream help mentions those examples. Use `wait --fn "!document.querySelector('#spinner')"` or another explicit JavaScript predicate for disappearance/detach checks until upstream parser support exists.
653
+ Current upstream source still does not parse `wait <selector> --state hidden` / `wait <selector> --state detached` as distinct wait modes even though upstream help mentions those examples. Use `wait --fn "!document.querySelector('#spinner')"` or another explicit JavaScript predicate for disappearance/detach checks until upstream parser support exists.
643
654
 
644
655
  ### Diff, debug, and streaming
645
656
 
@@ -700,9 +711,16 @@ Long-running or lifecycle commands should be explicitly paired with cleanup call
700
711
  | `install --with-deps` | Install browser binaries plus Linux system dependencies. |
701
712
  | `upgrade` | Upgrade `agent-browser` to the latest version. |
702
713
  | `doctor [--fix]` | Diagnose install issues and optionally auto-clean stale files. Use `doctor --offline --quick` for a fast local-only check and `doctor --json` for structured output. |
714
+ | `plugin add <ref>` | Add a plugin from npm (`@scope/<name>`) or GitHub (`<owner>/<repo>`); writes `agent-browser.json`. Flags such as `--name`, `--capability`, `--global`, and `--no-manifest` shape discovery. |
715
+ | `plugin [list]` | List configured plugins (default subcommand); `{ "plugins": [...] }` is a successful sessionless result. |
716
+ | `plugin show <name>` | Show one configured plugin; `{ "plugin": {...} }` is a successful sessionless result. |
717
+ | `plugin run <name> <type>` | Run a `command.run` or custom plugin request over the agent-browser plugin stdio protocol. |
718
+ | `auth login <name> --credential-provider <plugin>` | Resolve credentials just-in-time from a configured credential plugin (e.g. a vault) instead of saved passwords; pair with `--item <ref>` and optional selector overrides. Credentials are not stored locally. |
719
+ | `mcp --help` | Show MCP server help through the native tool. |
720
+ | `mcp` | Start a local MCP stdio server for external MCP clients; bare native-tool calls are rejected before spawn. |
703
721
  | `profiles` | List available Chrome profiles. |
704
722
 
705
- When these commands are invoked through the native `agent_browser` tool, structured diagnostic/status outputs are rendered as compact summaries. Local inspection/setup calls (`auth save/list/show/delete/remove`, `dashboard start/stop`, `device list`, `doctor`, `install`, `upgrade`, `profiles`, `session list`, `state list/show/rename`, `state clean --older-than <days>`, `state clear --all`, `state clear -a`, and `state clear <session-name>`) are sessionless unless you explicitly pass `--session`; context-dependent calls such as root `session`, untargeted `state clear`, `auth login`, `chat`, and `state save/load` keep normal session behavior. List-like outputs such as sessions, Chrome profiles, auth profiles, network requests, console messages, and page errors include counts and key fields; large outputs are previewed with a `Full output path:` spill file instead of dumping the entire payload into context. For `network requests`, the wrapper shows a failed-request summary split into actionable versus benign low-impact rows, then status, method, URL, resource/mime type, request id, and, when the installed upstream output includes body-like fields, bounded redacted payload, response, and failure/error snippets. Safe request IDs also produce `details.nextActions` for exact request details, actionable failed-request source lookup candidates, filtered request lists, or starting HAR capture before a repro. If the same session has active wrapper-observed network routes, failed/pending/CORS-looking matched request rows add `details.networkRouteDiagnostics` and executable route-mock next actions before the generic request actions. `data:image` artifact rows are omitted from compact request previews but remain in raw `details.data.requests`. 
`network request <requestId>` can expose upstream full-detail body fields such as response bodies using the same bounded model-facing preview; its request URL stays diagnostic-only and does not overwrite `details.sessionTabTarget` for later ref guards. Clipboard failures that mention `NotAllowedError` or permission denial are usually browser/OS capability limits, not proof that a read, paste, or page mutation happened; prefer page-native reads (`snapshot -i`, `get text`, `eval --stdin`) or direct typing (`keyboard inserttext` / `keyboard type`) when the workflow allows it, and retry true clipboard flows only from an allowed profile/session on a normal `http(s)` page. Header, cookie, auth, token, and other secret-like fields are not expanded in model-facing text or `details.data`; low-risk primitive storage values may remain visible, while command echoes still redact `--body`, `--headers`, `--password`, proxy credentials, auth-bearing URLs, `clipboard write` text, cookie/storage set values, and bearer/basic credential text in positional arguments. Use upstream HAR or full raw details only when complete data is required.
723
+ When these commands are invoked through the native `agent_browser` tool, structured diagnostic/status outputs are rendered as compact summaries. Local inspection/setup calls (`auth save/list/show/delete/remove`, `dashboard start/stop`, `device list`, `doctor`, `install`, `upgrade`, `profiles`, `session list`, `plugin add/list/show/run`, `state list/show/rename`, `state clean --older-than <days>`, `state clear --all`, `state clear -a`, and `state clear <session-name>`) are sessionless unless you explicitly pass `--session`. Bare `mcp` server calls are blocked; only `mcp --help` is allowed. Context-dependent calls such as root `session`, untargeted `state clear`, `auth login`, `chat`, and `state save/load` keep normal session behavior. List-like outputs such as sessions, Chrome profiles, auth profiles, network requests, console messages, and page errors include counts and key fields; large outputs are previewed with a `Full output path:` spill file instead of dumping the entire payload into context. For `network requests`, the wrapper shows a failed-request summary split into actionable versus benign low-impact rows, then status, method, URL, resource/mime type, request id, and, when the installed upstream output includes body-like fields, bounded redacted payload, response, and failure/error snippets. Safe request IDs also produce `details.nextActions` for exact request details, actionable failed-request source lookup candidates, filtered request lists, or starting HAR capture before a repro. If the same session has active wrapper-observed network routes, failed/pending/CORS-looking matched request rows add `details.networkRouteDiagnostics` and executable route-mock next actions before the generic request actions. `data:image` artifact rows are omitted from compact request previews but remain in raw `details.data.requests`. 
`network request <requestId>` can expose upstream full-detail body fields such as response bodies using the same bounded model-facing preview; its request URL stays diagnostic-only and does not overwrite `details.sessionTabTarget` for later ref guards. Clipboard failures that mention `NotAllowedError` or permission denial are usually browser/OS capability limits, not proof that a read, paste, or page mutation happened; prefer page-native reads (`snapshot -i`, `get text`, `eval --stdin`) or direct typing (`keyboard inserttext` / `keyboard type`) when the workflow allows it, and retry true clipboard flows only from an allowed profile/session on a normal `http(s)` page. Header, cookie, auth, token, and other secret-like fields are not expanded in model-facing text or `details.data`; low-risk primitive storage values may remain visible, while command echoes still redact `--body`, `--headers`, `--password`, proxy credentials, auth-bearing URLs, `clipboard write` text, cookie/storage set values, and bearer/basic credential text in positional arguments. Use upstream HAR or full raw details only when complete data is required.
706
724
 
707
725
  ## Optional package config and companion web search
708
726
 
@@ -824,6 +842,7 @@ Browser default config is conservative: it adds agent guidance for signed-in/acc
824
842
  - `-v, --verbose`: show tool commands and raw output.
825
843
  - `-q, --quiet`: show only AI text responses.
826
844
  - `--debug`: debug output. Environment: `AGENT_BROWSER_DEBUG`.
845
+ - `AGENT_BROWSER_PLUGINS`: JSON plugin registry override for the upstream `plugin` commands.
827
846
  - `--version`, `-V`: show version.
828
847
 
829
848
  ### Config precedence
@@ -860,17 +879,19 @@ Other useful environment variables include `AGENT_BROWSER_DEFAULT_TIMEOUT`, `AGE
860
879
  <!-- agent-browser-capability-baseline:start capability-token-baseline -->
861
880
  <!-- Generated from scripts/agent-browser-capability-baseline.mjs. Run `npm run docs -- command-reference write` to update. Do not edit manually. -->
862
881
  <details>
863
- <summary>Generated verifier capability baseline for agent-browser 0.27.3</summary>
882
+ <summary>Generated verifier capability baseline for agent-browser 0.28.0</summary>
864
883
 
865
884
  This generated block is review data for maintainers. The human-authored reference sections above remain the readable command guide.
866
885
 
867
886
  #### Source evidence
868
887
  - repository: `vercel-labs/agent-browser`
869
- - upstream HEAD: `2c7991c9eccca1c9db6eee1a26a713414778de5a`
870
- - upstream package version: `0.27.3`
888
+ - upstream HEAD: `6323df571ffd17d14e60ec19fcb56cc1caf498ab`
889
+ - upstream package version: `0.28.0`
871
890
  - inspected: `agent-browser --version`
872
891
  - inspected: `agent-browser --help`
873
892
  - inspected: `selected agent-browser <command> --help output`
893
+ - inspected: `agent-browser mcp --help`
894
+ - inspected: `agent-browser plugin --help`
874
895
  - inspected: `README.md`
875
896
  - inspected: `CHANGELOG.md`
876
897
  - inspected: `agent-browser.schema.json`
@@ -929,14 +950,16 @@ This generated block is review data for maintainers. The human-authored referenc
929
950
  - install help: `agent-browser install --help`
930
951
  - upgrade help: `agent-browser upgrade --help`
931
952
  - profiles help: `agent-browser profiles --help`
953
+ - mcp help: `agent-browser mcp --help`
954
+ - plugin help: `agent-browser plugin --help`
932
955
 
933
956
  #### Inventory sections
934
957
  - Built-in skills: 13 human-doc token(s), 13 upstream token(s)
935
958
  - Core page, element, navigation, and extraction commands: 74 human-doc token(s), 74 upstream token(s)
936
959
  - Sessions, state, tabs, frames, dialogs, and windows: 20 human-doc token(s), 16 upstream token(s)
937
960
  - Network, storage, artifacts, diagnostics, and performance: 43 human-doc token(s), 53 upstream token(s)
938
- - Batch, auth, confirmations, setup, dashboard, devices, and AI commands: 24 human-doc token(s), 24 upstream token(s)
939
- - Global flags, config, providers, policy, and environment: 120 human-doc token(s), 90 upstream token(s)
961
+ - Batch, auth, confirmations, setup, dashboard, devices, and AI commands: 30 human-doc token(s), 34 upstream token(s)
962
+ - Global flags, config, providers, policy, and environment: 121 human-doc token(s), 91 upstream token(s)
940
963
 
941
964
  #### Human-authored doc tokens required
942
965
  ##### Built-in skills
@@ -1121,6 +1144,12 @@ This generated block is review data for maintainers. The human-authored referenc
1121
1144
  - `doctor [--fix]`
1122
1145
  - `doctor --offline --quick`
1123
1146
  - `doctor --json`
1147
+ - `mcp`
1148
+ - `plugin add <ref>`
1149
+ - `plugin [list]`
1150
+ - `plugin show <name>`
1151
+ - `plugin run <name> <type>`
1152
+ - `auth login <name> --credential-provider <plugin>`
1124
1153
  - `profiles`
1125
1154
 
1126
1155
  ##### Global flags, config, providers, policy, and environment
@@ -1194,6 +1223,7 @@ This generated block is review data for maintainers. The human-authored referenc
1194
1223
  - `AGENT_BROWSER_CONFIRM_INTERACTIVE`
1195
1224
  - `-p, --provider <name>`
1196
1225
  - `AGENT_BROWSER_PROVIDER`
1226
+ - `AGENT_BROWSER_PLUGINS`
1197
1227
  - `browserbase`
1198
1228
  - `kernel`
1199
1229
  - `browseruse`
@@ -1435,6 +1465,16 @@ This generated block is review data for maintainers. The human-authored referenc
1435
1465
  - chat help: `chat <message>`
1436
1466
  - doctor help: `--offline`
1437
1467
  - doctor help: `--json`
1468
+ - root help: `Start an MCP stdio server`
1469
+ - root help: `plugin add <ref>`
1470
+ - root help: `plugin [list]`
1471
+ - root help: `plugin show <name>`
1472
+ - root help: `plugin run <name> <type>`
1473
+ - auth help: `--credential-provider <p>`
1474
+ - mcp help: `agent_browser_open`
1475
+ - mcp help: `--tools`
1476
+ - plugin help: `Add a plugin from npm or GitHub`
1477
+ - plugin help: `credential.read`
1438
1478
 
1439
1479
  ##### Global flags, config, providers, policy, and environment
1440
1480
  - root help: `--profile <name|path>`
@@ -1506,6 +1546,7 @@ This generated block is review data for maintainers. The human-authored referenc
1506
1546
  - root help: `AGENT_BROWSER_CONFIRM_INTERACTIVE`
1507
1547
  - root help: `--provider <name>`
1508
1548
  - root help: `AGENT_BROWSER_PROVIDER`
1549
+ - root help: `AGENT_BROWSER_PLUGINS`
1509
1550
  - root help: `agent-browser -p ios device list`
1510
1551
  - root help: `agent-browser -p ios swipe up`
1511
1552
  - root help: `agent-browser -p ios tap @e1`
package/docs/RELEASE.md CHANGED
@@ -66,13 +66,9 @@ For a deterministic host-only real-browser wrapper smoke without model choice in
66
66
  npm run verify -- dogfood
67
67
  ```
68
68
 
69
- For direct Crabbox diagnostics outside the full release compose, run:
69
+ For direct Crabbox diagnostics outside the full release compose, run the [required platform gate](platform-smoke.md#required-release-gate) (`check:platform-smoke`, `smoke:platform:ubuntu-image`, `smoke:platform:doctor`, `smoke:platform:all`) from [`platform-smoke.md`](platform-smoke.md), then inspect provider leases:
70
70
 
71
71
  ```bash
72
- npm run check:platform-smoke
73
- npm run smoke:platform:ubuntu-image
74
- npm run smoke:platform:doctor
75
- npm run smoke:platform:all
76
72
  crabbox list --provider local-container
77
73
  crabbox list --provider parallels
78
74
  ```
@@ -300,7 +296,7 @@ The default unit suite also runs `agentBrowserExtension passes through core comm
300
296
  - **Missing or extra `details` / `data` keys:** Update `test/fixtures/agent-browser-real-output-shapes.json` in the same change as the wrapper or presentation code that shifts those keys.
301
297
  - **Timeouts:** A 120s bound covers the full matrix; repeated timeouts usually mean a hung browser, blocked loopback, or an environment preventing headful/headless launch—check upstream logs and local security tooling before loosening timeouts.
302
298
 
303
- The current upstream `agent-browser 0.27.3` `wait --download <path>` saveAs persistence limitation is tracked at [vercel-labs/agent-browser#1300](https://github.com/vercel-labs/agent-browser/issues/1300); until it is fixed, release validation must treat `details.savedFilePath` as upstream-reported metadata and use `details.artifacts[].exists` as the filesystem truth (the contract asserts the requested path is absent on disk while upstream still reports success). If the suite fails because JSON/detail keys drifted, update the wrapper behavior or refresh `test/fixtures/agent-browser-real-output-shapes.json` together with the presentation work that consumes those shapes.
299
+ The upstream `agent-browser` `wait --download <path>` saveAs persistence limitation is tracked at [vercel-labs/agent-browser#1300](https://github.com/vercel-labs/agent-browser/issues/1300); until it is fixed, release validation must treat `details.savedFilePath` as upstream-reported metadata and use `details.artifacts[].exists` as the filesystem truth (the contract asserts the requested path is absent on disk while upstream still reports success). If the suite fails because JSON/detail keys drifted, update the wrapper behavior or refresh `test/fixtures/agent-browser-real-output-shapes.json` together with the presentation work that consumes those shapes.
304
300
 
305
301
  Example smoke prompt:
306
302
 
@@ -26,10 +26,10 @@ When upstream ships a new `agent-browser` or the inventory changes:
26
26
 
27
27
  ## Audit result
28
28
 
29
- - Target upstream: `agent-browser 0.27.3` (must match `CAPABILITY_BASELINE.targetVersion` in [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs)).
29
+ - Target upstream: `agent-browser 0.28.0` (must match `CAPABILITY_BASELINE.targetVersion` in [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs)).
30
30
  - Source of truth: `CAPABILITY_BASELINE.inventorySections` in the same file (stable `id` keys: `skills`, `core-commands`, `state-tabs-frames-dialogs`, `network-storage-artifacts-diagnostics`, `batch-auth-setup-ai`, `options-and-env`).
31
31
  - Status: supported for the current wrapper contract after the 2026-05-26 all-command audit.
32
- - High-priority support gaps: 2026-05-26 audit found sessionless local commands and command-scoped value flags needed sharper wrapper handling; runtime/tests/docs now cover those paths. The 0.27.3 rebaseline is install-only: no CLI/help or browser-command semantics changed, so no new wrapper surface was added. The prior rebaseline preserves thin support for upstream click reliability, frame-scoped selectors/waits, form-command fixes, daemon retry improvements, and glibc-pinned release artifacts; wrapper wait planning forwards explicit long `wait <ms>` / `wait --timeout <ms>` calls instead of rejecting them before spawn. Remaining upstream-owned caveat: `agent-browser 0.27.3` help mentions `wait <selector> --state hidden`, but source parsing does not implement that distinct wait mode, so wrapper docs steer agents to `wait --fn` predicates.
32
+ - High-priority support gaps: 2026-05-26 audit found sessionless local commands and command-scoped value flags needed sharper wrapper handling; runtime/tests/docs now cover those paths. The 0.28.0 rebaseline adds local `mcp` and `plugin` surfaces plus plugin-backed credential login; wrapper docs/tests mark `mcp` and known `plugin` commands sessionless, with no compatibility shim for older upstream releases. The prior rebaseline preserves thin support for upstream click reliability, frame-scoped selectors/waits, form-command fixes, daemon retry improvements, and glibc-pinned release artifacts; wrapper wait planning forwards explicit long `wait <ms>` / `wait --timeout <ms>` calls instead of rejecting them before spawn. Remaining upstream-owned caveat: current help mentions `wait <selector> --state hidden`, but source parsing does not implement that distinct wait mode, so wrapper docs steer agents to `wait --fn` predicates.
33
33
  - Post-`v0.2.29` review state: commits `eb55320` through `86abbfb` add browser guidance/smoke coverage plus `RQ-0086` click-probe reduction, `RQ-0087` same-snapshot form fill batching, `RQ-0088` current-ref fallback on locator misses, `RQ-0089` direct-upstream click mutation investigation, and `RQ-0090` stop-boundary/artifact-path guidance. Verification gates below were rerun on 2026-05-18 after those tasks landed. Constrained `job` (`RQ-0064`), the lightweight `qa` preset (`RQ-0065`), the experimental `sourceLookup` helper (`RQ-0066`), the experimental `networkSourceLookup` helper (`RQ-0067`), optional Exa/Brave-backed `agent_browser_web_search` with Pi-scoped package config (`RQ-0121`), and agent recovery for search/profile configuration failures (`RQ-0122`) are implemented; see [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#job), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#qa), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#sourcelookup), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#networksourcelookup), and [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#optional-companion-web-search). Reusable browser recipes (`RQ-0068`) are intentionally not adopted as a runtime surface; see [`ARCHITECTURE.md`](ARCHITECTURE.md#no-reusable-recipe-layer-yet).
34
34
 
35
35
  ## Open UX/reliability follow-ups from 2026-05-29 agent feedback
@@ -47,21 +47,21 @@ Current summary:
47
47
 
48
48
  ## Verification evidence
49
49
 
50
- Re-run the gates below before each release; this table records what the closure audit exercised. Rows marked **Current for 0.27.3** were rerun after the `agent-browser 0.27.3` install-only rebaseline. Rows marked **Historical / pending refresh** are useful prior evidence but must not be treated as current release proof until rerun under the named condition.
50
+ Re-run the gates below before each release; this table records what the closure audit exercised. Rows marked **Current for 0.28.0** were rerun after the `agent-browser 0.28.0` rebaseline. Rows marked **Historical / pending refresh** are useful prior evidence but must not be treated as current release proof until rerun under the named condition.
51
51
 
52
52
  | Gate | Evidence | Status |
53
53
  | --- | --- | --- |
54
- | Default local gate | `npm run verify` checks generated playbook drift, clean-builds generated `dist/`, runs `tsc --noEmit`, unit/fake tests, generated command-reference blocks, and live command-reference sampling. | **Current for 0.27.3:** pass on 2026-06-13 (`npm run verify`; clean build, TypeScript, 571 passed, 1 skipped, generated docs check, and live command-reference sampling passed with `agent-browser 0.27.3` on `PATH`). |
54
+ | Default local gate | `npm run verify` checks generated playbook drift, clean-builds generated `dist/`, runs `tsc --noEmit`, unit/fake tests, generated command-reference blocks, and live command-reference sampling. | **Current for 0.28.0:** pass on 2026-06-18 as part of `npm run verify -- release` (clean build, TypeScript, 571 passed, 1 skipped, generated docs check, and live command-reference sampling passed with `agent-browser 0.28.0` on `PATH`). |
55
55
  | Pre-PR local gate | `npm run verify -- pre-pr` composes the default gate with package-content verification. Use before larger local handoffs or PR-ready claims when lifecycle/platform/live dogfood cost is not warranted. | Added 2026-06-10; orchestration is locked by `test/project-verify.test.ts` and does not change release mode. |
56
- | Real upstream contract | `npm run verify -- real-upstream` runs the localhost fixture matrix against the real installed `agent-browser` matching the baseline. | **Historical / pending refresh:** pass on 2026-06-11 (`npm run verify -- real-upstream`, `agent-browser 0.27.2` on `PATH`; includes 0.27.2 off-viewport click, frame-scoped selector/wait/click, form command, and wait-download artifact coverage). Not rerun for the 0.27.3 install-only rebaseline unless noted in release evidence. |
57
- | Packaged Pi smoke | `npm run verify -- package-pi` validates package contents, loads the packaged `agent_browser` tool without requiring optional Brave config, and executes fake-upstream `--version`. | **Historical / pending refresh:** pass on 2026-06-11 as part of `npm run verify -- release` and rerun after the compiled-entrypoint change (`verify-package.mjs --smoke-pi`; packed 117 files, packaged `agent_browser --version` invocation passed). Not rerun for 0.27.3 unless noted in release evidence. |
56
+ | Real upstream contract | `npm run verify -- real-upstream` runs the localhost fixture matrix against the real installed `agent-browser` matching the baseline. | **Current for 0.28.0:** pass on 2026-06-18 (`npm run verify -- real-upstream`; localhost fixture matrix passed against installed `agent-browser 0.28.0`). |
57
+ | Packaged Pi smoke | `npm run verify -- package-pi` validates package contents, loads the packaged `agent_browser` tool without requiring optional Brave config, and executes fake-upstream `--version`. | **Current for 0.28.0:** pass on 2026-06-18 as part of `npm run verify -- release` (`verify-package.mjs --smoke-pi`; packed 118 files, packaged `agent_browser --version` invocation passed). |
58
58
  | Startup profile | `npm run verify -- startup-profile --samples <n>` clean-builds generated `dist/`, records direct package entrypoint import/factory timing in fresh Node processes, and writes `.artifacts/startup-profile/latest.json`. It must not launch Pi, tmux, mise, npm, browsers, or `agent-browser`; full Pi TUI ready-prompt profiling is intentionally excluded after it proved too invasive for routine verification. Run this opt-in evidence when package layout, the compiled entrypoint, top-level imports, schema registration, or prompt/config startup logic changes. | **Current for compiled entrypoint:** pass on 2026-06-11 with direct compiled entrypoint import+factory median 47.136 ms in earlier samples, below the 250 ms direct-import guard and below the prior ~96 ms TypeScript-entrypoint baseline. Full-Pi startup numbers from the unsafe tmux profiler are not accepted as ongoing release evidence. |
59
- | Deterministic dogfood smoke | `npm run verify -- dogfood` (`scripts/verify-agent-browser-dogfood.ts`) drives the native wrapper against a local file fixture through top-level `qa`, `semanticAction`, constrained `job`, screenshot artifact verification, and session close with the real `agent-browser` on `PATH`. | **Current for 0.27.3:** pass on 2026-06-13 (`npm run verify -- dogfood`, `agent-browser 0.27.3`; `qa-url`, fresh/current opens, semantic click, job screenshot artifact verification, and close all passed). |
59
+ | Deterministic dogfood smoke | `npm run verify -- dogfood` (`scripts/verify-agent-browser-dogfood.ts`) drives the native wrapper against a local file fixture through top-level `qa`, `semanticAction`, constrained `job`, screenshot artifact verification, and session close with the real `agent-browser` on `PATH`. | **Current for 0.28.0:** pass on 2026-06-18 (`npm run verify -- dogfood`; `qa-url`, fresh/current opens, semantic click, job screenshot artifact verification, and close all passed). |
60
60
  | Efficiency benchmark | `npm run verify -- benchmark` runs deterministic browser workflow accounting plus focused benchmark tests, including JSONL sampling fixtures and job/qa/sourceLookup/networkSourceLookup/Electron scenario coverage. | **Historical / pending refresh:** pass on 2026-05-29 (`npm run verify -- benchmark`). This deterministic gate is not upstream-version-specific, but rerun before claiming current benchmark evidence after benchmark or workflow-scenario edits. |
61
- | Crabbox platform smoke | `npm run check:platform-smoke` syntax-checks the harness and cheap invariants. `npm run smoke:platform:ubuntu-image` builds the project-owned Linux image, `npm run smoke:platform:doctor` checks Crabbox 0.26.0+ and local target readiness, and `npm run smoke:platform:all` runs doctor first, then fast target-local `platform-build` (`npm run verify -- platform-target`, pack, clean Pi install) plus `browser-dogfood-smoke` on Crabbox `macos`, `ubuntu`, and `windows-native`; see [`platform-smoke.md`](platform-smoke.md). Target artifacts include Crabbox/provider/work-root metadata, and release review also checks provider-specific `crabbox list` commands for leftover leases/clones. | **Historical / pending refresh:** pass on 2026-06-11 inside `npm run verify -- release`; rebuilt Ubuntu image `pi-agent-browser-native-platform:node24-agent-browser0.27.2`, refreshed the Windows `crabbox-ready` template snapshot to `agent-browser 0.27.2`, doctor passed, then Crabbox platform smoke passed for macOS, Ubuntu, and native Windows. Not rerun for 0.27.3 unless the release gate below records a fresh platform pass. |
62
- | `verify -- release` / `prepublishOnly` | `npm run verify -- release` chains the default gate with the configured-source lifecycle harness, packaged Pi smoke, and the release-blocking Crabbox platform matrix (`verifySteps` `release` in [`scripts/project.mjs`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/scripts/project.mjs)). `package.json` `prepublishOnly` runs that compose before `npm pack --dry-run` during `npm publish`. It intentionally omits standalone real-upstream, host-only dogfood, and benchmark modes—see [`RELEASE.md`](RELEASE.md#pre-release-checks). | **Historical / pending refresh:** pass on 2026-06-11 (`npm run verify -- release`), including default unit/fake gate, generated docs checks, live command-reference sampling, lifecycle harness, packaged Pi smoke, and macOS/Ubuntu/native-Windows Crabbox platform smoke. Not rerun for 0.27.3 unless noted in release evidence. |
63
- | Configured-source lifecycle | `npm run verify -- lifecycle` (`scripts/verify-lifecycle.mjs`) drives `/reload`, closes and relaunches Pi with the same exact `--session-id`, checks the JSONL session header id, session continuity, slash-command sentinel tokens (`v1` before reload and `v2` after full relaunch because compiled JS package modules are process-cached), persisted spill reachability, and real Pi `tool_result` failure-patch semantics for a QA reclassification with a fake upstream on `PATH`. Default Pi model is `zai/glm-5.2`; default per-step wait is **180000 ms** (`DEFAULT_TIMEOUT_MS`); override model with `--model <id>` and waits with `--timeout-ms <ms>`. Passthrough flags in [`scripts/project.mjs`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/scripts/project.mjs): `--keep-artifacts`, `--model`, `--verbose`, and `--timeout-ms` plus a value (for example `npm run verify -- lifecycle --model openai-codex/gpt-5.5:minimal --keep-artifacts --verbose --timeout-ms 600000`). | **Historical / pending refresh:** lifecycle-focused pass on 2026-06-11 after compiled-entrypoint update; managed browser session continuity and persisted full output verified before cleanup. Not rerun for 0.27.3 unless noted in release evidence. |
64
- | Quick isolated Pi smoke | `pi --approve --no-extensions --no-skills -e . --tools agent_browser` from trusted repo root; native `agent_browser` only. | **Historical / pending refresh:** pass on 2026-06-11 via tmux with `pi --approve --no-extensions --no-skills -e .`; native `agent_browser` only. Covered `qa` with `sessionMode: "fresh"` against `https://example.com`, `open` and compact `snapshot -i` on `https://react.dev`, `semanticAction` link click to `https://react.dev/learn`, screenshot artifact verification at `/tmp/piab-release-smoke-react.png`, and `close`; explicit screenshot and temporary session artifacts were removed after evidence capture. Broader historical coverage also includes version/help/skills, eval stdin, batch stdin, explicit session, network requests, console/errors, diff snapshot, stream status/disable, dashboard start/stop, and chat credential-failure pass-through during RQ-0055. Not rerun for 0.27.3 unless noted in release evidence. |
61
+ | Crabbox platform smoke | `npm run check:platform-smoke` syntax-checks the harness and cheap invariants. `npm run smoke:platform:ubuntu-image` builds the project-owned Linux image, `npm run smoke:platform:doctor` checks Crabbox 0.26.0+ and local target readiness, and `npm run smoke:platform:all` runs doctor first, then fast target-local `platform-build` (`npm run verify -- platform-target`, pack, clean Pi install) plus `browser-dogfood-smoke` on Crabbox `macos`, `ubuntu`, and `windows-native`; see [`platform-smoke.md`](platform-smoke.md). Target artifacts include Crabbox/provider/work-root metadata, and release review also checks provider-specific `crabbox list` commands for leftover leases/clones. | **Current for 0.28.0:** pass on 2026-06-18 inside `npm run verify -- release`; rebuilt Ubuntu image `pi-agent-browser-native-platform:node24-agent-browser0.28.0`, refreshed the Windows `crabbox-ready` template snapshot to `agent-browser 0.28.0`, doctor passed, then Crabbox platform smoke passed for macOS, Ubuntu, and native Windows. |
62
+ | `verify -- release` / `prepublishOnly` | `npm run verify -- release` chains the default gate with the configured-source lifecycle harness, packaged Pi smoke, and the release-blocking Crabbox platform matrix (`verifySteps` `release` in [`scripts/project.mjs`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/scripts/project.mjs)). `package.json` `prepublishOnly` runs that composed gate before `npm pack --dry-run` during `npm publish`. It intentionally omits standalone real-upstream, host-only dogfood, and benchmark modes—see [`RELEASE.md`](RELEASE.md#pre-release-checks). | **Current for 0.28.0:** pass on 2026-06-18 (`npm run verify -- release`), including default unit/fake gate, generated docs checks, live command-reference sampling, lifecycle harness, packaged Pi smoke, and macOS/Ubuntu/native-Windows Crabbox platform smoke. |
63
+ | Configured-source lifecycle | `npm run verify -- lifecycle` (`scripts/verify-lifecycle.mjs`) drives `/reload`, closes and relaunches Pi with the same exact `--session-id`, checks the JSONL session header id, session continuity, slash-command sentinel tokens (`v1` before reload and `v2` after full relaunch because compiled JS package modules are process-cached), persisted spill reachability, and real Pi `tool_result` failure-patch semantics for a QA reclassification with a fake upstream on `PATH`. Default Pi model is `zai/glm-5.2`; default per-step wait is **180000 ms** (`DEFAULT_TIMEOUT_MS`); override model with `--model <id>` and waits with `--timeout-ms <ms>`. Passthrough flags in [`scripts/project.mjs`](https://github.com/fitchmultz/pi-agent-browser-native/blob/main/scripts/project.mjs): `--keep-artifacts`, `--model`, `--verbose`, and `--timeout-ms` plus a value (for example `npm run verify -- lifecycle --model openai-codex/gpt-5.5:minimal --keep-artifacts --verbose --timeout-ms 600000`). | **Current for 0.28.0:** pass on 2026-06-18 as part of `npm run verify -- release`; managed browser session continuity and persisted full output verified before cleanup. |
64
+ | Quick isolated Pi smoke | `pi --approve --no-extensions --no-skills -e . --tools agent_browser` from trusted repo root; native `agent_browser` only. | **Historical / pending refresh:** pass on 2026-06-11 via tmux with `pi --approve --no-extensions --no-skills -e .`; native `agent_browser` only. Covered `qa` with `sessionMode: "fresh"` against `https://example.com`, `open` and compact `snapshot -i` on `https://react.dev`, `semanticAction` link click to `https://react.dev/learn`, screenshot artifact verification at `/tmp/piab-release-smoke-react.png`, and `close`; explicit screenshot and temporary session artifacts were removed after evidence capture. Broader historical coverage also includes version/help/skills, eval stdin, batch stdin, explicit session, network requests, console/errors, diff snapshot, stream status/disable, dashboard start/stop, and chat credential-failure pass-through during RQ-0055. Not rerun for 0.28.0 unless noted in release evidence. |
65
65
 
66
66
  Runtime floor note: package metadata keeps the Pi core package peer ranges as wildcards, per the installed Pi package docs, but `pi-agent-browser-doctor` / `npm run doctor` treats a `pi --version` below 0.79.0 as a setup failure. This keeps the package dependency shape aligned with Pi package loading while still making unsupported host Pi versions a release and first-run blocker.
67
67
 
@@ -73,8 +73,8 @@ Runtime floor note: package metadata keeps Pi core package peer ranges wildcard
73
73
  | Core page, element, navigation, and extraction commands | 74 canonical tokens from baseline section `core-commands`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#core-page-and-element-commands). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#core-page-and-element-commands), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md), README quick start. | Thin passthrough with wrapper-owned JSON/session planning, ref guidance, artifact verification, page-change summaries, click-dispatch diagnostics, no-op scroll/focus diagnostics, shorthand compilers, and redaction. | Real-upstream core matrix plus fake core matrix for passthrough, ordering, diagnostics, and compiler validation. | Supported. Upstream semantics remain upstream-owned. |
74
74
  | Sessions, state, tabs, frames, dialogs, and windows | 20 canonical tokens from baseline section `state-tabs-frames-dialogs`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#session-state-frames-dialogs-windows-and-inspection-commands). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#session-state-frames-dialogs-windows-and-inspection-commands), stateful workflow notes, [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details). | Stateful summaries/redaction, state artifact handling, sessionless local command planning, managed-session restore, tab target pinning, and close alias cleanup. | Extension-validation stateful matrix, runtime session/resume tests, presentation redaction tests, lifecycle harness. | Supported. External profile/auth state remains operator-owned. |
75
75
  | Network, storage, artifacts, diagnostics, and performance | 42 canonical tokens from baseline section `network-storage-artifacts-diagnostics`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#page-state-finding-mouse-settings-network-and-storage). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#page-state-finding-mouse-settings-network-and-storage), diagnostic sections, [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details). | Thin passthrough plus compact diagnostics, route-mock warnings, useful-but-redacted storage output, stream idempotency normalization, artifact metadata, missing-ffmpeg warnings, sensitive-data redaction, timeout bounds, and cleanup-pair guidance. | Fake non-core matrix and safe real-upstream coverage for network/HAR, diff, trace/profiler, console/errors/highlight, stream, vitals, and React missing-renderer. | Supported. Environment-sensitive operations need suitable local/browser state. |
76
- | Batch, auth, confirmations, setup, dashboard, devices, and AI commands | 24 canonical tokens from baseline section `batch-auth-setup-ai`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#batch-auth-confirmations-sessions-chat-dashboard-devices-and-setup). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#batch-auth-confirmations-sessions-chat-dashboard-devices-and-setup), README security notes, release docs. | Native-tool batch stdin, generated `job`/`qa`/lookup batch plans, auth/confirmation redaction, sessionless local auth/setup/dashboard/doctor planning, timeout/cleanup guidance. | Unit/fake batch/auth/confirmation/dashboard/chat/doctor tests; extension-validation for structured input modes; efficiency benchmark scenarios. | Supported. Interactive side-effecting setup/auth/chat remains upstream-owned. |
77
- | Global flags, config, providers, policy, and environment | 120 canonical tokens from baseline section `options-and-env`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#important-global-flags-config-and-environment). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#important-global-flags-config-and-environment), README provider/setup notes, [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#sessionmode), architecture/runtime docs. | Runtime handles command discovery, value-flag prevalidation, launch-scoped flags, redacted echoes, fresh-session recovery hints, explicit sessions, provider/device launch-scoping, parent env forwarding with wrapper overrides, subprocess completion, and package-owned Pi-scoped config for optional companion features. | Runtime tests for flags/planning/redaction/session behavior; process tests for env and stdio-linger completion; config/web-search/CLI tests; fake provider/specialized-skill matrix; package doctor. | Supported. Provider clouds, iOS/Appium, proxies, profiles, and credentials require external setup. |
76
+ | Batch, auth, confirmations, setup, dashboard, devices, and AI commands | 30 canonical tokens from baseline section `batch-auth-setup-ai`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#batch-auth-confirmations-sessions-chat-dashboard-devices-and-setup). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#batch-auth-confirmations-sessions-chat-dashboard-devices-and-setup), README security notes, release docs. | Native-tool batch stdin, generated `job`/`qa`/lookup batch plans, auth/confirmation redaction, sessionless local auth/setup/dashboard/doctor/plugin planning, plugin list/show JSON envelope normalization, bare-`mcp` validation with `mcp --help` preserved, timeout/cleanup guidance. | Parser/runtime plugin and MCP unit coverage; fake-upstream plugin list/show and MCP help/blocking coverage; real-upstream plugin list shape probe; structured input-mode tests; efficiency benchmark scenarios. | Supported. Interactive side-effecting setup/auth/chat remains upstream-owned. `plugin` is local/sessionless; `mcp` is external-client-only except help; `auth login --credential-provider` resolves credentials via a plugin. |
77
+ | Global flags, config, providers, policy, and environment | 121 canonical tokens from baseline section `options-and-env`; see [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs) and generated [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#important-global-flags-config-and-environment). | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#important-global-flags-config-and-environment), README provider/setup notes, [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#sessionmode), architecture/runtime docs. | Runtime handles command discovery, value-flag prevalidation, launch-scoped flags, redacted echoes, fresh-session recovery hints, explicit sessions, provider/device launch-scoping, parent env forwarding with wrapper overrides, subprocess completion, and package-owned Pi-scoped config for optional companion features. | Runtime tests for flags/planning/redaction/session behavior; process tests for env and stdio-linger completion; config/web-search/CLI tests; fake provider/specialized-skill matrix; package doctor. | Supported. Provider clouds, iOS/Appium, proxies, profiles, and credentials require external setup. |
78
78
 
79
79
  ## Follow-up decision after closure
80
80
 
@@ -126,7 +126,7 @@ For link and button text, use the **exact** visible label from the latest `snaps
126
126
 
127
127
  ## Wrapper `--json`
128
128
 
129
- The extension always plans normal browser commands with `--json` prepended in `effectiveArgs` so upstream returns structured JSON for presentation and `details`. **Do not** include `--json` in caller `args`; it is unnecessary and can confuse planning or transcript hooks that treat caller-requested JSON differently. Plain-text inspection (`--help`, `--version`) keeps its own output shape. Read-only skills and local/setup commands such as `skills list` / `skills get` / `skills path`, local auth profile management (`auth save/list/show/delete/remove`), `profiles`, `dashboard`, `device list`, `doctor`, `install`, `upgrade`, `session list`, and targeted/all local saved-state maintenance including `state clear --all`, `state clear -a`, and named `state clear <session-name>` skip implicit session injection as documented under `sessionMode`.
129
+ The extension always plans normal browser commands with `--json` prepended in `effectiveArgs` so upstream returns structured JSON for presentation and `details`. **Do not** include `--json` in caller `args`; it is unnecessary and can confuse planning or transcript hooks that treat caller-requested JSON differently. Plain-text inspection (`--help`, `--version`) keeps its own output shape. Read-only skills and local/setup commands such as `skills list` / `skills get` / `skills path`, local auth profile management (`auth save/list/show/delete/remove`), `profiles`, `dashboard`, `device list`, `doctor`, `install`, `upgrade`, `session list`, `plugin add/list/show/run`, `mcp --help`, and targeted/all local saved-state maintenance including `state clear --all`, `state clear -a`, and named `state clear <session-name>` skip implicit session injection as documented under `sessionMode`; bare `mcp` server calls are rejected before spawn because they are for external MCP clients.
130
130
 
131
131
  ## Headed and local fixture limits
132
132
 
@@ -606,7 +606,7 @@ Behavior:
606
606
  - if `args` already include `--session` (including argv compiled from optional `semanticAction.session`), upstream session choice wins
607
607
  - `"auto"` prepends the current extension-managed active session when appropriate
608
608
  - `"fresh"` rotates that managed session to a fresh upstream launch so startup-scoped flags like `--profile`, `--executable-path`, `--session-name`, `--cdp`, `--state`, `--auto-connect`, `--init-script`, `--enable`, `-p` / `--provider`, or iOS `--device` apply and later default calls follow the new browser
609
- - sessionless paths skip that injection even under `"auto"`: plain-text `--help` / `-h` / `--version` / `-V` (see the generated inspection playbook fragment below), read-only `skills list`, `skills get …`, and `skills path …`, local auth profile management (`auth save/list/show/delete/remove`), local/setup commands (`profiles`, `dashboard start/stop`, `device list`, `doctor`, `install`, `upgrade`, `session list`), and targeted/all local saved-state maintenance (`state list/show`, `state clear --all`, `state clear -a`, `state clear <session-name>`, `state clean --older-than <days>`, `state rename`) keep `effectiveArgs` free of the implicit managed `--session` unless the caller supplied `--session` explicitly; successful results therefore omit `usedImplicitSession` and the extension-managed `sessionName` for those calls, while root `session`, untargeted `state clear`, bare `state clean`, browser-backed `auth login`, and `state save/load` keep normal managed-session injection (`extensions/agent-browser/lib/command-policy.ts`, `needsManagedSession`; `extensions/agent-browser/lib/runtime.ts`, `buildExecutionPlan`)
609
+ - sessionless paths skip that injection even under `"auto"`: plain-text `--help` / `-h` / `--version` / `-V` (see the generated inspection playbook fragment below), read-only `skills list`, `skills get …`, and `skills path …`, local auth profile management (`auth save/list/show/delete/remove`), local/setup commands (`profiles`, `dashboard start/stop`, `device list`, `doctor`, `install`, `upgrade`, `session list`, `plugin add/list/show/run`, `mcp --help`), and targeted/all local saved-state maintenance (`state list/show`, `state clear --all`, `state clear -a`, `state clear <session-name>`, `state clean --older-than <days>`, `state rename`) keep `effectiveArgs` free of the implicit managed `--session` unless the caller supplied `--session` explicitly; successful results therefore omit `usedImplicitSession` and the extension-managed `sessionName` for those calls, while bare `mcp` server calls are rejected before spawn and root `session`, untargeted `state clear`, bare `state clean`, browser-backed `auth login`, and `state save/load` keep normal managed-session injection (`extensions/agent-browser/lib/command-policy.ts`, `needsManagedSession`; `extensions/agent-browser/lib/runtime.ts`, `buildExecutionPlan`)
610
610
 
611
611
  Recommended use:
612
612
  - use `"auto"` for the common browse/snapshot/click flow inside one `pi` session
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-agent-browser-native",
3
- "version": "0.2.52",
3
+ "version": "0.2.54",
4
4
  "description": "pi extension that exposes agent-browser as a native tool for browser automation",
5
5
  "type": "module",
6
6
  "author": "Mitch Fultz (https://github.com/fitchmultz)",
@@ -14,12 +14,14 @@ export const COMMAND_REFERENCE_BASELINE_BLOCK_IDS = Object.freeze(["upstream-bas
14
14
 
15
15
  const sourceEvidence = Object.freeze({
16
16
  repository: "vercel-labs/agent-browser",
17
- upstreamHead: "2c7991c9eccca1c9db6eee1a26a713414778de5a",
18
- upstreamPackageVersion: "0.27.3",
17
+ upstreamHead: "6323df571ffd17d14e60ec19fcb56cc1caf498ab",
18
+ upstreamPackageVersion: "0.28.0",
19
19
  inspectedSources: Object.freeze([
20
20
  "agent-browser --version",
21
21
  "agent-browser --help",
22
22
  "selected agent-browser <command> --help output",
23
+ "agent-browser mcp --help",
24
+ "agent-browser plugin --help",
23
25
  "README.md",
24
26
  "CHANGELOG.md",
25
27
  "agent-browser.schema.json",
@@ -91,6 +93,8 @@ const helpCommands = Object.freeze([
91
93
  helpCommand("install help", ["install", "--help"]),
92
94
  helpCommand("upgrade help", ["upgrade", "--help"]),
93
95
  helpCommand("profiles help", ["profiles", "--help"]),
96
+ helpCommand("mcp help", ["mcp", "--help"]),
97
+ helpCommand("plugin help", ["plugin", "--help"]),
94
98
  ]);
95
99
 
96
100
  const inventorySections = Object.freeze([
@@ -459,6 +463,12 @@ const inventorySections = Object.freeze([
459
463
  "doctor [--fix]",
460
464
  "doctor --offline --quick",
461
465
  "doctor --json",
466
+ "mcp",
467
+ "plugin add <ref>",
468
+ "plugin [list]",
469
+ "plugin show <name>",
470
+ "plugin run <name> <type>",
471
+ "auth login <name> --credential-provider <plugin>",
462
472
  "profiles",
463
473
  ],
464
474
  [
@@ -486,6 +496,16 @@ const inventorySections = Object.freeze([
486
496
  ["chat help", "chat <message>"],
487
497
  ["doctor help", "--offline"],
488
498
  ["doctor help", "--json"],
499
+ root("Start an MCP stdio server"),
500
+ root("plugin add <ref>"),
501
+ root("plugin [list]"),
502
+ root("plugin show <name>"),
503
+ root("plugin run <name> <type>"),
504
+ ["auth help", "--credential-provider <p>"],
505
+ ["mcp help", "agent_browser_open"],
506
+ ["mcp help", "--tools"],
507
+ ["plugin help", "Add a plugin from npm or GitHub"],
508
+ ["plugin help", "credential.read"],
489
509
  ],
490
510
  ),
491
511
  section(
@@ -562,6 +582,7 @@ const inventorySections = Object.freeze([
562
582
  "AGENT_BROWSER_CONFIRM_INTERACTIVE",
563
583
  "-p, --provider <name>",
564
584
  "AGENT_BROWSER_PROVIDER",
585
+ "AGENT_BROWSER_PLUGINS",
565
586
  "browserbase",
566
587
  "kernel",
567
588
  "browseruse",
@@ -683,6 +704,7 @@ const inventorySections = Object.freeze([
683
704
  root("AGENT_BROWSER_CONFIRM_INTERACTIVE"),
684
705
  root("--provider <name>"),
685
706
  root("AGENT_BROWSER_PROVIDER"),
707
+ root("AGENT_BROWSER_PLUGINS"),
686
708
  root("agent-browser -p ios device list"),
687
709
  root("agent-browser -p ios swipe up"),
688
710
  root("agent-browser -p ios tap @e1"),
@@ -709,7 +731,7 @@ const inventorySections = Object.freeze([
709
731
  ]);
710
732
 
711
733
  export const CAPABILITY_BASELINE = Object.freeze({
712
- targetVersion: "0.27.3",
734
+ targetVersion: "0.28.0",
713
735
  sourceEvidence,
714
736
  helpCommands,
715
737
  inventorySections,
@@ -92,8 +92,14 @@ Write-Output "PLATFORM_TSX_CLI=$TsxCli"
92
92
  $DogfoodStdout = Join-Path $DogfoodDir "dogfood.stdout.txt"
93
93
  $DogfoodStderr = Join-Path $DogfoodDir "dogfood.stderr.txt"
94
94
  if ($NpmCiExit -eq 0 -and $AgentBrowserExit -eq 0 -and $BrowserCacheExit -eq 0 -and $BrowserPrewarmExit -eq 0) {
95
- & $TsxCli "scripts/verify-agent-browser-dogfood.ts" --artifact-dir $DogfoodArtifactDir --json >$DogfoodStdout 2>$DogfoodStderr
96
- $DogfoodExit = $LASTEXITCODE
95
+ $DogfoodExit = 1
96
+ for ($Attempt = 1; $Attempt -le 2; $Attempt++) {
97
+ Write-Output "PLATFORM_DOGFOOD_ATTEMPT=$Attempt"
98
+ if ($Attempt -gt 1) { Start-Sleep -Seconds 2 }
99
+ & $TsxCli "scripts/verify-agent-browser-dogfood.ts" --artifact-dir $DogfoodArtifactDir --json >$DogfoodStdout 2>$DogfoodStderr
100
+ $DogfoodExit = $LASTEXITCODE
101
+ if ($DogfoodExit -eq 0) { break }
102
+ }
97
103
  } else {
98
104
  "npm ci or agent-browser setup failed" | Set-Content $DogfoodStderr
99
105
  $DogfoodExit = 1