pi-agent-browser-native 0.2.46 → 0.2.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/CHANGELOG.md +64 -20
  2. package/README.md +45 -20
  3. package/docs/ARCHITECTURE.md +14 -14
  4. package/docs/COMMAND_REFERENCE.md +37 -23
  5. package/docs/ELECTRON.md +3 -3
  6. package/docs/RELEASE.md +33 -24
  7. package/docs/REQUIREMENTS.md +4 -4
  8. package/docs/SUPPORT_MATRIX.md +34 -106
  9. package/docs/TOOL_CONTRACT.md +24 -22
  10. package/docs/platform-smoke.md +2 -2
  11. package/extensions/agent-browser/index.ts +20 -2
  12. package/extensions/agent-browser/lib/config-policy.js +16 -5
  13. package/extensions/agent-browser/lib/config.ts +17 -4
  14. package/extensions/agent-browser/lib/input-modes/job.ts +138 -62
  15. package/extensions/agent-browser/lib/input-modes/params.ts +2 -2
  16. package/extensions/agent-browser/lib/orchestration/browser-run/artifact-paths.ts +44 -0
  17. package/extensions/agent-browser/lib/orchestration/browser-run/click-dispatch.ts +42 -19
  18. package/extensions/agent-browser/lib/orchestration/browser-run/diagnostics.ts +6 -4
  19. package/extensions/agent-browser/lib/orchestration/browser-run/final-result.ts +18 -9
  20. package/extensions/agent-browser/lib/orchestration/browser-run/prepare/direct-anchor-download.ts +158 -0
  21. package/extensions/agent-browser/lib/orchestration/browser-run/prepare/network-page-filter.ts +116 -0
  22. package/extensions/agent-browser/lib/orchestration/browser-run/prepare/scroll-shims.ts +147 -0
  23. package/extensions/agent-browser/lib/orchestration/browser-run/prepare/snapshot-filter.ts +183 -0
  24. package/extensions/agent-browser/lib/orchestration/browser-run/prepare/wait-timeouts.ts +58 -0
  25. package/extensions/agent-browser/lib/orchestration/browser-run/prepare.ts +19 -653
  26. package/extensions/agent-browser/lib/orchestration/browser-run/process-output.ts +1 -6
  27. package/extensions/agent-browser/lib/orchestration/browser-run/session-artifacts.ts +8 -0
  28. package/extensions/agent-browser/lib/orchestration/browser-run/types.ts +1 -0
  29. package/extensions/agent-browser/lib/pi-tool-rendering.ts +34 -19
  30. package/extensions/agent-browser/lib/playbook.ts +4 -4
  31. package/extensions/agent-browser/lib/results/action-recommendations.ts +3 -3
  32. package/extensions/agent-browser/lib/web-search.ts +11 -4
  33. package/package.json +4 -4
  34. package/scripts/agent-browser-capability-baseline.mjs +6 -3
  35. package/scripts/doctor.mjs +12 -11
  36. package/scripts/platform-smoke/platform-build-windows.ps1 +2 -2
  37. package/scripts/platform-smoke/targets.mjs +7 -3
  38. package/scripts/platform-smoke.mjs +2 -2
@@ -92,6 +92,7 @@ import {
92
92
  sleepMs,
93
93
  } from "./diagnostics.js";
94
94
  import { repairScreenshotData } from "./prepare.js";
95
+ import { getPersistentSessionArtifactStore } from "./session-artifacts.js";
95
96
  import {
96
97
  buildFinalAgentBrowserToolResult,
97
98
  buildRedactedPresentationContent,
@@ -113,12 +114,6 @@ import type {
113
114
  ScreenshotPathRequest,
114
115
  } from "./types.js";
115
116
 
116
- function getPersistentSessionArtifactStore(ctx: BrowserRunContext): PersistentSessionArtifactStore | undefined {
117
- const sessionDir = typeof ctx.sessionManager.getSessionDir === "function" ? ctx.sessionManager.getSessionDir() : undefined;
118
- const sessionId = ctx.sessionManager.getSessionId();
119
- return sessionDir && sessionId ? { sessionDir, sessionId } : undefined;
120
- }
121
-
122
117
  async function repairScreenshotArtifact(options: {
123
118
  cwd: string;
124
119
  envelope?: AgentBrowserEnvelope;
@@ -0,0 +1,8 @@
1
+ import type { PersistentSessionArtifactStore } from "../../temp.js";
2
+ import type { BrowserRunContext } from "./types.js";
3
+
4
+ export function getPersistentSessionArtifactStore(ctx: BrowserRunContext): PersistentSessionArtifactStore | undefined {
5
+ const sessionDir = typeof ctx.sessionManager.getSessionDir === "function" ? ctx.sessionManager.getSessionDir() : undefined;
6
+ const sessionId = ctx.sessionManager.getSessionId();
7
+ return sessionDir && sessionId ? { sessionDir, sessionId } : undefined;
8
+ }
@@ -139,6 +139,7 @@ export type ClickDispatchProbeTarget =
139
139
  selector: string;
140
140
  }
141
141
  | {
142
+ duplicateIndex?: number;
142
143
  kind: "accessible";
143
144
  name: string;
144
145
  refId: string;
@@ -102,28 +102,39 @@ function formatVisualTruncationNotice(remainingLines: number, totalLines: number
102
102
  return truncateToWidth(notice, Math.max(0, width));
103
103
  }
104
104
 
105
- export function formatAgentBrowserRenderCall(args: unknown, theme: Theme): string {
106
- const input = isRecord(args) ? args : {};
107
- const semanticAction = compileAgentBrowserSemanticAction(input.semanticAction);
108
- const job = compileAgentBrowserJob(input.job);
109
- const qa = compileAgentBrowserQaPreset(input.qa);
110
- const sourceLookup = compileAgentBrowserSourceLookup(input.sourceLookup);
111
- const networkSourceLookup = compileAgentBrowserNetworkSourceLookup(input.networkSourceLookup);
112
- const electron = compileAgentBrowserElectron(input.electron);
113
- const generatedBatch = networkSourceLookup.compiled ?? sourceLookup.compiled ?? job.compiled ?? qa.compiled;
114
- const rawArgs = Array.isArray(input.args)
115
- ? input.args.filter((value): value is string => typeof value === "string")
116
- : electron.compiled
117
- ? ["electron", electron.compiled.action]
118
- : (semanticAction.compiled?.args ?? generatedBatch?.args ?? []);
105
+ function getStructuredModeInvocation(input: Record<string, unknown>): { mode?: string; rawArgs: string[] } {
106
+ if (Array.isArray(input.args)) return { rawArgs: input.args.filter((value): value is string => typeof value === "string") };
107
+ if (input.semanticAction !== undefined) return { mode: "semanticAction", rawArgs: compileAgentBrowserSemanticAction(input.semanticAction).compiled?.args ?? [] };
108
+ if (input.job !== undefined) return { mode: "job", rawArgs: compileAgentBrowserJob(input.job).compiled?.args ?? [] };
109
+ if (input.qa !== undefined) return { mode: "qa", rawArgs: compileAgentBrowserQaPreset(input.qa).compiled?.args ?? [] };
110
+ if (input.sourceLookup !== undefined) return { mode: "sourceLookup", rawArgs: compileAgentBrowserSourceLookup(input.sourceLookup).compiled?.args ?? [] };
111
+ if (input.networkSourceLookup !== undefined) return { mode: "networkSourceLookup", rawArgs: compileAgentBrowserNetworkSourceLookup(input.networkSourceLookup).compiled?.args ?? [] };
112
+ if (input.electron !== undefined) {
113
+ const electron = compileAgentBrowserElectron(input.electron);
114
+ return { mode: "electron", rawArgs: electron.compiled ? ["electron", electron.compiled.action] : [] };
115
+ }
116
+ return { rawArgs: [] };
117
+ }
118
+
119
+ function formatInvocationPreview(rawArgs: string[]): string {
119
120
  const redactedArgs = redactInvocationArgs(rawArgs);
120
121
  const invocation = sanitizeDisplayText(redactedArgs.join(" ")).replace(/\s+/g, " ").trim();
121
- const invocationPreview =
122
- invocation.length > TUI_INVOCATION_PREVIEW_MAX_CHARS
123
- ? `${invocation.slice(0, TUI_INVOCATION_PREVIEW_MAX_CHARS - 3)}...`
124
- : invocation;
122
+ return invocation.length > TUI_INVOCATION_PREVIEW_MAX_CHARS
123
+ ? `${invocation.slice(0, TUI_INVOCATION_PREVIEW_MAX_CHARS - 3)}...`
124
+ : invocation;
125
+ }
126
+
127
+ export function formatAgentBrowserRenderCall(args: unknown, theme: Theme): string {
128
+ const input = isRecord(args) ? args : {};
129
+ const { mode, rawArgs } = getStructuredModeInvocation(input);
130
+ const invocationPreview = formatInvocationPreview(rawArgs);
125
131
  let text = theme.fg("toolTitle", theme.bold("agent_browser"));
126
- if (invocationPreview.length > 0) {
132
+ if (mode) {
133
+ text += ` ${theme.fg("accent", mode)}`;
134
+ if (invocationPreview.length > 0) {
135
+ text += ` ${theme.fg("dim", "→")} ${theme.fg("accent", invocationPreview)}`;
136
+ }
137
+ } else if (invocationPreview.length > 0) {
127
138
  text += ` ${theme.fg("accent", invocationPreview)}`;
128
139
  }
129
140
  if (input.sessionMode === "fresh") {
@@ -146,7 +157,11 @@ export function formatAgentBrowserRenderResult(
146
157
  }
147
158
 
148
159
  const outputText = getPrimaryTextContent(result);
160
+ const failureCategoryNotice = formatModelVisibleFailureCategoryNotice(result.details);
149
161
  const outputLines = colorizeToolOutputLines(outputText, theme, isError);
162
+ if (failureCategoryNotice && outputLines.length > 0) {
163
+ outputLines.unshift(theme.fg("error", failureCategoryNotice), "");
164
+ }
150
165
  if (outputLines.length === 0) {
151
166
  const details = isRecord(result.details) ? result.details : undefined;
152
167
  const rawSummary = typeof details?.summary === "string" ? details.summary : isError ? "agent-browser failed" : "Done";
@@ -25,7 +25,7 @@ export const QUICK_START_GUIDELINES = [
25
25
  "Common first calls (first-call recipe): { args: [\"open\", \"<url>\"] } → { args: [\"snapshot\", \"-i\"] } → { args: [\"click\", \"@eN\"] } or { args: [\"fill\", \"@eN\", \"<text>\"] } using @refs and visible labels from that snapshot, then { args: [\"snapshot\", \"-i\"] } after navigation or DOM changes. On https://example.com/ the main link label is Learn more (use exact snapshot text, not guessed link copy).",
26
26
  "Locator-first clicks/fills and native select changes without hand-building argv: { semanticAction: { action: \"click\", locator: \"text\", value: \"Close\" } }, { semanticAction: { action: \"fill\", locator: \"label\", value: \"Email\", text: \"user@example.com\" } }, direct current targets such as { semanticAction: { action: \"fill\", selector: \"@e1\", text: \"prompt\" } }, or { semanticAction: { action: \"select\", selector: \"#flavor\", value: \"chocolate\" } }; add semanticAction.session when targeting a named upstream browser session; details.compiledSemanticAction shows the semantic target, while details.effectiveArgs may show a resolved current @ref for active-session role/name click/check/fill actions to avoid hidden duplicate matches; semanticAction does not expose uncheck while upstream find ... uncheck is not runtime-supported, so use raw uncheck with a stable selector or current ref; selector-not-found failures may append bounded click try-*-candidate next actions or, for fill misses with current editable refs, details.richInputRecovery with focus/click actions that do not copy fill text; stale-ref failures can return retry-semantic-action-after-stale-ref for compiled find actions when retry safety is provable.",
27
27
  `Common advanced calls: { args: ["batch"], stdin: "[[\"open\",\"https://example.com\"],[\"snapshot\",\"-i\"]]" }, { job: { steps: [{ action: "open", url: "https://example.com" }, { action: "assertText", text: "Example Domain" }, { action: "screenshot", path: ".dogfood/example.png" }] } }, { qa: { url: "https://example.com", expectedText: "Example Domain", screenshotPath: ".dogfood/qa-example.png" } } (example.com smoke only; elsewhere match exact visible text from snapshot -i), { electron: { action: "list", query: "code" } }, { electron: { action: "launch", appName: "Visual Studio Code", handoff: "snapshot" } }, { electron: { action: "probe" } }, { qa: { attached: true, expectedText: "Explorer" } }, { args: ["eval", "--stdin"], stdin: "document.title", outputPath: "logs/page-title.json" }, { args: ["auth", "save", "name", "--password-stdin"], stdin: "<password from user-approved secret source>" }, { args: ["--profile", "Default", "open", "https://example.com/account"], sessionMode: "fresh" }, and { args: ["open", "--enable", "react-devtools", "https://example.com"], sessionMode: "fresh" }. For app pages with a native dropdown, job steps can include { action: "select", selector: "#flavor", value: "chocolate" } before the dependent assertion; for locator-friendly pages, job click/fill steps can use semantic locator fields such as { action: "fill", locator: "role", role: "searchbox", name: "Search", text: "agent browser" }; for human-paced input, job type steps can use { action: "type", selector: "#prompt", text: "hello", delayMs: 20, press: "Enter" }; delayed typing is capped at 200 characters per step, and generated per-character rows are compacted in visible batch prose while full rows remain in details.batchSteps.`,
28
- "Constrained job navigation is explicit only: click (and select/submit flows that may navigate) does not prove the next page loaded; add assertUrl and/or assertText after navigation-prone steps before screenshot or later interactions. Example: { job: { steps: [{ action: \"open\", url: \"https://shop.example/checkout\" }, { action: \"fill\", selector: \"#email\", text: \"user@example.com\" }, { action: \"click\", selector: \"#continue\" }, { action: \"assertUrl\", url: \"**/shipping\" }, { action: \"assertText\", text: \"Shipping address\" }, { action: \"screenshot\", path: \".dogfood/shipping.png\" }] } }. Top-level click may add navigationSummary hints, but job never auto-inserts post-click asserts.",
28
+ "Constrained job navigation is explicit only: click (and select/submit flows that may navigate) does not prove the next page loaded; add assertUrl and/or assertText after navigation-prone steps before screenshot or later interactions. Keep jobs short around navigation, click, and rerender boundaries on dynamic React/product apps; avoid a whole checkout in one job. If a long job times out and details.timeoutPartialProgress shows a mutating incomplete step, inspect current page state and continue with a shorter job or single action instead of blindly retrying the mutating step. Example: { job: { steps: [{ action: \"open\", url: \"https://shop.example/checkout\" }, { action: \"fill\", selector: \"#email\", text: \"user@example.com\" }, { action: \"click\", selector: \"#continue\" }, { action: \"assertUrl\", url: \"**/shipping\" }, { action: \"assertText\", text: \"Shipping address\" }, { action: \"screenshot\", path: \".dogfood/shipping.png\" }] } }. Top-level click may add navigationSummary hints, but job never auto-inserts post-click asserts.",
29
29
  "High-value command reference: click <selector> --new-tab opens link-like targets in a new tab; select <selector> <value...> changes native dropdown values; scroll <dir> [px] --selector <sel>, wrapper-handled scroll <selector> <dir> [px|percent] targets nested scrollers, and wrapper-handled scroll to end/top targets document scrolling; download <selector> <path> saves a file triggered by a click; get title/url/text/html/value/attr/count reads page state; screenshot [selector] [path] captures a page or element image; pdf <path> saves a PDF; tab list and tab <tab-id-or-label> inspect or recover the active tab; react tree/inspect/renders/suspense introspect React after --enable react-devtools; vitals [url] measures Core Web Vitals; pushstate <url> performs SPA navigation; tap <selector> and swipe <direction> [distance] support iOS/provider touch flows.",
30
30
  "For artifact-producing commands, read the visible artifact block and details.artifactVerification before using files: check requested path, absolute path, existence, size bytes, artifact kind, optional mediaType, status, optional limitation, and verified/missing/pending/unverified counts. details.artifacts contains per-file metadata; record start rows are pending/openRecording until record stop writes the target. The wrapper creates parent directories for direct artifact paths and can save simple loopback HTTP(S) anchor downloads directly to the requested path before upstream download fallback. Browser close does not delete explicit saved files; if close reports details.artifactCleanup, use host file tools to remove paths listed in explicitArtifactPaths (when non-empty) after inspection. If close fails with details.promptGuard.reason=requested-artifacts-missing-before-close, save the exact required artifact path before closing. For annotated screenshots inside batch, put --annotate in top-level args (for example { args: [\"--annotate\", \"batch\"], stdin: \"[[\\\"screenshot\\\",\\\"/tmp/page.png\\\"]]\" }) rather than inside the screenshot step; if annotation labels crowd a dense page, use a scoped or non-annotated screenshot plus snapshot refs instead.",
31
31
  "When details.nextActions is present, prefer those exact native agent_browser follow-up payloads over prose guidance; they may include args, stdin, sessionMode, networkSourceLookup, safety notes, or artifactPath for saved files.",
@@ -54,14 +54,14 @@ export const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
54
54
  "For Electron desktop apps, prefer top-level electron for wrapper-owned discovery, isolated launch, status, compact probe, and cleanup: list first, treat likely-sensitive annotations as hints rather than enforcement, launch with the default snapshot handoff unless handoff: \"tabs\" is the safer diagnostic starting point, use electron.probe or snapshot -i/qa.attached for current-session state, and always cleanup the returned launchId when done. electron.launch uses an isolated temporary profile; it does not reuse the app's normal signed-in profile or attach to an already-running authenticated app. For signed-in local app state, host-launch the normal app with --remote-debugging-port when appropriate, then use raw args connect <port|url>; after connect, inspect tab list, select the stable tab id such as tab t2, then run a condition wait or snapshot -i before using refs. close commands (`close`, `quit`, or `exit`) only close the browser/CDP session; leave manually launched app shutdown, profile cleanup, and explicit artifacts to the host owner.",
55
55
  "For provider or specialized app workflows, load version-matched upstream guidance with skills get agentcore|electron|slack|dogfood|vercel-sandbox through the native tool; add --full when you need references/templates, and use skills get --all only for broad skill audits. Provider launches such as -p ios, --provider browserbase/kernel/browseruse/browserless/agentcore, and iOS --device are upstream-owned setup paths; use sessionMode fresh when switching providers and expect external credentials or local Appium/Xcode setup to be required.",
56
56
  "For dialogs and frames, use dialog status/accept/dismiss and frame <selector|main> through native args; dialog commands and eval snippets that look like alert/confirm/prompt/dialog triggers are shorter-bounded than normal browser calls, and timed-out dialog-like interactions may add inspect-dialog-after-timeout, dismiss-dialog-after-timeout, or recover-fresh-session-after-dialog-timeout nextActions. When --confirm-actions produces a pending confirmation, use details.nextActions or exact confirm <id> / deny <id> calls instead of inventing ids.",
57
- "If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. For headed demos, put --headed on the first launch with sessionMode=fresh and verify with screenshot/tab/get-url evidence because tool success cannot prove the OS window is visible to the user. For desktop readiness, prefer real conditions first: wait --text, wait --url, wait --fn, wait --load <state>, wait --download, or qa.attached; for disappearance checks in agent-browser 0.27.1, use wait --fn predicates instead of stale upstream-help examples like wait <selector> --state hidden. Use electron.probe/status for wrapper-owned launch health or target mismatch. Fixed waits are a last resort, must stay below the wrapper IPC budget (wait 30000 is intentionally blocked), and a successful payload like \"waited\":\"timeout\" means elapsed time only—verify completion with an observed condition, fresh snapshot, or screenshot.",
57
+ "If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. For headed demos, put --headed on the first launch with sessionMode=fresh and verify with screenshot/tab/get-url evidence because tool success cannot prove the OS window is visible to the user. For desktop readiness, prefer real conditions first: wait --text, wait --url, wait --fn, wait --load <state>, wait --download, or qa.attached; for disappearance checks in agent-browser 0.27.2, use wait --fn predicates instead of stale upstream-help examples like wait <selector> --state hidden. Use electron.probe/status for wrapper-owned launch health or target mismatch. Fixed waits are a last resort: use explicit --timeout or top-level timeoutMs for legitimately slow waits, and treat a successful payload like \"waited\":\"timeout\" as elapsed time only—verify completion with an observed condition, fresh snapshot, or screenshot.",
58
58
  "For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.",
59
59
  "For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.",
60
60
  "For downloads, prefer download <selector> <path> when an element click should save a file; simple loopback anchor downloads are saved to the requested path when the wrapper can resolve an HTTP(S) href. Do not rely on click alone when you need the downloaded file on disk.",
61
61
  "On dashboards with nested scroll containers, verify scroll with a screenshot or fresh snapshot -i; if the viewport did not move, details.data.scrolled may be false/noMovement true and you should prefer scrollintoview <@ref> or target the actual scrollable region with scroll <selector> <dir> [px|percent]. For native selects, use select <selector> <value...> (or semanticAction/job select) instead of clicking option refs; for custom comboboxes, a click/semanticAction may only focus the field, so re-snapshot and fall back to type, press Enter/arrow keys, or visible option refs.",
62
62
  "When using eval --stdin, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics.",
63
63
  "When using eval --stdin for extraction, pass the JavaScript through the native tool stdin field, not as an extra args token after --stdin, and return the value you want instead of relying on console.log as the primary result channel. Prefer plain expressions like ({ title: document.title }) or explicitly invoked functions like (() => ({ title: document.title }))(); use outputPath when the eval/get/snapshot data should be saved as a durable local file. If a function-shaped snippet returns {}, details.evalStdinHint may warn that the function was serialized instead of called. On file:// pages, when upstream JSON returns result: null for non-trivial stdin, details.evalResultWarning may append Eval result warning without failing the tool—treat that as inconclusive DOM verification. If get text on a CSS selector surfaces details.selectorTextVisibility or selectorTextVisibilityAll, prefer a visible @ref, a more specific selector, or the inspect-visible-text-candidates nextAction over hidden tab content.",
64
- "When details.pageChangeSummary is present, use changeType and summary as a compact signal for navigation, DOM mutation, confirmations, or artifacts; when nextActionIds is set, match those ids to entries in details.nextActions (or per-step nextActions inside batch) for concrete follow-up payloads instead of inferring from prose alone. If details.clickDispatch reports no trusted DOM event, refresh/inspect/retry the real click first; for static local fixtures only, an explicit eval --stdin programmatic .click() can exercise app handlers, but treat it as an untrusted scripted workaround and never use it to bypass stop-before-submit/order/purchase boundaries. If a no-navigation click surfaces details.overlayBlockers, inspect the fresh snapshot evidence before using a close/dismiss candidate nextAction; ordinary page chrome without dialog/alertdialog evidence should not trigger this diagnostic.",
64
+ "When details.pageChangeSummary is present, use changeType and summary as a compact signal for navigation, DOM mutation, confirmations, or artifacts; when nextActionIds is set, match those ids to entries in details.nextActions (or per-step nextActions inside batch) for concrete follow-up payloads instead of inferring from prose alone. If details.clickDispatch reports a click-dispatch miss, refresh/inspect/retry the real click first; for static local fixtures only, an explicit eval --stdin programmatic .click() can exercise app handlers, but treat it as an untrusted scripted workaround and never use it to bypass stop-before-submit/order/purchase boundaries. If a no-navigation click surfaces details.overlayBlockers, inspect the fresh snapshot evidence before using a close/dismiss candidate nextAction; ordinary page chrome without dialog/alertdialog evidence should not trigger this diagnostic.",
65
65
  "When commands save or spill files (screenshots, downloads, PDFs, traces, recordings, HAR, large snapshot spills), use the user's exact requested paths when given and treat paths as provisional until details.artifactVerification shows every row verified: branch on missingCount, pendingCount, unverifiedCount, per-entry state, and optional limitation before downstream file use or PASS/FAIL reporting.",
66
66
  "For evidence-only screenshots, QA captures, or other audit artifacts, save to an explicit path and branch on details.artifactVerification plus details.artifacts before reporting PASS/FAIL; do not require vision review of inline image attachments unless the user asked for visual inspection.",
67
67
  "Respect explicit user stop boundaries yourself: if the user says to stop before order/post/purchase/submit, do not click that final action. The wrapper does not infer broad business intent from prompt text; details.promptGuard is reserved for concrete artifact-before-close checks.",
@@ -101,7 +101,7 @@ export function buildSharedBrowserPlaybookGuidelines(options: { includeWebSearch
101
101
  /** Tier A: always-on tool promptGuidelines (keep small; Tier B lives in SHARED_BROWSER_PLAYBOOK_GUIDELINES and docs). */
102
102
  export const RUNTIME_PROMPT_GUIDELINES = [
103
103
  "Use agent_browser with exactly one input mode: args, semanticAction, job, qa, sourceLookup/networkSourceLookup, or electron. stdin only for batch/eval/auth or wrapper batch; electron rejects stdin. Do not pass --json in args; agent_browser injects it.",
104
- "For agent_browser, the common flow is open, snapshot -i, use current @refs or semanticAction, then re-snapshot after navigation/scroll/rerender/DOM change. Batch same-snapshot forms unless they may submit/navigate/rerender. Respect explicit stop boundaries: stop before order/post/purchase/submit.",
104
+ "For agent_browser, the common flow is open, snapshot -i, use current @refs or semanticAction, then re-snapshot after navigation/scroll/rerender/DOM change. Batch same-snapshot forms unless they may submit/navigate/rerender. Keep job flows short around navigation/click/rerender boundaries on dynamic apps. Respect explicit stop boundaries: stop before order/post/purchase/submit.",
105
105
  "Use agent_browser top-level sessionMode=fresh for launch-scoped flags; never put --session-mode in args. For signed-in/account-specific content, use requested/configured profiles, never assume --profile Default; on profile failures, run profiles/doctor and tell the user what to configure. Use --executable-path for configured Chromium. Profile content is model-visible.",
106
106
  "For agent_browser artifacts, save the exact user path and verify details.artifactVerification/details.artifacts before claiming success. If close is blocked by details.promptGuard, save the required artifact first. record stop needs ffmpeg; close does not delete saved files; waited:timeout is not proof.",
107
107
  "When agent_browser details.nextActions is present, prefer exact payloads over prose/guessed selectors. For dense snapshots, check Omitted high-value controls/details.data.highValueControlRefIds. For dashboards, verify scroll with screenshot/snapshot; if nothing moved, target the real scroll region.",
@@ -179,7 +179,7 @@ export function buildAgentBrowserNextActions(options: {
179
179
  args: ["wait", "--download", artifact.path],
180
180
  id: "wait-for-download",
181
181
  reason: "Upstream reported a download path, but the wrapper did not verify the file on disk.",
182
- safety: "Use a bounded wait timeout that stays below the native wrapper IPC budget.",
182
+ safety: "Use an explicit wait timeout; if you set top-level timeoutMs, keep it above the wait duration plus a small grace window.",
183
183
  }));
184
184
  } else {
185
185
  actions.push(buildArtifactVerificationAction(artifact));
@@ -200,7 +200,7 @@ export function buildAgentBrowserNextActions(options: {
200
200
  args: ["wait", "--download", artifact.path],
201
201
  id: "wait-for-download",
202
202
  reason: "The requested download artifact was not found on disk after upstream reported completion.",
203
- safety: "Use a bounded wait timeout that stays below the native wrapper IPC budget.",
203
+ safety: "Use an explicit wait timeout; if you set top-level timeoutMs, keep it above the wait duration plus a small grace window.",
204
204
  }));
205
205
  } else {
206
206
  actions.push(buildArtifactVerificationAction(artifact));
@@ -241,7 +241,7 @@ export function buildAgentBrowserNextActions(options: {
241
241
  args: retryPath ? ["wait", "--download", retryPath] : ["wait", "--download"],
242
242
  id: "wait-for-download",
243
243
  reason: "Wait for the browser download and let the wrapper verify saved-file metadata.",
244
- safety: "Use a bounded wait timeout that stays below the native wrapper IPC budget.",
244
+ safety: "Use an explicit wait timeout; if you set top-level timeoutMs, keep it above the wait duration plus a small grace window.",
245
245
  }));
246
246
  }
247
247
  break;
@@ -662,7 +662,10 @@ type AgentBrowserWebSearchParamsInput = {
662
662
  searchType?: ExaSearchType;
663
663
  };
664
664
 
665
- export function createAgentBrowserWebSearchTool(configState: AgentBrowserConfigState) {
665
+ export function createAgentBrowserWebSearchTool(
666
+ configState: AgentBrowserConfigState,
667
+ options: { loadConfigState?: (ctx: { cwd: string; isProjectTrusted?: () => boolean }) => AgentBrowserConfigState } = {},
668
+ ) {
666
669
  const requestGate = new WebSearchRequestGate();
667
670
  return {
668
671
  name: AGENT_BROWSER_WEB_SEARCH_TOOL_NAME,
@@ -677,12 +680,16 @@ export function createAgentBrowserWebSearchTool(configState: AgentBrowserConfigS
677
680
  "After using agent_browser_web_search, cite result URLs in the final answer when web evidence informed the answer.",
678
681
  ],
679
682
  parameters: AgentBrowserWebSearchParams,
680
- async execute(_toolCallId: string, params: AgentBrowserWebSearchParamsInput, signal?: AbortSignal) {
681
- if (!configState.webSearchEnabled) {
683
+ async execute(_toolCallId: string, params: AgentBrowserWebSearchParamsInput, signal?: AbortSignal, _onUpdate?: unknown, ctx?: { cwd: string; isProjectTrusted?: () => boolean }) {
684
+ const runtimeConfigState = ctx ? options.loadConfigState?.(ctx) ?? configState : configState;
685
+ if (runtimeConfigState.errors.length > 0) {
686
+ throw new Error(`agent_browser_web_search config is invalid: ${runtimeConfigState.errors.join("; ")}`);
687
+ }
688
+ if (!runtimeConfigState.webSearchEnabled) {
682
689
  throw new Error("agent_browser_web_search is disabled by pi-agent-browser-native config.");
683
690
  }
684
691
  const requestedProvider = params.provider ?? "auto";
685
- const resolved = await resolvePreferredWebSearchCredential(configState, { provider: requestedProvider, signal });
692
+ const resolved = await resolvePreferredWebSearchCredential(runtimeConfigState, { provider: requestedProvider, signal });
686
693
  if (!resolved) throw new Error(buildMissingCredentialError(requestedProvider));
687
694
  const query = params.query.trim();
688
695
  if (!query) throw new Error("query must not be blank");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-agent-browser-native",
3
- "version": "0.2.46",
3
+ "version": "0.2.48",
4
4
  "description": "pi extension that exposes agent-browser as a native tool for browser automation",
5
5
  "type": "module",
6
6
  "author": "Mitch Fultz (https://github.com/fitchmultz)",
@@ -62,9 +62,9 @@
62
62
  "typebox": "*"
63
63
  },
64
64
  "devDependencies": {
65
- "@earendil-works/pi-ai": "^0.78.1",
66
- "@earendil-works/pi-coding-agent": "^0.78.1",
67
- "@earendil-works/pi-tui": "^0.78.1",
65
+ "@earendil-works/pi-ai": "0.79.1",
66
+ "@earendil-works/pi-coding-agent": "0.79.1",
67
+ "@earendil-works/pi-tui": "0.79.1",
68
68
  "@types/node": "^25.6.1",
69
69
  "tsx": "^4.21.0",
70
70
  "typebox": "^1.1.38",
@@ -14,8 +14,8 @@ export const COMMAND_REFERENCE_BASELINE_BLOCK_IDS = Object.freeze(["upstream-bas
14
14
 
15
15
  const sourceEvidence = Object.freeze({
16
16
  repository: "vercel-labs/agent-browser",
17
- upstreamHead: "90050f2913159875e2c3719e424746396ccb3cbf",
18
- upstreamPackageVersion: "0.27.1",
17
+ upstreamHead: "5185339ca3fdab9848e11b8ec676eecfdec3733f",
18
+ upstreamPackageVersion: "0.27.2",
19
19
  inspectedSources: Object.freeze([
20
20
  "agent-browser --version",
21
21
  "agent-browser --help",
@@ -609,6 +609,9 @@ const inventorySections = Object.freeze([
609
609
  "AWS_PROFILE",
610
610
  "AWS_ACCESS_KEY_ID",
611
611
  "AWS_SECRET_ACCESS_KEY",
612
+ "AWS_SESSION_TOKEN",
613
+ "AWS_REGION",
614
+ "AWS_DEFAULT_REGION",
612
615
  ],
613
616
  [
614
617
  root("--profile <name|path>"),
@@ -706,7 +709,7 @@ const inventorySections = Object.freeze([
706
709
  ]);
707
710
 
708
711
  export const CAPABILITY_BASELINE = Object.freeze({
709
- targetVersion: "0.27.1",
712
+ targetVersion: "0.27.2",
710
713
  sourceEvidence,
711
714
  helpCommands,
712
715
  inventorySections,
@@ -22,7 +22,7 @@ const PACKAGE_NAME = "pi-agent-browser-native";
22
22
  const REPO_URL_FRAGMENT = "github.com/fitchmultz/pi-agent-browser-native";
23
23
  const EXTENSION_ENTRYPOINT = "extensions/agent-browser/index.ts";
24
24
  const EXPECTED_VERSION = CAPABILITY_BASELINE.targetVersion;
25
- const RECOMMENDED_PI_VERSION = "0.78.1";
25
+ const MINIMUM_PI_VERSION = "0.79.0";
26
26
  const DEFAULT_AGENT_DIR = resolve(homedir(), ".pi/agent");
27
27
  const THIS_PACKAGE_ROOT = resolve(dirname(fileURLToPath(import.meta.url)), "..");
28
28
 
@@ -67,7 +67,7 @@ Options:
67
67
  Checks:
68
68
  1. agent-browser is installed on PATH.
69
69
  2. agent-browser --version matches the package capability baseline.
70
- 3. pi --version is at least the recommended Pi floor for this release.
70
+ 3. pi --version is at least the minimum Pi runtime version for this release.
71
71
  4. Pi settings and repo-local autoload locations do not point at multiple active pi-agent-browser-native sources.
72
72
 
73
73
  Examples:
@@ -302,14 +302,14 @@ async function checkPiVersion({ runPi }) {
302
302
  try {
303
303
  const rawOutput = await runPi(["--version"]);
304
304
  const version = normalizePiVersion(rawOutput);
305
- const supported = versionAtLeast(version, RECOMMENDED_PI_VERSION);
305
+ const supported = versionAtLeast(version, MINIMUM_PI_VERSION);
306
306
  if (supported === false) {
307
307
  return {
308
- status: "warn",
309
- title: `Pi ${RECOMMENDED_PI_VERSION} or newer is recommended; found ${version || "<empty>"}.`,
308
+ status: "fail",
309
+ title: `Pi ${MINIMUM_PI_VERSION} or newer is required; found ${version || "<empty>"}.`,
310
310
  lines: [
311
- "This package does not hard-pin Pi 0.78.1, but this release was audited against Pi 0.78.1 extension/package behavior.",
312
- "Update Pi before release validation or lifecycle debugging if you see tool routing, /reload, exact-session, or package-install differences.",
311
+ "This release enforces the Pi 0.79.0 runtime floor through the read-only doctor and release/package validation because it depends on Project Trust, package loading, session lifecycle, TUI rendering, and tool_result patch behavior from that baseline.",
312
+ "Update Pi before using this package or running lifecycle/package validation.",
313
313
  ],
314
314
  };
315
315
  }
@@ -317,17 +317,17 @@ async function checkPiVersion({ runPi }) {
317
317
  return {
318
318
  status: "warn",
319
319
  title: `Could not parse pi --version output: ${version || "<empty>"}.`,
320
- lines: [`Pi ${RECOMMENDED_PI_VERSION} or newer is recommended for this release's validation baseline.`],
320
+ lines: [`Pi ${MINIMUM_PI_VERSION} or newer is required for this release; run this doctor from the same shell that launches Pi so the setup gate can verify the host runtime.`],
321
321
  };
322
322
  }
323
- return { status: "pass", title: `Pi version is within the recommended baseline: ${version}`, lines: [] };
323
+ return { status: "pass", title: `Pi version satisfies the minimum runtime floor: ${version}`, lines: [] };
324
324
  } catch (error) {
325
325
  const code = error && typeof error === "object" ? error.code : undefined;
326
326
  return {
327
327
  status: "warn",
328
328
  title: "Could not inspect pi --version.",
329
329
  lines: [
330
- `Pi ${RECOMMENDED_PI_VERSION} or newer is recommended for this release's validation baseline, but it is not hard-pinned as a runtime requirement.`,
330
+ `Pi ${MINIMUM_PI_VERSION} or newer is required for this release; run this doctor from the same shell that launches Pi so the setup gate can verify the host runtime.`,
331
331
  "Make sure the same shell that launches pi can run `pi --version` when debugging lifecycle or package-install behavior.",
332
332
  code && code !== "ENOENT" ? `Spawn error: ${String(code)}` : undefined,
333
333
  ].filter(Boolean),
@@ -392,7 +392,7 @@ async function checkPiSources({ cwd, agentDir, settingsPaths, readText, pathExis
392
392
  ...sources.map((source) => `- ${source.source} from ${source.location}`),
393
393
  "Keep exactly one active source:",
394
394
  "- for normal use: keep `pi install npm:pi-agent-browser-native` and remove/disable checkout paths from Pi settings",
395
- "- for temporary package or checkout trials: use `pi --no-extensions -e <source>` so configured sources are bypassed",
395
+ "- for temporary package or checkout trials: use `pi --approve --no-extensions -e <source>` when you intentionally trust the current project, or omit `--approve` to let Pi prompt in interactive mode",
396
396
  "- for configured-source lifecycle validation: keep exactly one checkout or package source, then launch plain `pi`",
397
397
  ],
398
398
  warnings,
@@ -434,6 +434,7 @@ export async function evaluateDoctor(options = {}) {
434
434
 
435
435
  const piVersionCheck = await checkPiVersion({ runPi });
436
436
  checks.push(piVersionCheck);
437
+ if (piVersionCheck.status === "fail") failures.push(piVersionCheck);
437
438
 
438
439
  if (!options.skipSourceCheck) {
439
440
  const sourceCheck = await checkPiSources({ cwd, agentDir, settingsPaths, readText, pathExists });
@@ -71,7 +71,7 @@ $PiInstallStderr = Join-Path $PackDir "pi-install.stderr.txt"
71
71
  if ($PackedNodeInstallExit -eq 0) {
72
72
  Push-Location $PiProject
73
73
  $env:PI_OFFLINE = "1"
74
- & $PiCli install -l ".\node_modules\$PackageName" >$PiInstallStdout 2>$PiInstallStderr
74
+ & $PiCli install -l --approve ".\node_modules\$PackageName" >$PiInstallStdout 2>$PiInstallStderr
75
75
  $PiInstallExit = $LASTEXITCODE
76
76
  Remove-Item Env:\PI_OFFLINE -ErrorAction SilentlyContinue
77
77
  Pop-Location
@@ -87,7 +87,7 @@ $PiListStdout = Join-Path $PackDir "pi-list.stdout.txt"
87
87
  $PiListStderr = Join-Path $PackDir "pi-list.stderr.txt"
88
88
  Push-Location $PiProject
89
89
  $env:PI_OFFLINE = "1"
90
- & $PiCli list >$PiListStdout 2>$PiListStderr
90
+ & $PiCli list --approve >$PiListStdout 2>$PiListStderr
91
91
  $PiListExit = $LASTEXITCODE
92
92
  Remove-Item Env:\PI_OFFLINE -ErrorAction SilentlyContinue
93
93
  Pop-Location
@@ -93,6 +93,10 @@ function section(text, name) {
93
93
  return (endIndex === -1 ? text.slice(contentStart) : text.slice(contentStart, endIndex)).replace(/^\r?\n/, "").replace(/\r?\n$/, "");
94
94
  }
95
95
 
96
+ function escapeRegExp(text) {
97
+ return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
98
+ }
99
+
96
100
  function marker(text, name) {
97
101
  return text.match(new RegExp(`^${name}=(.*)$`, "m"))?.[1]?.trim() ?? "";
98
102
  }
@@ -288,11 +292,11 @@ export function buildPlatformBuildCommand(targetName, packageName = "pi-agent-br
288
292
  lines.push(`echo "PLATFORM_PACKED_NODE_INSTALL_EXIT=$PACKED_NODE_INSTALL_EXIT"`);
289
293
  lines.push(`echo "--- PACKED_NODE_INSTALL_STDOUT START ---"; cat "$PACK_DIR/packed-node-install.stdout.txt" 2>/dev/null || true; echo "--- PACKED_NODE_INSTALL_STDOUT END ---"`);
290
294
  lines.push(`echo "--- PACKED_NODE_INSTALL_STDERR START ---"; cat "$PACK_DIR/packed-node-install.stderr.txt" 2>/dev/null || true; echo "--- PACKED_NODE_INSTALL_STDERR END ---"`);
291
- lines.push(`if [ "$PACKED_NODE_INSTALL_EXIT" -eq 0 ] && [ -n "$PI_CLI" ]; then (cd "$PI_PROJECT" && PI_OFFLINE=1 "$PI_CLI" install -l ./node_modules/${packageName} >"$PACK_DIR/pi-install.stdout.txt" 2>"$PACK_DIR/pi-install.stderr.txt"); PI_INSTALL_EXIT=$?; else echo "missing pi cli or packed install" >"$PACK_DIR/pi-install.stderr.txt"; PI_INSTALL_EXIT=1; fi`);
295
+ lines.push(`if [ "$PACKED_NODE_INSTALL_EXIT" -eq 0 ] && [ -n "$PI_CLI" ]; then (cd "$PI_PROJECT" && PI_OFFLINE=1 "$PI_CLI" install -l --approve ./node_modules/${packageName} >"$PACK_DIR/pi-install.stdout.txt" 2>"$PACK_DIR/pi-install.stderr.txt"); PI_INSTALL_EXIT=$?; else echo "missing pi cli or packed install" >"$PACK_DIR/pi-install.stderr.txt"; PI_INSTALL_EXIT=1; fi`);
292
296
  lines.push(`echo "PLATFORM_PI_INSTALL_EXIT=$PI_INSTALL_EXIT"`);
293
297
  lines.push(`echo "--- PI_INSTALL_STDOUT START ---"; cat "$PACK_DIR/pi-install.stdout.txt" 2>/dev/null || true; echo "--- PI_INSTALL_STDOUT END ---"`);
294
298
  lines.push(`echo "--- PI_INSTALL_STDERR START ---"; cat "$PACK_DIR/pi-install.stderr.txt" 2>/dev/null || true; echo "--- PI_INSTALL_STDERR END ---"`);
295
- lines.push(`if [ -n "$PI_CLI" ]; then (cd "$PI_PROJECT" && PI_OFFLINE=1 "$PI_CLI" list >"$PACK_DIR/pi-list.stdout.txt" 2>"$PACK_DIR/pi-list.stderr.txt"); PI_LIST_EXIT=$?; else echo "missing pi cli" >"$PACK_DIR/pi-list.stderr.txt"; PI_LIST_EXIT=1; fi`);
299
+ lines.push(`if [ -n "$PI_CLI" ]; then (cd "$PI_PROJECT" && PI_OFFLINE=1 "$PI_CLI" list --approve >"$PACK_DIR/pi-list.stdout.txt" 2>"$PACK_DIR/pi-list.stderr.txt"); PI_LIST_EXIT=$?; else echo "missing pi cli" >"$PACK_DIR/pi-list.stderr.txt"; PI_LIST_EXIT=1; fi`);
296
300
  lines.push(`echo "PLATFORM_PI_LIST_EXIT=$PI_LIST_EXIT"`);
297
301
  lines.push(`echo "--- PI_LIST_STDOUT START ---"; cat "$PACK_DIR/pi-list.stdout.txt" 2>/dev/null || true; echo "--- PI_LIST_STDOUT END ---"`);
298
302
  lines.push(`echo "--- PI_LIST_STDERR START ---"; cat "$PACK_DIR/pi-list.stderr.txt" 2>/dev/null || true; echo "--- PI_LIST_STDERR END ---"`);
@@ -459,7 +463,7 @@ async function runPlatformBuildSuite(config, targetName, suiteName, leaseSession
459
463
  { id: "npm-pack", fn: () => /PLATFORM_NPM_PACK_EXIT=0/.test(stdout) && marker(stdout, "PLATFORM_PACKED_TARBALL").length > 0 },
460
464
  { id: "packed-node-install", fn: () => /PLATFORM_PACKED_NODE_INSTALL_EXIT=0/.test(stdout) },
461
465
  { id: "pi-install-local-package", fn: () => /PLATFORM_PI_INSTALL_EXIT=0/.test(stdout) },
462
- { id: "pi-list-local-package", fn: () => /PLATFORM_PI_LIST_EXIT=0/.test(stdout) && listOutput.includes(config.packageName) },
466
+ { id: "pi-list-local-package", fn: () => /PLATFORM_PI_LIST_EXIT=0/.test(stdout) && new RegExp(`Project packages:[\\s\\S]*${escapeRegExp(config.packageName)}`).test(listOutput) },
463
467
  { id: "no-source-extension-shortcut", fn: () => !/\bpi\s+(?:-e|--extension)\s+\./.test(stdout) },
464
468
  { id: "no-secret-artifacts", fn: () => secretViolations.length === 0, error: secretViolations.join(", ") },
465
469
  ];
@@ -39,7 +39,7 @@ Targets:
39
39
  macos, ubuntu, windows-native
40
40
 
41
41
  Suites:
42
- platform-build npm ci, npm run verify -- platform-target, npm pack, packed pi install, pi list
42
+ platform-build npm ci, npm run verify -- platform-target, npm pack, packed pi install --approve, pi list --approve
43
43
  browser-dogfood-smoke model-free native agent_browser smoke with real agent-browser/browser
44
44
 
45
45
  Options:
@@ -62,7 +62,7 @@ Environment:
62
62
  PLATFORM_SMOKE_MAC_USER macOS SSH user; default $USER
63
63
  PLATFORM_SMOKE_MAC_WORK_ROOT macOS Crabbox work root
64
64
  PLATFORM_SMOKE_MAC_PORT macOS SSH port; default 22
65
- PLATFORM_SMOKE_UBUNTU_IMAGE Ubuntu local-container image; default pi-agent-browser-native-platform:node24-agent-browser0.27.1
65
+ PLATFORM_SMOKE_UBUNTU_IMAGE Ubuntu local-container image; default pi-agent-browser-native-platform:node24-agent-browser0.27.2
66
66
  PLATFORM_SMOKE_WINDOWS_VM Parallels Windows template VM
67
67
  PLATFORM_SMOKE_WINDOWS_SNAPSHOT Parallels snapshot name
68
68
  PLATFORM_SMOKE_WINDOWS_USER Windows SSH user