npm - pi-agent-browser-native - Versions diffs - 0.2.44 → 0.2.45 - Mend

pi-agent-browser-native 0.2.44 → 0.2.45

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (64) hide show

package/extensions/agent-browser/lib/playbook.ts CHANGED Viewed

@@ -16,18 +16,18 @@ export const TOOL_PROMPT_GUIDELINES_PREFIX = [
 ] as const;
 export function buildInstalledDocsGuideline(paths: { readmePath: string; commandReferencePath: string; toolContractPath: string }): string {
-	return `For deeper guidance without bloating context, read installed package docs on demand: ${paths.readmePath} for setup/external dependencies, ${paths.commandReferencePath} for command workflows, and ${paths.toolContractPath} for result/details contracts. Do not load the full command reference unless needed; prefer targeted sections.`;
+	return `For deeper agent_browser guidance without bloating context, read installed package docs on demand: ${paths.readmePath} for setup/external dependencies, ${paths.commandReferencePath} for command workflows, and ${paths.toolContractPath} for result/details contracts. Do not load the full command reference unless needed; prefer targeted sections.`;
 }
 export const QUICK_START_GUIDELINES = [
-	`Quick start mental model: use exactly one of args (exact agent-browser CLI args after the binary), semanticAction (a thin shorthand compiled to find argv for locator actions or select argv for native dropdowns), job (a constrained short-workflow schema compiled to batch), qa (a lightweight QA preset built on job/batch, including qa.attached for current sessions), electron (desktop Electron list/launch/status/cleanup/probe), or the experimental sourceLookup / networkSourceLookup helpers (candidates only; each compiled to batch); stdin is only for batch, eval --stdin, auth save --password-stdin, and wrapper-generated batch stdin from job, qa, sourceLookup, or networkSourceLookup, and is rejected with electron; sessionMode=fresh switches the extension-managed pi-scoped session to a fresh upstream launch when you need new launch-scoped flags (${LAUNCH_SCOPED_FLAG_LABEL}) to apply. Do not pass --json in args; the wrapper injects it.`,
+	`Quick start mental model: use exactly one of args (exact agent-browser CLI args after the binary), semanticAction (a thin shorthand compiled to find argv for locator actions, direct selector/ref click/check/fill, or select argv for native dropdowns), job (a constrained short-workflow schema compiled to batch --bail by default; set failFast:false only when later diagnostics should continue after a failed step), qa (a lightweight fail-fast QA preset built on batch --bail with bounded visible expected-text checks, including qa.attached for current sessions), electron (desktop Electron list/launch/status/cleanup/probe), or the experimental sourceLookup / networkSourceLookup helpers (candidates only; each compiled to batch); stdin is only for batch, eval --stdin, auth save --password-stdin, and wrapper-generated batch stdin from job, qa, sourceLookup, or networkSourceLookup, and is rejected with electron; sessionMode=fresh switches the extension-managed pi-scoped session to a fresh upstream launch when you need new launch-scoped flags (${LAUNCH_SCOPED_FLAG_LABEL}) to apply. Use outputPath for durable eval/get/snapshot captures. Do not pass --json in args; the wrapper injects it.`,
 	"There is no first-class reusable named browser recipe runtime above top-level job, the qa preset, and raw batch stdin; keep recurring flows in documentation examples or those inputs (closed RQ-0068; see docs/ARCHITECTURE.md#no-reusable-recipe-layer-yet).",
 	"Common first calls (first-call recipe): { args: [\"open\", \"<url>\"] } → { args: [\"snapshot\", \"-i\"] } → { args: [\"click\", \"@eN\"] } or { args: [\"fill\", \"@eN\", \"<text>\"] } using @refs and visible labels from that snapshot, then { args: [\"snapshot\", \"-i\"] } after navigation or DOM changes. On https://example.com/ the main link label is Learn more (use exact snapshot text, not guessed link copy).",
-	"Locator-first clicks/fills and native select changes without hand-building argv: { semanticAction: { action: \"click\", locator: \"text\", value: \"Close\" } }, { semanticAction: { action: \"fill\", locator: \"label\", value: \"Email\", text: \"user@example.com\" } }, or { semanticAction: { action: \"select\", selector: \"#flavor\", value: \"chocolate\" } }; add semanticAction.session when targeting a named upstream browser session; details.compiledSemanticAction shows the semantic target, while details.effectiveArgs may show a resolved current @ref for active-session role/name click/check/fill actions to avoid hidden duplicate matches; semanticAction does not expose uncheck while upstream find ... uncheck is not runtime-supported, so use raw uncheck with a stable selector or current ref; selector-not-found failures may append bounded click try-*-candidate next actions or, for fill misses with current editable refs, details.richInputRecovery with focus/click actions that do not copy fill text; stale-ref failures can return retry-semantic-action-after-stale-ref for compiled find actions when retry safety is provable.",
-	`Common advanced calls: { args: ["batch"], stdin: "[[\"open\",\"https://example.com\"],[\"snapshot\",\"-i\"]]" }, { job: { steps: [{ action: "open", url: "https://example.com" }, { action: "assertText", text: "Example Domain" }, { action: "screenshot", path: ".dogfood/example.png" }] } }, { qa: { url: "https://example.com", expectedText: "Example Domain", screenshotPath: ".dogfood/qa-example.png" } } (example.com smoke only; elsewhere match exact visible text from snapshot -i), { electron: { action: "list", query: "code" } }, { electron: { action: "launch", appName: "Visual Studio Code", handoff: "snapshot" } }, { electron: { action: "probe" } }, { qa: { attached: true, expectedText: "Explorer" } }, { args: ["eval", "--stdin"], stdin: "document.title" }, { args: ["auth", "save", "name", "--password-stdin"], stdin: "<password from user-approved secret source>" }, { args: ["--profile", "Default", "open", "https://example.com/account"], sessionMode: "fresh" }, and { args: ["open", "--enable", "react-devtools", "https://example.com"], sessionMode: "fresh" }. For app pages with a native dropdown, job steps can include { action: "select", selector: "#flavor", value: "chocolate" } before the dependent assertion.`,
+	"Locator-first clicks/fills and native select changes without hand-building argv: { semanticAction: { action: \"click\", locator: \"text\", value: \"Close\" } }, { semanticAction: { action: \"fill\", locator: \"label\", value: \"Email\", text: \"user@example.com\" } }, direct current targets such as { semanticAction: { action: \"fill\", selector: \"@e1\", text: \"prompt\" } }, or { semanticAction: { action: \"select\", selector: \"#flavor\", value: \"chocolate\" } }; add semanticAction.session when targeting a named upstream browser session; details.compiledSemanticAction shows the semantic target, while details.effectiveArgs may show a resolved current @ref for active-session role/name click/check/fill actions to avoid hidden duplicate matches; semanticAction does not expose uncheck while upstream find ... uncheck is not runtime-supported, so use raw uncheck with a stable selector or current ref; selector-not-found failures may append bounded click try-*-candidate next actions or, for fill misses with current editable refs, details.richInputRecovery with focus/click actions that do not copy fill text; stale-ref failures can return retry-semantic-action-after-stale-ref for compiled find actions when retry safety is provable.",
+	`Common advanced calls: { args: ["batch"], stdin: "[[\"open\",\"https://example.com\"],[\"snapshot\",\"-i\"]]" }, { job: { steps: [{ action: "open", url: "https://example.com" }, { action: "assertText", text: "Example Domain" }, { action: "screenshot", path: ".dogfood/example.png" }] } }, { qa: { url: "https://example.com", expectedText: "Example Domain", screenshotPath: ".dogfood/qa-example.png" } } (example.com smoke only; elsewhere match exact visible text from snapshot -i), { electron: { action: "list", query: "code" } }, { electron: { action: "launch", appName: "Visual Studio Code", handoff: "snapshot" } }, { electron: { action: "probe" } }, { qa: { attached: true, expectedText: "Explorer" } }, { args: ["eval", "--stdin"], stdin: "document.title", outputPath: "logs/page-title.json" }, { args: ["auth", "save", "name", "--password-stdin"], stdin: "<password from user-approved secret source>" }, { args: ["--profile", "Default", "open", "https://example.com/account"], sessionMode: "fresh" }, and { args: ["open", "--enable", "react-devtools", "https://example.com"], sessionMode: "fresh" }. For app pages with a native dropdown, job steps can include { action: "select", selector: "#flavor", value: "chocolate" } before the dependent assertion; for locator-friendly pages, job click/fill steps can use semantic locator fields such as { action: "fill", locator: "role", role: "searchbox", name: "Search", text: "agent browser" }; for human-paced input, job type steps can use { action: "type", selector: "#prompt", text: "hello", delayMs: 20, press: "Enter" }; delayed typing is capped at 200 characters per step, and generated per-character rows are compacted in visible batch prose while full rows remain in details.batchSteps.`,
 	"Constrained job navigation is explicit only: click (and select/submit flows that may navigate) does not prove the next page loaded; add assertUrl and/or assertText after navigation-prone steps before screenshot or later interactions. Example: { job: { steps: [{ action: \"open\", url: \"https://shop.example/checkout\" }, { action: \"fill\", selector: \"#email\", text: \"user@example.com\" }, { action: \"click\", selector: \"#continue\" }, { action: \"assertUrl\", url: \"**/shipping\" }, { action: \"assertText\", text: \"Shipping address\" }, { action: \"screenshot\", path: \".dogfood/shipping.png\" }] } }. Top-level click may add navigationSummary hints, but job never auto-inserts post-click asserts.",
-	"High-value command reference: click <selector> --new-tab opens link-like targets in a new tab; select <selector> <value...> changes native dropdown values; scroll <dir> [px] --selector <sel> targets nested scrollers; download <selector> <path> saves a file triggered by a click; get title/url/text/html/value/attr/count reads page state; screenshot [selector] [path] captures a page or element image; pdf <path> saves a PDF; tab list and tab <tab-id-or-label> inspect or recover the active tab; react tree/inspect/renders/suspense introspect React after --enable react-devtools; vitals [url] measures Core Web Vitals; pushstate <url> performs SPA navigation; tap <selector> and swipe <direction> [distance] support iOS/provider touch flows.",
-	"For artifact-producing commands, read the visible artifact block and details.artifactVerification before using files: check requested path, absolute path, existence, size bytes, artifact kind, optional mediaType, status, optional limitation, and verified/missing/pending/unverified counts. details.artifacts contains per-file metadata. Browser close does not delete explicit saved files; if close reports details.artifactCleanup, use host file tools to remove paths listed in explicitArtifactPaths (when non-empty) after inspection. If close fails with details.promptGuard.reason=requested-artifacts-missing-before-close, save the exact required artifact path before closing. For annotated screenshots inside batch, put --annotate in top-level args (for example { args: [\"--annotate\", \"batch\"], stdin: \"[[\\\"screenshot\\\",\\\"/tmp/page.png\\\"]]\" }) rather than inside the screenshot step.",
+	"High-value command reference: click <selector> --new-tab opens link-like targets in a new tab; select <selector> <value...> changes native dropdown values; scroll <dir> [px] --selector <sel>, wrapper-handled scroll <selector> <dir> [px|percent] targets nested scrollers, and wrapper-handled scroll to end/top targets document scrolling; download <selector> <path> saves a file triggered by a click; get title/url/text/html/value/attr/count reads page state; screenshot [selector] [path] captures a page or element image; pdf <path> saves a PDF; tab list and tab <tab-id-or-label> inspect or recover the active tab; react tree/inspect/renders/suspense introspect React after --enable react-devtools; vitals [url] measures Core Web Vitals; pushstate <url> performs SPA navigation; tap <selector> and swipe <direction> [distance] support iOS/provider touch flows.",
+	"For artifact-producing commands, read the visible artifact block and details.artifactVerification before using files: check requested path, absolute path, existence, size bytes, artifact kind, optional mediaType, status, optional limitation, and verified/missing/pending/unverified counts. details.artifacts contains per-file metadata; record start rows are pending/openRecording until record stop writes the target. The wrapper creates parent directories for direct artifact paths and can save simple loopback HTTP(S) anchor downloads directly to the requested path before upstream download fallback. Browser close does not delete explicit saved files; if close reports details.artifactCleanup, use host file tools to remove paths listed in explicitArtifactPaths (when non-empty) after inspection. If close fails with details.promptGuard.reason=requested-artifacts-missing-before-close, save the exact required artifact path before closing. For annotated screenshots inside batch, put --annotate in top-level args (for example { args: [\"--annotate\", \"batch\"], stdin: \"[[\\\"screenshot\\\",\\\"/tmp/page.png\\\"]]\" }) rather than inside the screenshot step; if annotation labels crowd a dense page, use a scoped or non-annotated screenshot plus snapshot refs instead.",
 	"When details.nextActions is present, prefer those exact native agent_browser follow-up payloads over prose guidance; they may include args, stdin, sessionMode, networkSourceLookup, safety notes, or artifactPath for saved files.",
 ] as const;
@@ -36,7 +36,7 @@ export const WEB_SEARCH_PROMPT_GUIDELINE =
 export const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
-	"Standard workflow: open the page, snapshot -i, interact using current @refs from that snapshot, and re-snapshot after navigation, scrolling, rerendering, or other major DOM changes because refs are page-scoped; the wrapper fails mutation-prone stale/recycled refs before upstream can silently target a different current-page element.",
+	"Standard workflow: open the page, snapshot -i, interact using current @refs from that snapshot, and re-snapshot after navigation, scrolling, rerendering, or other major DOM changes because refs are page-scoped; the wrapper fails mutation-prone stale/recycled refs before upstream can silently target a different current-page element. On dense pages, use wrapper-side snapshot -i --search <text> or snapshot -i --filter role=<role> to render matching refs while preserving the full ref map in details.refSnapshot, add snapshot --viewport when scroll position or above/below-fold context matters, and add snapshot --diff when a quick before/after ref-map delta would prevent reading a full spill file.",
 	"For ordinary forms from one snapshot, batch multiple fill @refs before the submit/click step to avoid serial tool calls; if a fill may autosubmit, navigate, or rerender later fields, split the flow and refresh refs first.",
 	"Snapshot choice: prefer snapshot -i for routine clicks/fills (interactive @refs, main-content-first). Use snapshot --compact when you need a denser same-page tree without full spill; use full snapshot (no -i) only when you need the complete accessibility tree. Re-snapshot after navigation or major DOM changes. When snapshot -i compacts because the tree is oversized, scan visible output for Omitted high-value controls and optional details.data.highValueControlRefIds before opening the spill file: those list bounded searchboxes, textboxes, comboboxes, buttons, tabs, checkboxes, radios, options, and menuitems that did not fit the key/other ref previews.",
 	"When a visible text or accessible-name target should survive ref churn, prefer find locators such as role, text, label, placeholder, alt, title, or testid with the intended action instead of guessing a CSS selector.",
@@ -50,21 +50,21 @@ export const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
 	"For first-navigation setup, use open without a URL plus network route --resource-type <csv>, cookies set --curl <file>, or --init-script/--enable before navigate/opening the target page.",
 	"For stateful browser context work, prefer purpose-specific page actions before dumping browser data: use auth save --password-stdin with the tool stdin field for credentials, auth list/show/delete/remove for local auth-profile maintenance, auth login when you need the browser to fill a saved profile, state save/load for portable test state, state list/show/rename/clear/clear -a/clean for saved-state lifecycle cleanup, cookies get/set/clear and storage local|session only when the task needs those values, and expect cookie/storage/auth/state summaries to redact credential-like fields while allowing benign primitive storage values when useful for local QA.",
 	"For batch chains that touch cookies, storage, auth, or other secret-bearing commands, use details.batchSteps for per-step artifacts, categories, spill paths, and full structured errors; top-level details.data on batch is only a compact redacted step matrix (success, argv-redacted command, redacted result or scrubbed error text) built from the same presentation rules as standalone calls.",
-	"For non-core families, pass current upstream commands through the native tool directly: network route/requests/har (including request filters like --type/--method/--status), diff snapshot/screenshot/url with scoped/baseline options, trace/profiler/record, console/errors/highlight/inspect/clipboard, stream enable/disable/status, dashboard start/stop, device list for iOS simulator inventory, and chat. For compact network requests output, prefer details.nextActions for request detail, route-mock diagnostics, actionable failed-request networkSourceLookup, filtering, or HAR capture follow-ups instead of guessing request-id syntax. Artifact-producing commands report details.artifacts and verification state; long-running starts such as stream, dashboard, trace/profiler, and record should be paired with the matching stop/disable command when the task is done; stream enable already-enabled outcomes are treated as idempotent success with status/disable follow-ups.",
+	"For non-core families, pass current upstream commands through the native tool directly: network route/requests/har (including request filters like --type/--method/--status), diff snapshot/screenshot/url with scoped/baseline options, trace/profiler/record, console/errors/highlight/inspect/clipboard, stream enable/disable/status, dashboard start/stop, device list for iOS simulator inventory, and chat. For compact network requests output, prefer details.nextActions for request detail, route-mock diagnostics, actionable failed-request networkSourceLookup, filtering, clearing the aggregate buffer before repro, or HAR capture follow-ups instead of guessing request-id syntax. Artifact-producing commands report details.artifacts and verification state; long-running starts such as stream, dashboard, trace/profiler, and record should be paired with the matching stop/disable command when the task is done; stream enable already-enabled outcomes are treated as idempotent success with status/disable follow-ups.",
 	"For Electron desktop apps, prefer top-level electron for wrapper-owned discovery, isolated launch, status, compact probe, and cleanup: list first, treat likely-sensitive annotations as hints rather than enforcement, launch with the default snapshot handoff unless handoff: \"tabs\" is the safer diagnostic starting point, use electron.probe or snapshot -i/qa.attached for current-session state, and always cleanup the returned launchId when done. electron.launch uses an isolated temporary profile; it does not reuse the app's normal signed-in profile or attach to an already-running authenticated app. For signed-in local app state, host-launch the normal app with --remote-debugging-port when appropriate, then use raw args connect <port|url>; after connect, inspect tab list, select the stable tab id such as tab t2, then run a condition wait or snapshot -i before using refs. close commands (`close`, `quit`, or `exit`) only close the browser/CDP session; leave manually launched app shutdown, profile cleanup, and explicit artifacts to the host owner.",
 	"For provider or specialized app workflows, load version-matched upstream guidance with skills get agentcore|electron|slack|dogfood|vercel-sandbox through the native tool; add --full when you need references/templates, and use skills get --all only for broad skill audits. Provider launches such as -p ios, --provider browserbase/kernel/browseruse/browserless/agentcore, and iOS --device are upstream-owned setup paths; use sessionMode fresh when switching providers and expect external credentials or local Appium/Xcode setup to be required.",
-	"For dialogs and frames, use dialog status/accept/dismiss and frame <selector|main> through native args; when --confirm-actions produces a pending confirmation, use details.nextActions or exact confirm <id> / deny <id> calls instead of inventing ids.",
+	"For dialogs and frames, use dialog status/accept/dismiss and frame <selector|main> through native args; dialog commands and eval snippets that look like alert/confirm/prompt/dialog triggers are shorter-bounded than normal browser calls, and timed-out dialog-like interactions may add inspect-dialog-after-timeout, dismiss-dialog-after-timeout, or recover-fresh-session-after-dialog-timeout nextActions. When --confirm-actions produces a pending confirmation, use details.nextActions or exact confirm <id> / deny <id> calls instead of inventing ids.",
 	"If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. For headed demos, put --headed on the first launch with sessionMode=fresh and verify with screenshot/tab/get-url evidence because tool success cannot prove the OS window is visible to the user. For desktop readiness, prefer real conditions first: wait --text, wait --url, wait --fn, wait --load <state>, wait --download, or qa.attached; for disappearance checks in agent-browser 0.27.1, use wait --fn predicates instead of stale upstream-help examples like wait <selector> --state hidden. Use electron.probe/status for wrapper-owned launch health or target mismatch. Fixed waits are a last resort, must stay below the wrapper IPC budget (wait 30000 is intentionally blocked), and a successful payload like \"waited\":\"timeout\" means elapsed time only—verify completion with an observed condition, fresh snapshot, or screenshot.",
 	"For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.",
 	"For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.",
-	"For downloads, prefer download <selector> <path> when an element click should save a file. Do not rely on click alone when you need the downloaded file on disk.",
-	"On dashboards with nested scroll containers, verify scroll with a screenshot or fresh snapshot -i; if the viewport did not move, prefer scrollintoview <@ref> or target the actual scrollable region. For native selects, use select <selector> <value...> (or semanticAction/job select) instead of clicking option refs; for custom comboboxes, a click/semanticAction may only focus the field, so re-snapshot and fall back to type, press Enter/arrow keys, or visible option refs.",
+	"For downloads, prefer download <selector> <path> when an element click should save a file; simple loopback anchor downloads are saved to the requested path when the wrapper can resolve an HTTP(S) href. Do not rely on click alone when you need the downloaded file on disk.",
+	"On dashboards with nested scroll containers, verify scroll with a screenshot or fresh snapshot -i; if the viewport did not move, details.data.scrolled may be false/noMovement true and you should prefer scrollintoview <@ref> or target the actual scrollable region with scroll <selector> <dir> [px|percent]. For native selects, use select <selector> <value...> (or semanticAction/job select) instead of clicking option refs; for custom comboboxes, a click/semanticAction may only focus the field, so re-snapshot and fall back to type, press Enter/arrow keys, or visible option refs.",
 	"When using eval --stdin, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics.",
-	"When using eval --stdin for extraction, pass the JavaScript through the native tool stdin field, not as an extra args token after --stdin, and return the value you want instead of relying on console.log as the primary result channel. Prefer plain expressions like ({ title: document.title }) or explicitly invoked functions like (() => ({ title: document.title }))(); if a function-shaped snippet returns {}, details.evalStdinHint may warn that the function was serialized instead of called. On file:// pages, when upstream JSON returns result: null for non-trivial stdin, details.evalResultWarning may append Eval result warning without failing the tool—treat that as inconclusive DOM verification. If get text on a CSS selector surfaces details.selectorTextVisibility or selectorTextVisibilityAll, prefer a visible @ref, a more specific selector, or the inspect-visible-text-candidates nextAction over hidden tab content.",
+	"When using eval --stdin for extraction, pass the JavaScript through the native tool stdin field, not as an extra args token after --stdin, and return the value you want instead of relying on console.log as the primary result channel. Prefer plain expressions like ({ title: document.title }) or explicitly invoked functions like (() => ({ title: document.title }))(); use outputPath when the eval/get/snapshot data should be saved as a durable local file. If a function-shaped snippet returns {}, details.evalStdinHint may warn that the function was serialized instead of called. On file:// pages, when upstream JSON returns result: null for non-trivial stdin, details.evalResultWarning may append Eval result warning without failing the tool—treat that as inconclusive DOM verification. If get text on a CSS selector surfaces details.selectorTextVisibility or selectorTextVisibilityAll, prefer a visible @ref, a more specific selector, or the inspect-visible-text-candidates nextAction over hidden tab content.",
 	"When details.pageChangeSummary is present, use changeType and summary as a compact signal for navigation, DOM mutation, confirmations, or artifacts; when nextActionIds is set, match those ids to entries in details.nextActions (or per-step nextActions inside batch) for concrete follow-up payloads instead of inferring from prose alone. If details.clickDispatch reports no trusted DOM event, refresh/inspect/retry the real click first; for static local fixtures only, an explicit eval --stdin programmatic .click() can exercise app handlers, but treat it as an untrusted scripted workaround and never use it to bypass stop-before-submit/order/purchase boundaries. If a no-navigation click surfaces details.overlayBlockers, inspect the fresh snapshot evidence before using a close/dismiss candidate nextAction; ordinary page chrome without dialog/alertdialog evidence should not trigger this diagnostic.",
 	"When commands save or spill files (screenshots, downloads, PDFs, traces, recordings, HAR, large snapshot spills), use the user's exact requested paths when given and treat paths as provisional until details.artifactVerification shows every row verified: branch on missingCount, pendingCount, unverifiedCount, per-entry state, and optional limitation before downstream file use or PASS/FAIL reporting.",
 	"For evidence-only screenshots, QA captures, or other audit artifacts, save to an explicit path and branch on details.artifactVerification plus details.artifacts before reporting PASS/FAIL; do not require vision review of inline image attachments unless the user asked for visual inspection.",
-	"Respect explicit user stop boundaries: if the user says to stop before order/post/purchase/submit, do not click that final action. If the wrapper returns details.promptGuard.reason=explicit-user-stop-boundary, gather evidence on the current page instead of retrying the blocked final action.",
+	"Respect explicit user stop boundaries yourself: if the user says to stop before order/post/purchase/submit, do not click that final action. The wrapper does not infer broad business intent from prompt text; details.promptGuard is reserved for concrete artifact-before-close checks.",
 	"Successful record stop needs ffmpeg on PATH; the wrapper may warn after record start when ffmpeg is missing.",
 	"Do not call --help or other exploratory inspection commands unless the user explicitly asks for them or debugging the browser integration is necessary.",
 ] as const;
@@ -72,10 +72,10 @@ export const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
 export const TOOL_PROMPT_GUIDELINES_SUFFIX = [
 	"Prefer agent_browser over bash for opening sites, docs, clicking, filling, screenshots, eval, and batch workflows.",
 	"Do not fall back to osascript, AppleScript, or generic browser-driving bash commands when agent_browser can do the job.",
-	"Pass exact agent-browser CLI arguments in args when you are not using semanticAction, job, or qa, excluding the binary name and --json (the wrapper injects --json automatically).",
-	"Use stdin only for eval --stdin, batch, auth save --password-stdin, or wrapper-generated job/qa batches instead of shell heredocs or password args; other command/stdin combinations are rejected before launch.",
-	`Let the extension-managed session handle the common path unless you explicitly need a fresh launch for launch-scoped flags (${LAUNCH_SCOPED_FLAG_LABEL}).`,
-	"Use sessionMode=fresh when switching from an existing implicit session to a new profile/browser executable/debug/init-script/provider launch without inventing a fixed explicit session name; later auto calls will follow that new session.",
+	"Pass exact agent-browser CLI arguments in agent_browser args when you are not using semanticAction, job, or qa, excluding the binary name and --json (agent_browser injects --json automatically).",
+	"Use agent_browser stdin only for eval --stdin, batch, auth save --password-stdin, or wrapper-generated job/qa batches instead of shell heredocs or password args; other command/stdin combinations are rejected before launch.",
+	`Let the agent_browser extension-managed session handle the common path unless you explicitly need a fresh launch for launch-scoped flags (${LAUNCH_SCOPED_FLAG_LABEL}).`,
+	"Use agent_browser sessionMode=fresh when switching from an existing implicit session to a new profile/browser executable/debug/init-script/provider launch without inventing a fixed explicit session name; later auto calls will follow that new session.",
 ] as const;
 export const INSPECTION_TOOL_CALL_EXAMPLES = [
@@ -100,25 +100,25 @@ export function buildSharedBrowserPlaybookGuidelines(options: { includeWebSearch
 /** Tier A: always-on tool promptGuidelines (keep small; Tier B lives in SHARED_BROWSER_PLAYBOOK_GUIDELINES and docs). */
 export const RUNTIME_PROMPT_GUIDELINES = [
-	"Use exactly one input mode: args, semanticAction, job, qa, sourceLookup/networkSourceLookup, or electron. stdin only for batch/eval/auth or wrapper batch; electron rejects stdin. Do not pass --json in args; wrapper injects it.",
-	"Common flow: open, snapshot -i, use current @refs or semanticAction, then re-snapshot after navigation/scroll/rerender/DOM change. Batch same-snapshot forms unless they may submit/navigate/rerender. Respect explicit stop boundaries: stop before order/post/purchase/submit.",
-	"Use top-level sessionMode=fresh for launch-scoped flags; never put --session-mode in args. For signed-in/account-specific content, use requested/configured profiles, never assume --profile Default; on profile failures, run profiles/doctor and tell the user what to configure. Use --executable-path for configured Chromium. Profile content is model-visible.",
-	"For artifacts, save the exact user path and verify details.artifactVerification/details.artifacts before claiming success. If close is blocked by details.promptGuard, save the required artifact first. record stop needs ffmpeg; close does not delete saved files; waited:timeout is not proof.",
-	"When details.nextActions is present, prefer exact payloads over prose/guessed selectors. For dense snapshots, check Omitted high-value controls/details.data.highValueControlRefIds. For dashboards, verify scroll with screenshot/snapshot; if nothing moved, target the real scroll region.",
-	"For extraction, prefer get title/url/text/html/value/attr/count or eval --stdin with plain expression, not console.log. Batch three or more known refs/selectors (e.g. [[\"get\",\"text\",\"@e1\"],[\"get\",\"text\",\"@e2\"]]); selector visibility warnings → visible @refs/nextActions.",
+	"Use agent_browser with exactly one input mode: args, semanticAction, job, qa, sourceLookup/networkSourceLookup, or electron. stdin only for batch/eval/auth or wrapper batch; electron rejects stdin. Do not pass --json in args; agent_browser injects it.",
+	"For agent_browser, the common flow is open, snapshot -i, use current @refs or semanticAction, then re-snapshot after navigation/scroll/rerender/DOM change. Batch same-snapshot forms unless they may submit/navigate/rerender. Respect explicit stop boundaries: stop before order/post/purchase/submit.",
+	"Use agent_browser top-level sessionMode=fresh for launch-scoped flags; never put --session-mode in args. For signed-in/account-specific content, use requested/configured profiles, never assume --profile Default; on profile failures, run profiles/doctor and tell the user what to configure. Use --executable-path for configured Chromium. Profile content is model-visible.",
+	"For agent_browser artifacts, save the exact user path and verify details.artifactVerification/details.artifacts before claiming success. If close is blocked by details.promptGuard, save the required artifact first. record stop needs ffmpeg; close does not delete saved files; waited:timeout is not proof.",
+	"When agent_browser details.nextActions is present, prefer exact payloads over prose/guessed selectors. For dense snapshots, check Omitted high-value controls/details.data.highValueControlRefIds. For dashboards, verify scroll with screenshot/snapshot; if nothing moved, target the real scroll region.",
+	"For agent_browser extraction, prefer get title/url/text/html/value/attr/count or eval --stdin with plain expression, not console.log. Batch three or more known refs/selectors (e.g. [[\"get\",\"text\",\"@e1\"],[\"get\",\"text\",\"@e2\"]]); selector visibility warnings → visible @refs/nextActions.",
 ] as const;
 export function buildBrowserExecutablePathGuideline(executablePath: string | undefined): string | undefined {
 	if (!executablePath) return undefined;
-	return `Agent-browser config sets browser.executablePath to ${JSON.stringify(executablePath)}; for fresh browser launches that should use that Chromium-compatible executable, add --executable-path ${JSON.stringify(executablePath)} with sessionMode:fresh. The upstream profiles command still lists Chrome profiles only; for non-Chrome Chromium login state, ask the user for an explicit profile/user-data directory path or inspect local setup with profiles/doctor before recommending a profile value.`;
+	return `agent_browser config sets browser.executablePath to ${JSON.stringify(executablePath)}; for fresh browser launches that should use that Chromium-compatible executable, add --executable-path ${JSON.stringify(executablePath)} with sessionMode:fresh. The upstream profiles command still lists Chrome profiles only; for non-Chrome Chromium login state, ask the user for an explicit profile/user-data directory path or inspect local setup with profiles/doctor before recommending a profile value.`;
 }
 export function buildBrowserDefaultProfileGuideline(profile: { name: string; policy: "explicit-only" | "authenticated-only" | "always" } | undefined): string | undefined {
 	if (!profile || profile.policy === "explicit-only") return undefined;
 	if (profile.policy === "always") {
-		return `Agent-browser config sets browser.defaultProfile.name to ${JSON.stringify(profile.name)} with policy always; use --profile ${JSON.stringify(profile.name)} with sessionMode:fresh when a fresh browser launch should use the configured profile, and treat profile content as model-visible user data.`;
+		return `agent_browser config sets browser.defaultProfile.name to ${JSON.stringify(profile.name)} with policy always; use --profile ${JSON.stringify(profile.name)} with sessionMode:fresh when a fresh browser launch should use the configured profile, and treat profile content as model-visible user data.`;
 	}
-	return `Agent-browser config sets browser.defaultProfile.name to ${JSON.stringify(profile.name)}; for signed-in/account-specific browser tasks, start with --profile ${JSON.stringify(profile.name)} plus sessionMode:fresh unless the user asks for a different profile.`;
+	return `agent_browser config sets browser.defaultProfile.name to ${JSON.stringify(profile.name)}; for signed-in/account-specific browser tasks, start with --profile ${JSON.stringify(profile.name)} plus sessionMode:fresh unless the user asks for a different profile.`;
 }
 export function buildToolPromptGuidelines(options: {

package/extensions/agent-browser/lib/process.ts CHANGED Viewed

@@ -22,7 +22,7 @@ const AGENT_BROWSER_DEFAULT_TIMEOUT_ENV = "AGENT_BROWSER_DEFAULT_TIMEOUT";
 const PI_AGENT_BROWSER_PROCESS_TIMEOUT_ENV = "PI_AGENT_BROWSER_PROCESS_TIMEOUT_MS";
 const DEFAULT_AGENT_BROWSER_SOCKET_DIR_PREFIX = "/tmp/piab";
 export const SAFE_AGENT_BROWSER_OPERATION_TIMEOUT_MS = 25_000;
-const DEFAULT_AGENT_BROWSER_PROCESS_TIMEOUT_MS = 28_000;
+const DEFAULT_AGENT_BROWSER_PROCESS_TIMEOUT_MS = 35_000;
 /** Grace period after `exit` before resolving when `close` is delayed by inherited stdio handles. */
 const EXIT_STDIO_GRACE_MS = 100;
 const httpProxyEnvName = "http_proxy";

package/extensions/agent-browser/lib/prompt-policy.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 /**
  * Purpose: Derive operator prompt constraints for browser-run preflight guards and legacy bash policy.
- * Responsibilities: Parse the latest user message into stop boundaries, requested artifact paths, and legacy bash allowance.
+ * Responsibilities: Parse the latest user message into requested artifact paths and legacy bash allowance.
  * Scope: Pure prompt-text policy; enforcement lives in orchestration prompt-guards and the extension entrypoint.
  */
@@ -10,14 +10,9 @@ export interface PromptRequestedArtifact {
 	required: boolean;
 }
-export interface PromptStopBoundary {
-	reason: "avoid-final-submit-action";
-}
 export interface PromptPolicy {
 	allowLegacyAgentBrowserBash: boolean;
 	requestedArtifacts: PromptRequestedArtifact[];
-	stopBoundary?: PromptStopBoundary;
 }
 const BROWSER_PROMPT_PATTERNS = [
@@ -36,19 +31,8 @@ const LEGACY_BASH_ALLOW_PATTERNS = [
 	/\bdebug(?:ging)?\b.*\b(?:agent[_ -]?browser|agent_browser|browser integration)\b/i,
 ];
-const STOP_BOUNDARY_PATTERNS = [
-	/\b(?:do\s+not|don't|dont|never)\s+(?:place|submit|complete|finish|finali[sz]e|confirm)\s+(?:the\s+)?(?:order|purchase|checkout|payment)\b/i,
-	/\b(?:do\s+not|don't|dont|never)\s+click\s+(?:the\s+)?(?:finish|submit|place\s+order|complete\s+order|confirm\s+order|buy\s+now|pay\s+now)\b/i,
-	/\bstop\s+(?:on|at|before)\b[^.\n]*(?:checkout\s+overview|finish|place\s+(?:the\s+)?order|submit\s+(?:the\s+)?order|complete\s+(?:the\s+)?order|purchase|payment)\b/i,
-	/\bwithout\s+(?:placing|submitting|completing|finishing|confirming)\s+(?:the\s+)?(?:order|purchase|payment)\b/i,
-];
 const PROMPT_ARTIFACT_PATH_PATTERN = /(?:^|[\s"'`(:])((?:\/[^\s"'`),;]+|[A-Za-z]:[\\/][^\s"'`),;]+|\.{1,2}[\\/][^\s"'`),;]+|[^\s"'`),;:\\/]+(?:[\\/][^\s"'`),;]+)+|[^\s"'`),;:\\/]+)\.(?:png|jpe?g|webp|gif|webm|mp4|har|pdf|trace|json))(?:[\s"'`),;.]|$)/gi;
-function buildPromptStopBoundary(prompt: string): PromptStopBoundary | undefined {
-	return STOP_BOUNDARY_PATTERNS.some((pattern) => pattern.test(prompt)) ? { reason: "avoid-final-submit-action" } : undefined;
-}
 function extractPromptRequestedArtifacts(prompt: string): PromptRequestedArtifact[] {
 	const artifacts: PromptRequestedArtifact[] = [];
 	const seen = new Set<string>();
@@ -81,7 +65,6 @@ export function buildPromptPolicy(prompt: string): PromptPolicy {
 	return {
 		allowLegacyAgentBrowserBash: LEGACY_BASH_ALLOW_PATTERNS.some((pattern) => pattern.test(prompt)),
 		requestedArtifacts: extractPromptRequestedArtifacts(prompt),
-		stopBoundary: buildPromptStopBoundary(prompt),
 	};
 }

package/extensions/agent-browser/lib/results/artifact-manifest.ts CHANGED Viewed

@@ -6,6 +6,7 @@
  * Invariants/Assumptions: Explicit-path artifacts are host-owned while persistent-session spill files are bounded by the manifest cap.
  */
+import { isRecord } from "../parsing.js";
 import type { SessionArtifactManifest, SessionArtifactManifestEntry } from "./contracts.js";
 export const SESSION_ARTIFACT_MANIFEST_VERSION = 1;
@@ -23,10 +24,6 @@ export function getSessionArtifactManifestMaxEntries(env: NodeJS.ProcessEnv = pr
 	return parsePositiveSafeInteger(env[SESSION_ARTIFACT_MANIFEST_MAX_ENTRIES_ENV]) ?? DEFAULT_SESSION_ARTIFACT_MANIFEST_MAX_ENTRIES;
 }
-function isRecord(value: unknown): value is Record<string, unknown> {
-	return typeof value === "object" && value !== null;
-}
 function isManifestEntry(value: unknown): value is SessionArtifactManifestEntry {
 	if (!isRecord(value)) return false;
 	if (typeof value.path !== "string" || value.path.trim().length === 0) return false;

package/extensions/agent-browser/lib/results/artifact-state.ts CHANGED Viewed

@@ -3,11 +3,15 @@
  * Responsibilities: Identify pending recording artifacts whose output is not durable until record stop completes.
  * Scope: Artifact predicates only; verification summaries, manifests, and user-facing formatting live in neighboring modules.
  * Usage: Imported by categories, action recommendations, and presentation to avoid divergent artifact-state rules.
- * Invariants/Assumptions: `record start` video artifacts are pending and should not be treated like verified saved files.
+ * Invariants/Assumptions: `record start` / `record restart` video artifacts are pending and should not be treated like verified saved files.
  */
-import type { FileArtifactMetadata } from "./contracts.js";
+import type { FileArtifactKind, FileArtifactMetadata } from "./contracts.js";
+export function isPendingRecordingCommand(command: string | undefined, subcommand: string | undefined, kind: FileArtifactKind | undefined): boolean {
+	return command === "record" && (subcommand === "start" || subcommand === "restart") && kind === "video";
+}
 export function isPendingRecordingArtifact(artifact: FileArtifactMetadata): boolean {
-	return artifact.command === "record" && artifact.subcommand === "start" && artifact.kind === "video";
+	return isPendingRecordingCommand(artifact.command, artifact.subcommand, artifact.kind);
 }

package/extensions/agent-browser/lib/results/contracts.ts CHANGED Viewed

@@ -64,7 +64,7 @@ export interface AgentBrowserPageChangeSummary {
 export type FileArtifactKind = "download" | "file" | "har" | "image" | "pdf" | "profile" | "trace" | "video";
-export type FileArtifactStatus = "missing" | "repaired-from-temp" | "saved" | "upstream-temp-only";
+export type FileArtifactStatus = "missing" | "pending" | "repaired-from-temp" | "saved" | "upstream-temp-only";
 export interface FileArtifactMetadata {
 	absolutePath: string;
@@ -76,12 +76,14 @@ export interface FileArtifactMetadata {
 	kind: FileArtifactKind;
 	mediaType?: string;
 	path: string;
+	recordingState?: "openRecording";
 	requestedPath?: string;
 	session?: string;
 	sizeBytes?: number;
 	status?: FileArtifactStatus;
 	subcommand?: string;
 	tempPath?: string;
+	willExistOnStop?: boolean;
 }
 export type ArtifactVerificationState = "missing" | "pending" | "unverified" | "verified";
@@ -94,11 +96,13 @@ export interface ArtifactVerificationEntry {
 	mediaType?: string;
 	path: string;
 	requestedPath?: string;
+	recordingState?: "openRecording";
 	retentionState?: ArtifactRetentionState;
 	sizeBytes?: number;
 	state: ArtifactVerificationState;
 	status?: FileArtifactStatus;
 	storageScope?: ArtifactStorageScope;
+	willExistOnStop?: boolean;
 }
 export interface ArtifactVerificationSummary {
@@ -229,7 +233,7 @@ export interface NetworkRouteRecord {
 export interface NetworkRouteDiagnostic {
 	mode: NetworkRouteRecord["mode"];
-	reason: "pending-routed-request" | "cors-likely-routed-request";
+	reason: "pending-routed-request" | "cors-likely-routed-request" | "unfulfilled-routed-request";
 	requestId?: string;
 	requestUrl?: string;
 	routePattern: string;

package/extensions/agent-browser/lib/results/envelope.ts CHANGED Viewed

@@ -84,6 +84,12 @@ export async function parseAgentBrowserEnvelope(options: string | { stdout: stri
 		if (typeof parsed.success !== "boolean") {
 			return { parseError: "agent-browser returned an invalid JSON envelope: success field must be boolean." };
 		}
+		if (!Object.hasOwn(parsed, "data")) {
+			const { success, error, ...topLevelData } = parsed;
+			if (Object.keys(topLevelData).length > 0) {
+				return { envelope: { error, success, data: topLevelData } as AgentBrowserEnvelope };
+			}
+		}
 		return { envelope: parsed as AgentBrowserEnvelope };
 	} catch (error) {
 		const message = error instanceof Error ? error.message : String(error);
@@ -119,9 +125,12 @@ function buildExitCodeFallback(options: { command?: string; effectiveArgs?: stri
 function buildWatchdogTimeoutMessage(options: { timeoutMs?: number }): string {
 	const timeoutText = options.timeoutMs === undefined ? "the wrapper watchdog" : `the ${options.timeoutMs}ms wrapper watchdog`;
+	const ipcTiming = options.timeoutMs !== undefined && options.timeoutMs <= 30_000
+		? "before the upstream CLI entered its 30s IPC retry path"
+		: "after waiting beyond the upstream CLI's 30s IPC retry window";
 	return [
-		`agent-browser exceeded ${timeoutText} and was stopped before the upstream CLI entered its 30s IPC retry path.`,
-		"Keep a single agent-browser command under 30 seconds; split long waits into shorter waits or retry with sessionMode: \"fresh\" if the session state looks stale.",
+		`agent-browser exceeded ${timeoutText} and was stopped ${ipcTiming}.`,
+		"Prefer a condition wait or split long work into shorter calls; for legitimately long opens or captures, pass agent_browser timeoutMs with a bounded higher value and inspect details.timeoutPartialProgress before retrying.",
 	].join(" ");
 }

package/extensions/agent-browser/lib/results/network-routes.ts CHANGED Viewed

@@ -23,11 +23,12 @@ function getSafeRequestId(item: Record<string, unknown>): string | undefined {
 	return requestId;
 }
-function getRouteDiagnosticReason(item: Record<string, unknown>): NetworkRouteDiagnostic["reason"] | undefined {
+function getRouteDiagnosticReason(item: Record<string, unknown>, route: NetworkRouteRecord): NetworkRouteDiagnostic["reason"] | undefined {
 	const statusMissing = typeof item.status !== "number";
 	const error = getStringRecordField(item, "error") ?? getStringRecordField(item, "failureText") ?? getStringRecordField(item, "errorText");
 	if (error && /(?:cors|cross-origin|preflight|access-control-allow-origin)/i.test(error)) return "cors-likely-routed-request";
 	if (statusMissing && isApiLikeNetworkRequest(item)) return "pending-routed-request";
+	if (route.mode !== "abort" && ((typeof item.status === "number" && item.status >= 400) || item.failed === true || typeof error === "string")) return "unfulfilled-routed-request";
 	return undefined;
 }
@@ -58,10 +59,10 @@ export function buildNetworkRouteDiagnostics(data: unknown, routes: NetworkRoute
 		if (!isRecord(item)) continue;
 		const url = getStringRecordField(item, "url");
 		if (!url) continue;
-		const reason = getRouteDiagnosticReason(item);
-		if (!reason) continue;
 		const route = routes.find((candidate) => networkRoutePatternMatchesUrl(candidate.pattern, url));
 		if (!route) continue;
+		const reason = getRouteDiagnosticReason(item, route);
+		if (!reason) continue;
 		const requestId = getSafeRequestId(item);
 		const requestUrl = redactSensitiveText(url);
 		const routePattern = redactSensitiveText(route.pattern);
@@ -73,7 +74,9 @@ export function buildNetworkRouteDiagnostics(data: unknown, routes: NetworkRoute
 			routePattern,
 			summary: reason === "cors-likely-routed-request"
 				? `Routed request ${requestId ?? requestUrl} looks CORS/preflight-related for route ${routePattern}.`
-				: `Routed request ${requestId ?? requestUrl} is still pending/no-status for route ${routePattern}.`,
+				: reason === "unfulfilled-routed-request"
+					? `Routed request ${requestId ?? requestUrl} failed instead of returning the configured route ${routePattern}.`
+					: `Routed request ${requestId ?? requestUrl} is still pending/no-status for route ${routePattern}.`,
 		});
 	}
 	return diagnostics.length > 0 ? diagnostics.slice(0, 5) : undefined;

package/extensions/agent-browser/lib/results/network.ts CHANGED Viewed

@@ -28,6 +28,12 @@ function isFailedNetworkRequest(request: Record<string, unknown>): boolean {
 	return (typeof request.status === "number" && request.status >= 400) || request.failed === true || typeof request.error === "string";
 }
+export function isNetworkArtifactNoiseRequest(request: Record<string, unknown>): boolean {
+	const url = getStringRecordField(request, "url") ?? "";
+	const resourceType = (getStringRecordField(request, "resourceType") ?? getStringRecordField(request, "mimeType") ?? "").toLowerCase();
+	return /^data:image\//i.test(url) || (url.startsWith("data:") && resourceType.includes("image"));
+}
 function isBenignAssetFailure(request: Record<string, unknown>, url: string | undefined, resourceType: string | undefined): boolean {
 	const path = getNetworkRequestUrlPath(url);
 	if (!path) return false;
@@ -58,7 +64,7 @@ export function classifyNetworkRequestFailure(request: Record<string, unknown>):
 export function summarizeNetworkFailures(requests: unknown[]): NetworkFailureSummary {
 	const failures = requests.flatMap((request) => {
-		if (!isRecord(request)) return [];
+		if (!isRecord(request) || isNetworkArtifactNoiseRequest(request)) return [];
 		const classification = classifyNetworkRequestFailure(request);
 		return classification ? [classification] : [];
 	});