pi-agent-browser-native 0.2.34 → 0.2.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/CHANGELOG.md +27 -0
  2. package/README.md +14 -14
  3. package/docs/ARCHITECTURE.md +19 -13
  4. package/docs/COMMAND_REFERENCE.md +257 -42
  5. package/docs/ELECTRON.md +3 -3
  6. package/docs/RELEASE.md +11 -11
  7. package/docs/REQUIREMENTS.md +5 -5
  8. package/docs/SUPPORT_MATRIX.md +23 -21
  9. package/docs/TOOL_CONTRACT.md +38 -27
  10. package/extensions/agent-browser/index.ts +518 -2402
  11. package/extensions/agent-browser/lib/argv-descriptor.ts +90 -0
  12. package/extensions/agent-browser/lib/argv-grammar.ts +128 -0
  13. package/extensions/agent-browser/lib/command-policy.ts +71 -0
  14. package/extensions/agent-browser/lib/command-taxonomy.ts +336 -0
  15. package/extensions/agent-browser/lib/electron/cleanup.ts +1 -0
  16. package/extensions/agent-browser/lib/executable-path.ts +19 -0
  17. package/extensions/agent-browser/lib/input-modes/params.ts +6 -6
  18. package/extensions/agent-browser/lib/orchestration/batch-stdin.ts +65 -0
  19. package/extensions/agent-browser/lib/orchestration/browser-run/browser-action-model.ts +154 -0
  20. package/extensions/agent-browser/lib/orchestration/browser-run/click-dispatch.ts +149 -0
  21. package/extensions/agent-browser/lib/orchestration/browser-run/diagnostics.ts +10 -28
  22. package/extensions/agent-browser/lib/orchestration/browser-run/final-result.ts +6 -2
  23. package/extensions/agent-browser/lib/orchestration/browser-run/index.ts +33 -27
  24. package/extensions/agent-browser/lib/orchestration/browser-run/prepare.ts +48 -22
  25. package/extensions/agent-browser/lib/orchestration/browser-run/process-output.ts +33 -10
  26. package/extensions/agent-browser/lib/orchestration/browser-run/prompt-guards.ts +93 -0
  27. package/extensions/agent-browser/lib/orchestration/browser-run/session-state.ts +19 -123
  28. package/extensions/agent-browser/lib/orchestration/browser-run/types.ts +26 -1
  29. package/extensions/agent-browser/lib/orchestration/electron-host/index.ts +860 -0
  30. package/extensions/agent-browser/lib/playbook.ts +9 -9
  31. package/extensions/agent-browser/lib/prompt-policy.ts +122 -0
  32. package/extensions/agent-browser/lib/results/action-recommendations.ts +3 -23
  33. package/extensions/agent-browser/lib/results/presentation/navigation.ts +2 -34
  34. package/extensions/agent-browser/lib/runtime.ts +93 -227
  35. package/extensions/agent-browser/lib/session-page-state.ts +31 -14
  36. package/extensions/agent-browser/lib/temp.ts +148 -23
  37. package/package.json +4 -4
  38. package/scripts/agent-browser-capability-baseline.mjs +198 -1
@@ -24,8 +24,8 @@ export const QUICK_START_GUIDELINES = [
24
24
  "Locator-first clicks/fills and native select changes without hand-building argv: { semanticAction: { action: \"click\", locator: \"text\", value: \"Close\" } }, { semanticAction: { action: \"fill\", locator: \"label\", value: \"Email\", text: \"user@example.com\" } }, or { semanticAction: { action: \"select\", selector: \"#flavor\", value: \"chocolate\" } }; add semanticAction.session when targeting a named upstream browser session; details.compiledSemanticAction shows the semantic target, while details.effectiveArgs may show a resolved current @ref for active-session role/name click/check/uncheck actions to avoid hidden duplicate matches; selector-not-found failures may append bounded click try-*-candidate next actions or, for fill misses with current editable refs, details.richInputRecovery with focus/click actions that do not copy fill text; stale-ref failures can return retry-semantic-action-after-stale-ref for compiled find actions when retry safety is provable.",
25
25
  `Common advanced calls: { args: ["batch"], stdin: "[[\"open\",\"https://example.com\"],[\"snapshot\",\"-i\"]]" }, { job: { steps: [{ action: "open", url: "https://example.com" }, { action: "assertText", text: "Example Domain" }, { action: "screenshot", path: ".dogfood/example.png" }] } }, { qa: { url: "https://example.com", expectedText: "Example Domain", screenshotPath: ".dogfood/qa-example.png" } } (example.com smoke only; elsewhere match exact visible text from snapshot -i), { electron: { action: "list", query: "code" } }, { electron: { action: "launch", appName: "Visual Studio Code", handoff: "snapshot" } }, { electron: { action: "probe" } }, { qa: { attached: true, expectedText: "Explorer" } }, { args: ["eval", "--stdin"], stdin: "document.title" }, { args: ["auth", "save", "name", "--password-stdin"], stdin: "<password from user-approved secret source>" }, { args: ["--profile", "Default", "open", "https://example.com/account"], sessionMode: "fresh" }, and { args: ["open", "--enable", "react-devtools", "https://example.com"], sessionMode: "fresh" }. For app pages with a native dropdown, job steps can include { action: "select", selector: "#flavor", value: "chocolate" } before the dependent assertion.`,
26
26
  "Constrained job navigation is explicit only: click (and select/submit flows that may navigate) does not prove the next page loaded; add assertUrl and/or assertText after navigation-prone steps before screenshot or later interactions. Example: { job: { steps: [{ action: \"open\", url: \"https://shop.example/checkout\" }, { action: \"fill\", selector: \"#email\", text: \"user@example.com\" }, { action: \"click\", selector: \"#continue\" }, { action: \"assertUrl\", url: \"**/shipping\" }, { action: \"assertText\", text: \"Shipping address\" }, { action: \"screenshot\", path: \".dogfood/shipping.png\" }] } }. Top-level click may add navigationSummary hints, but job never auto-inserts post-click asserts.",
27
- "High-value command reference: select <selector> <value...> changes native dropdown values; download <selector> <path> saves a file triggered by a click; get title/url/text/html/value/attr/count reads page state; screenshot [path] captures an image; pdf <path> saves a PDF; tab list and tab <tab-id-or-label> inspect or recover the active tab; react tree/inspect/renders/suspense introspect React after --enable react-devtools; vitals [url] measures Core Web Vitals; pushstate <url> performs SPA navigation.",
28
- "For artifact-producing commands, read the visible artifact block and details.artifactVerification before using files: check requested path, absolute path, existence, size bytes, artifact kind, optional mediaType, status, optional limitation, and verified/missing/pending/unverified counts. details.artifacts contains per-file metadata. Browser close does not delete explicit saved files; if close reports details.artifactCleanup, use host file tools to remove paths listed in explicitArtifactPaths (when non-empty) after inspection. For annotated screenshots inside batch, put --annotate in top-level args (for example { args: [\"--annotate\", \"batch\"], stdin: \"[[\\\"screenshot\\\",\\\"/tmp/page.png\\\"]]\" }) rather than inside the screenshot step.",
27
+ "High-value command reference: click <selector> --new-tab opens link-like targets in a new tab; select <selector> <value...> changes native dropdown values; scroll <dir> [px] --selector <sel> targets nested scrollers; download <selector> <path> saves a file triggered by a click; get title/url/text/html/value/attr/count reads page state; screenshot [selector] [path] captures a page or element image; pdf <path> saves a PDF; tab list and tab <tab-id-or-label> inspect or recover the active tab; react tree/inspect/renders/suspense introspect React after --enable react-devtools; vitals [url] measures Core Web Vitals; pushstate <url> performs SPA navigation; tap <selector> and swipe <direction> [distance] support iOS/provider touch flows.",
28
+ "For artifact-producing commands, read the visible artifact block and details.artifactVerification before using files: check requested path, absolute path, existence, size bytes, artifact kind, optional mediaType, status, optional limitation, and verified/missing/pending/unverified counts. details.artifacts contains per-file metadata. Browser close does not delete explicit saved files; if close reports details.artifactCleanup, use host file tools to remove paths listed in explicitArtifactPaths (when non-empty) after inspection. If close fails with details.promptGuard.reason=requested-artifacts-missing-before-close, save the exact required artifact path before closing. For annotated screenshots inside batch, put --annotate in top-level args (for example { args: [\"--annotate\", \"batch\"], stdin: \"[[\\\"screenshot\\\",\\\"/tmp/page.png\\\"]]\" }) rather than inside the screenshot step.",
29
29
  "When details.nextActions is present, prefer those exact native agent_browser follow-up payloads over prose guidance; they may include args, stdin, sessionMode, networkSourceLookup, safety notes, or artifactPath for saved files.",
30
30
  ] as const;
31
31
 
@@ -45,13 +45,13 @@ export const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
45
45
  "If you already used the implicit session and now need launch-scoped flags like --profile, --session-name, --cdp, --state, --auto-connect, --init-script, --enable, -p/--provider, or iOS --device, retry with sessionMode set to fresh or pass an explicit --session for the new launch. After a successful unnamed fresh launch, later auto calls follow that new session.",
46
46
  "For React introspection, launch the page with --enable react-devtools before first navigation, then use react tree, react inspect <fiberId>, sourceLookup candidates for local UI source hints, react renders start/stop, or react suspense; sourceLookup is experimental and reports confidence/evidence instead of guaranteed DOM-to-file mappings. For failed fetches and APIs, networkSourceLookup (experimental) correlates failed network requests with initiator metadata and bounded workspace URL literals—candidates only, not definitive blame. Use vitals [url] for Core Web Vitals and hydration timing, and pushstate <url> for client-side SPA navigation.",
47
47
  "For first-navigation setup, use open without a URL plus network route --resource-type <csv>, cookies set --curl <file>, or --init-script/--enable before navigate/opening the target page.",
48
- "For stateful browser context work, prefer purpose-specific page actions before dumping browser data: use auth save --password-stdin with the tool stdin field for credentials, state save/load for portable test state, cookies get/set/clear and storage local|session only when the task needs those values, and expect cookie/storage/auth/state summaries to redact credential-like fields.",
48
+ "For stateful browser context work, prefer purpose-specific page actions before dumping browser data: use auth save --password-stdin with the tool stdin field for credentials, auth list/show/delete/remove for local auth-profile maintenance, auth login when you need the browser to fill a saved profile, state save/load for portable test state, state list/show/rename/clear/clear -a/clean for saved-state lifecycle cleanup, cookies get/set/clear and storage local|session only when the task needs those values, and expect cookie/storage/auth/state summaries to redact credential-like fields.",
49
49
  "For batch chains that touch cookies, storage, auth, or other secret-bearing commands, use details.batchSteps for per-step artifacts, categories, spill paths, and full structured errors; top-level details.data on batch is only a compact redacted step matrix (success, argv-redacted command, redacted result or scrubbed error text) built from the same presentation rules as standalone calls.",
50
- "For non-core families, pass current upstream commands through the native tool directly: network route/requests/har, diff snapshot/screenshot/url, trace/profiler/record, console/errors/highlight/inspect/clipboard, stream enable/disable/status, dashboard start/stop, and chat. For compact network requests output, prefer details.nextActions for request detail, actionable failed-request networkSourceLookup, filtering, or HAR capture follow-ups instead of guessing request-id syntax. Artifact-producing commands report details.artifacts and verification state; long-running starts such as stream, dashboard, trace/profiler, and record should be paired with the matching stop/disable command when the task is done.",
51
- "For Electron desktop apps, prefer top-level electron for wrapper-owned discovery, isolated launch, status, compact probe, and cleanup: list first, treat likely-sensitive annotations as hints rather than enforcement, launch with the default snapshot handoff unless handoff: \"tabs\" is the safer diagnostic starting point, use electron.probe or snapshot -i/qa.attached for current-session state, and always cleanup the returned launchId when done. electron.launch uses an isolated temporary profile; it does not reuse the app's normal signed-in profile or attach to an already-running authenticated app. For signed-in local app state, host-launch the normal app with --remote-debugging-port when appropriate, then use raw args connect <port|url>; after connect, inspect tab list, select the stable tab id such as tab t2, then run a condition wait or snapshot -i before using refs. close only closes the browser/CDP session; leave manually launched app shutdown, profile cleanup, and explicit artifacts to the host owner.",
52
- "For provider or specialized app workflows, load version-matched upstream guidance with skills get agentcore|electron|slack|dogfood|vercel-sandbox through the native tool. Provider launches such as -p ios, --provider browserbase/kernel/browseruse/browserless/agentcore, and iOS --device are upstream-owned setup paths; use sessionMode fresh when switching providers and expect external credentials or local Appium/Xcode setup to be required.",
50
+ "For non-core families, pass current upstream commands through the native tool directly: network route/requests/har (including request filters like --type/--method/--status), diff snapshot/screenshot/url with scoped/baseline options, trace/profiler/record, console/errors/highlight/inspect/clipboard, stream enable/disable/status, dashboard start/stop, device list for iOS simulator inventory, and chat. For compact network requests output, prefer details.nextActions for request detail, actionable failed-request networkSourceLookup, filtering, or HAR capture follow-ups instead of guessing request-id syntax. Artifact-producing commands report details.artifacts and verification state; long-running starts such as stream, dashboard, trace/profiler, and record should be paired with the matching stop/disable command when the task is done.",
51
+ "For Electron desktop apps, prefer top-level electron for wrapper-owned discovery, isolated launch, status, compact probe, and cleanup: list first, treat likely-sensitive annotations as hints rather than enforcement, launch with the default snapshot handoff unless handoff: \"tabs\" is the safer diagnostic starting point, use electron.probe or snapshot -i/qa.attached for current-session state, and always cleanup the returned launchId when done. electron.launch uses an isolated temporary profile; it does not reuse the app's normal signed-in profile or attach to an already-running authenticated app. For signed-in local app state, host-launch the normal app with --remote-debugging-port when appropriate, then use raw args connect <port|url>; after connect, inspect tab list, select the stable tab id such as tab t2, then run a condition wait or snapshot -i before using refs. close commands (`close`, `quit`, or `exit`) only close the browser/CDP session; leave manually launched app shutdown, profile cleanup, and explicit artifacts to the host owner.",
52
+ "For provider or specialized app workflows, load version-matched upstream guidance with skills get agentcore|electron|slack|dogfood|vercel-sandbox through the native tool; add --full when you need references/templates, and use skills get --all only for broad skill audits. Provider launches such as -p ios, --provider browserbase/kernel/browseruse/browserless/agentcore, and iOS --device are upstream-owned setup paths; use sessionMode fresh when switching providers and expect external credentials or local Appium/Xcode setup to be required.",
53
53
  "For dialogs and frames, use dialog status/accept/dismiss and frame <selector|main> through native args; when --confirm-actions produces a pending confirmation, use details.nextActions or exact confirm <id> / deny <id> calls instead of inventing ids.",
54
- "If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. For desktop readiness, prefer real conditions first: wait --text, wait --url, wait --fn, wait --load <state>, wait --download, or qa.attached; use electron.probe/status for wrapper-owned launch health or target mismatch. Fixed waits are a last resort, must stay below the wrapper IPC budget (wait 30000 is intentionally blocked), and a successful payload like \"waited\":\"timeout\" means elapsed time only—verify completion with an observed condition, fresh snapshot, or screenshot.",
54
+ "If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. For desktop readiness, prefer real conditions first: wait --text, wait --url, wait --fn, wait --load <state>, wait --download, or qa.attached; for disappearance checks in agent-browser 0.27.0, use wait --fn predicates instead of stale upstream-help examples like wait <selector> --state hidden. Use electron.probe/status for wrapper-owned launch health or target mismatch. Fixed waits are a last resort, must stay below the wrapper IPC budget (wait 30000 is intentionally blocked), and a successful payload like \"waited\":\"timeout\" means elapsed time only—verify completion with an observed condition, fresh snapshot, or screenshot.",
55
55
  "For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.",
56
56
  "For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.",
57
57
  "For downloads, prefer download <selector> <path> when an element click should save a file. Do not rely on click alone when you need the downloaded file on disk.",
@@ -61,7 +61,7 @@ export const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
61
61
  "When details.pageChangeSummary is present, use changeType and summary as a compact signal for navigation, DOM mutation, confirmations, or artifacts; when nextActionIds is set, match those ids to entries in details.nextActions (or per-step nextActions inside batch) for concrete follow-up payloads instead of inferring from prose alone. If a no-navigation click surfaces details.overlayBlockers, inspect the fresh snapshot evidence before using a close/dismiss candidate nextAction; ordinary page chrome without dialog/alertdialog evidence should not trigger this diagnostic.",
62
62
  "When commands save or spill files (screenshots, downloads, PDFs, traces, recordings, HAR, large snapshot spills), use the user's exact requested paths when given and treat paths as provisional until details.artifactVerification shows every row verified: branch on missingCount, pendingCount, unverifiedCount, per-entry state, and optional limitation before downstream file use or PASS/FAIL reporting.",
63
63
  "For evidence-only screenshots, QA captures, or other audit artifacts, save to an explicit path and branch on details.artifactVerification plus details.artifacts before reporting PASS/FAIL; do not require vision review of inline image attachments unless the user asked for visual inspection.",
64
- "Respect explicit user stop boundaries: if the user says to stop before order/post/purchase/submit, do not click that final action.",
64
+ "Respect explicit user stop boundaries: if the user says to stop before order/post/purchase/submit, do not click that final action. If the wrapper returns details.promptGuard.reason=explicit-user-stop-boundary, gather evidence on the current page instead of retrying the blocked final action.",
65
65
  "Successful record stop needs ffmpeg on PATH; the wrapper may warn after record start when ffmpeg is missing.",
66
66
  "Do not call --help or other exploratory inspection commands unless the user explicitly asks for them or debugging the browser integration is necessary.",
67
67
  ] as const;
@@ -100,7 +100,7 @@ export const RUNTIME_PROMPT_GUIDELINES = [
100
100
  "Use exactly one input mode: args (open→snapshot -i→@refs), semanticAction, job, qa, sourceLookup/networkSourceLookup (candidate hints), or electron. stdin only for batch/eval/auth or wrapper batch; electron rejects stdin. Do not pass --json in args; wrapper injects it.",
101
101
  "Common flow: open, snapshot -i, use current @refs or semanticAction, then re-snapshot after navigation/scroll/rerender/DOM change. Batch same-snapshot fills unless they may submit/navigate/rerender. Respect explicit stop boundaries: if the user says stop before order/post/purchase/submit, do not click the final action.",
102
102
  "Use sessionMode=fresh for launch-scoped flags on an active implicit session. For signed-in/account-specific content, start with --profile Default plus sessionMode=fresh unless asked otherwise; visible content is model-visible.",
103
- "For artifacts, save the exact user path and check details.artifactVerification/details.artifacts before claiming success. record stop needs ffmpeg on PATH; close does not delete saved files; \"waited\":\"timeout\" is not proof.",
103
+ "For artifacts, save the exact user path and check details.artifactVerification/details.artifacts before claiming success. If close is blocked by details.promptGuard, save the required artifact first. record stop needs ffmpeg on PATH; close does not delete saved files; \"waited\":\"timeout\" is not proof.",
104
104
  "When details.nextActions is present, prefer exact payloads over prose/guessed selectors. For dense snapshots, check Omitted high-value controls/details.data.highValueControlRefIds. For dashboards, verify scroll with screenshot/snapshot; if nothing moved, target the real scroll region.",
105
105
  "For extraction, prefer get title/url/text/html/value/attr/count or eval --stdin with plain expression, not console.log. Batch three or more known refs/selectors (e.g. [[\"get\",\"text\",\"@e1\"],[\"get\",\"text\",\"@e2\"]]); selector visibility warnings → visible @refs/nextActions.",
106
106
  ] as const;
@@ -0,0 +1,122 @@
1
+ /**
2
+ * Purpose: Derive operator prompt constraints for browser-run preflight guards and legacy bash policy.
3
+ * Responsibilities: Parse the latest user message into stop boundaries, requested artifact paths, and legacy bash allowance.
4
+ * Scope: Pure prompt-text policy; enforcement lives in orchestration prompt-guards and the extension entrypoint.
5
+ */
6
+
7
+ export interface PromptRequestedArtifact {
8
+ kind: "recording" | "screenshot";
9
+ path: string;
10
+ required: boolean;
11
+ }
12
+
13
+ export interface PromptStopBoundary {
14
+ reason: "avoid-final-submit-action";
15
+ }
16
+
17
+ export interface PromptPolicy {
18
+ allowLegacyAgentBrowserBash: boolean;
19
+ requestedArtifacts: PromptRequestedArtifact[];
20
+ stopBoundary?: PromptStopBoundary;
21
+ }
22
+
23
+ const BROWSER_PROMPT_PATTERNS = [
24
+ /\b(?:agent[_ -]?browser|browser automation|eval\s+--stdin|screenshot|snapshot|tab\s+list)\b/i,
25
+ /\b(?:react\s+(?:tree|inspect|renders|suspense)|web\s+vitals|core\s+web\s+vitals|pushstate)\b/i,
26
+ /\b(?:live\s+docs?|online\s+research|research\s+(?:online|the\s+web)|search\s+(?:online|the\s+web)|web\s+research)\b/i,
27
+ /\bbrowser\b.*\b(?:automation|click|fill|navigate|open|page|screenshot|site|snapshot|tab|url|visit|web(?:site| page)?)\b/i,
28
+ /\b(?:browse|click|fill|login|navigate|open|visit)\b.*\b(?:https?:\/\/\S+|page|site|tab|url|web(?:site| page)?)\b/i,
29
+ ];
30
+
31
+ const LEGACY_BASH_ALLOW_PATTERNS = [
32
+ /\b(?:bash-oriented workflow|bash workflow)\b/i,
33
+ /\b(?:use|via|through|with)\s+bash\b/i,
34
+ /\bnpx\s+agent-browser\b/i,
35
+ /\bagent-browser\s+--(?:help|version)\b/i,
36
+ /\bdebug(?:ging)?\b.*\b(?:agent[_ -]?browser|agent_browser|browser integration)\b/i,
37
+ ];
38
+
39
+ const STOP_BOUNDARY_PATTERNS = [
40
+ /\b(?:do\s+not|don't|dont|never)\s+(?:place|submit|complete|finish|finali[sz]e|confirm)\s+(?:the\s+)?(?:order|purchase|checkout|payment)\b/i,
41
+ /\b(?:do\s+not|don't|dont|never)\s+click\s+(?:the\s+)?(?:finish|submit|place\s+order|complete\s+order|confirm\s+order|buy\s+now|pay\s+now)\b/i,
42
+ /\bstop\s+(?:on|at|before)\b[^.\n]*(?:checkout\s+overview|finish|place\s+(?:the\s+)?order|submit\s+(?:the\s+)?order|complete\s+(?:the\s+)?order|purchase|payment)\b/i,
43
+ /\bwithout\s+(?:placing|submitting|completing|finishing|confirming)\s+(?:the\s+)?(?:order|purchase|payment)\b/i,
44
+ ];
45
+
46
+ const PROMPT_ARTIFACT_PATH_PATTERN = /(?:^|[\s"'`(:])((?:\/[^\s"'`),;]+|[A-Za-z]:[\\/][^\s"'`),;]+|\.{1,2}[\\/][^\s"'`),;]+|[^\s"'`),;:\\/]+(?:[\\/][^\s"'`),;]+)+|[^\s"'`),;:\\/]+)\.(?:png|jpe?g|webp|gif|webm|mp4|har|pdf|trace|json))(?:[\s"'`),;.]|$)/gi;
47
+
48
+ function buildPromptStopBoundary(prompt: string): PromptStopBoundary | undefined {
49
+ return STOP_BOUNDARY_PATTERNS.some((pattern) => pattern.test(prompt)) ? { reason: "avoid-final-submit-action" } : undefined;
50
+ }
51
+
52
+ function extractPromptRequestedArtifacts(prompt: string): PromptRequestedArtifact[] {
53
+ const artifacts: PromptRequestedArtifact[] = [];
54
+ const seen = new Set<string>();
55
+ for (const line of prompt.split(/\r?\n/)) {
56
+ const lowerLine = line.toLowerCase();
57
+ const kind = lowerLine.includes("screenshot")
58
+ ? "screenshot"
59
+ : /\b(?:screen\s+recording|recording|webm|video)\b/.test(lowerLine)
60
+ ? "recording"
61
+ : undefined;
62
+ if (!kind) continue;
63
+ PROMPT_ARTIFACT_PATH_PATTERN.lastIndex = 0;
64
+ for (const match of line.matchAll(PROMPT_ARTIFACT_PATH_PATTERN)) {
65
+ const path = match[1]?.trim();
66
+ if (!path) continue;
67
+ const key = `${kind}:${path}`;
68
+ if (seen.has(key)) continue;
69
+ seen.add(key);
70
+ artifacts.push({
71
+ kind,
72
+ path,
73
+ required: kind === "screenshot" || !/\b(?:if|when)\s+(?:recording\s+)?(?:is\s+)?available\b/i.test(line),
74
+ });
75
+ }
76
+ }
77
+ return artifacts;
78
+ }
79
+
80
+ export function buildPromptPolicy(prompt: string): PromptPolicy {
81
+ return {
82
+ allowLegacyAgentBrowserBash: LEGACY_BASH_ALLOW_PATTERNS.some((pattern) => pattern.test(prompt)),
83
+ requestedArtifacts: extractPromptRequestedArtifacts(prompt),
84
+ stopBoundary: buildPromptStopBoundary(prompt),
85
+ };
86
+ }
87
+
88
+ function getMessageText(content: unknown): string {
89
+ if (typeof content === "string") return content;
90
+ if (!Array.isArray(content)) return "";
91
+
92
+ return content
93
+ .map((item) => {
94
+ if (typeof item !== "object" || item === null) return "";
95
+ return item.type === "text" && typeof item.text === "string" ? item.text : "";
96
+ })
97
+ .filter((text) => text.length > 0)
98
+ .join("\n");
99
+ }
100
+
101
+ export function shouldAppendBrowserSystemPrompt(prompt: string): boolean {
102
+ const normalizedPrompt = prompt.trim();
103
+ if (normalizedPrompt.length === 0) {
104
+ return false;
105
+ }
106
+ return BROWSER_PROMPT_PATTERNS.some((pattern) => pattern.test(normalizedPrompt));
107
+ }
108
+
109
+ export function getLatestUserPrompt(branch: unknown[]): string {
110
+ for (let index = branch.length - 1; index >= 0; index -= 1) {
111
+ const entry = branch[index];
112
+ if (typeof entry !== "object" || entry === null || !("type" in entry) || entry.type !== "message") {
113
+ continue;
114
+ }
115
+ const message = "message" in entry ? entry.message : undefined;
116
+ if (typeof message !== "object" || message === null || !("role" in message) || message.role !== "user") {
117
+ continue;
118
+ }
119
+ return getMessageText("content" in message ? message.content : undefined);
120
+ }
121
+ return "";
122
+ }
@@ -6,6 +6,7 @@
6
6
  * Invariants/Assumptions: Action ids are public machine-readable contracts; preserve first-observed order.
7
7
  */
8
8
 
9
+ import { isOpenNavigationCommand, isPageMutationCommand } from "../command-taxonomy.js";
9
10
  import { isPendingRecordingArtifact } from "./artifact-state.js";
10
11
  import type {
11
12
  AgentBrowserFailureCategory,
@@ -56,27 +57,6 @@ function buildElectronToolAction(options: {
56
57
  };
57
58
  }
58
59
 
59
- const MUTATING_COMMANDS = new Set([
60
- "back",
61
- "check",
62
- "click",
63
- "dblclick",
64
- "dialog",
65
- "fill",
66
- "forward",
67
- "hover",
68
- "press",
69
- "pushstate",
70
- "reload",
71
- "scroll",
72
- "scrollintoview",
73
- "select",
74
- "swipe",
75
- "tap",
76
- "type",
77
- "uncheck",
78
- ]);
79
-
80
60
  function getDownloadRetryPath(args: string[] | undefined, fallback: string | undefined): string | undefined {
81
61
  if (fallback) return fallback;
82
62
  if (!args || args.length === 0) return undefined;
@@ -170,13 +150,13 @@ export function buildAgentBrowserNextActions(options: {
170
150
  }
171
151
  }
172
152
  if (options.resultCategory === "success") {
173
- if (options.command === "open") {
153
+ if (isOpenNavigationCommand(options.command)) {
174
154
  actions.push(buildNextToolAction({
175
155
  args: ["snapshot", "-i"],
176
156
  id: "inspect-opened-page",
177
157
  reason: "Inspect the opened page before choosing interactive refs.",
178
158
  }));
179
- } else if (options.command && MUTATING_COMMANDS.has(options.command)) {
159
+ } else if (isPageMutationCommand(options.command)) {
180
160
  actions.push(buildNextToolAction({
181
161
  args: ["snapshot", "-i"],
182
162
  id: "inspect-after-mutation",
@@ -4,41 +4,13 @@
4
4
  * Scope: Navigation and get/eval extraction formatting only.
5
5
  */
6
6
 
7
+ import { isNavigationObservableCommandName, isPageChangeSummaryCommand } from "../../command-taxonomy.js";
7
8
  import { isRecord } from "../../parsing.js";
8
9
  import type { CommandInfo } from "../../runtime.js";
9
10
  import { detectConfirmationRequired } from "../confirmation.js";
10
11
  import type { AgentBrowserPageChangeSummary, FileArtifactMetadata } from "../contracts.js";
11
12
  import { redactModelFacingText, stringifyModelFacing } from "./common.js";
12
13
 
13
- const NAVIGATION_SUMMARY_COMMANDS = new Set(["back", "click", "dblclick", "forward", "reload"]);
14
-
15
- const PAGE_CHANGE_SUMMARY_COMMANDS = new Set([
16
- "back",
17
- "check",
18
- "click",
19
- "dblclick",
20
- "dialog",
21
- "download",
22
- "fill",
23
- "forward",
24
- "goto",
25
- "hover",
26
- "navigate",
27
- "open",
28
- "pdf",
29
- "press",
30
- "pushstate",
31
- "reload",
32
- "screenshot",
33
- "scroll",
34
- "scrollintoview",
35
- "select",
36
- "swipe",
37
- "tap",
38
- "type",
39
- "uncheck",
40
- ]);
41
-
42
14
  const NAVIGATION_SUMMARY_FIELD = "navigationSummary";
43
15
 
44
16
  interface NavigationSummary {
@@ -108,7 +80,7 @@ export function formatExtractionText(commandInfo: CommandInfo, data: Record<stri
108
80
  }
109
81
 
110
82
  export function isNavigationObservableCommand(command: string | undefined): boolean {
111
- return command !== undefined && NAVIGATION_SUMMARY_COMMANDS.has(command);
83
+ return isNavigationObservableCommandName(command);
112
84
  }
113
85
 
114
86
  function isNavigationSummary(value: unknown): value is NavigationSummary {
@@ -142,10 +114,6 @@ export function formatNavigationSummary(summary: NavigationSummary): string | un
142
114
  return normalized.title ?? normalized.url;
143
115
  }
144
116
 
145
- function isPageChangeSummaryCommand(command: string | undefined): boolean {
146
- return command !== undefined && PAGE_CHANGE_SUMMARY_COMMANDS.has(command);
147
- }
148
-
149
117
  export function buildPageChangeSummary(options: {
150
118
  artifacts?: FileArtifactMetadata[];
151
119
  commandInfo: CommandInfo;