pi-agent-browser-native 0.2.41 → 0.2.43
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +30 -3
- package/README.md +9 -9
- package/docs/ARCHITECTURE.md +1 -1
- package/docs/COMMAND_REFERENCE.md +10 -11
- package/docs/SUPPORT_MATRIX.md +6 -5
- package/docs/TOOL_CONTRACT.md +27 -24
- package/extensions/agent-browser/index.ts +71 -2
- package/extensions/agent-browser/lib/input-modes/params.ts +1 -1
- package/extensions/agent-browser/lib/input-modes/types.ts +1 -1
- package/extensions/agent-browser/lib/navigation-policy.ts +95 -0
- package/extensions/agent-browser/lib/orchestration/browser-run/diagnostics.ts +2 -7
- package/extensions/agent-browser/lib/orchestration/browser-run/final-result.ts +1 -0
- package/extensions/agent-browser/lib/orchestration/browser-run/prepare.ts +2 -2
- package/extensions/agent-browser/lib/orchestration/browser-run/process-output.ts +103 -12
- package/extensions/agent-browser/lib/orchestration/browser-run/session-state.ts +20 -3
- package/extensions/agent-browser/lib/orchestration/browser-run/types.ts +6 -1
- package/extensions/agent-browser/lib/playbook.ts +4 -4
- package/extensions/agent-browser/lib/results/action-recommendations.ts +15 -0
- package/extensions/agent-browser/lib/results/contracts.ts +17 -0
- package/extensions/agent-browser/lib/results/network-routes.ts +80 -0
- package/extensions/agent-browser/lib/results/network.ts +10 -2
- package/extensions/agent-browser/lib/results/presentation/artifacts.ts +14 -0
- package/extensions/agent-browser/lib/results/presentation/batch.ts +36 -13
- package/extensions/agent-browser/lib/results/presentation/diagnostics.ts +154 -16
- package/extensions/agent-browser/lib/results/presentation/errors.ts +62 -2
- package/extensions/agent-browser/lib/results/presentation/semantic-action.ts +2 -4
- package/extensions/agent-browser/lib/results/presentation.ts +31 -1
- package/extensions/agent-browser/lib/results/selector-recovery.ts +11 -3
- package/extensions/agent-browser/lib/results/shared.ts +1 -0
- package/extensions/agent-browser/lib/results.ts +3 -0
- package/extensions/agent-browser/lib/runtime.ts +6 -0
- package/package.json +1 -1
- package/scripts/config.mjs +18 -18
package/docs/TOOL_CONTRACT.md
CHANGED
|
@@ -152,9 +152,9 @@ The extension always plans normal browser commands with `--json` prepended in `e
|
|
|
152
152
|
- If you already used the implicit session and now need launch-scoped flags (--auto-connect, --cdp, --enable, --executable-path, --init-script, --device, --profile, --provider, -p, --session-name, --state), retry with top-level sessionMode set to fresh or pass an explicit --session for the new launch; never pass --session-mode inside args. After a successful unnamed fresh launch, later auto calls follow that new session.
|
|
153
153
|
- For React introspection, launch the page with --enable react-devtools before first navigation, then use react tree, react inspect <fiberId>, sourceLookup candidates for local UI source hints, react renders start/stop, or react suspense; sourceLookup is experimental and reports confidence/evidence instead of guaranteed DOM-to-file mappings. For failed fetches and APIs, networkSourceLookup (experimental) correlates failed network requests with initiator metadata and bounded workspace URL literals—candidates only, not definitive blame. Use vitals [url] for Core Web Vitals and hydration timing, and pushstate <url> for client-side SPA navigation.
|
|
154
154
|
- For first-navigation setup, use open without a URL plus network route --resource-type <csv>, cookies set --curl <file>, or --init-script/--enable before navigating to or opening the target page.
|
|
155
|
-
- For stateful browser context work, prefer purpose-specific page actions before dumping browser data: use auth save --password-stdin with the tool stdin field for credentials, auth list/show/delete/remove for local auth-profile maintenance, auth login when you need the browser to fill a saved profile, state save/load for portable test state, state list/show/rename/clear/clear -a/clean for saved-state lifecycle cleanup, cookies get/set/clear and storage local|session only when the task needs those values, and expect cookie/storage/auth/state summaries to redact credential-like fields.
|
|
155
|
+
- For stateful browser context work, prefer purpose-specific page actions before dumping browser data: use auth save --password-stdin with the tool stdin field for credentials, auth list/show/delete/remove for local auth-profile maintenance, auth login when you need the browser to fill a saved profile, state save/load for portable test state, state list/show/rename/clear/clear -a/clean for saved-state lifecycle cleanup, cookies get/set/clear and storage local|session only when the task needs those values, and expect cookie/storage/auth/state summaries to redact credential-like fields while allowing benign primitive storage values when useful for local QA.
|
|
156
156
|
- For batch chains that touch cookies, storage, auth, or other secret-bearing commands, use details.batchSteps for per-step artifacts, categories, spill paths, and full structured errors; top-level details.data on batch is only a compact redacted step matrix (success, argv-redacted command, redacted result or scrubbed error text) built from the same presentation rules as standalone calls.
|
|
157
|
-
- For non-core families, pass current upstream commands through the native tool directly: network route/requests/har (including request filters like --type/--method/--status), diff snapshot/screenshot/url with scoped/baseline options, trace/profiler/record, console/errors/highlight/inspect/clipboard, stream enable/disable/status, dashboard start/stop, device list for iOS simulator inventory, and chat. For compact network requests output, prefer details.nextActions for request detail, actionable failed-request networkSourceLookup, filtering, or HAR capture follow-ups instead of guessing request-id syntax. Artifact-producing commands report details.artifacts and verification state; long-running starts such as stream, dashboard, trace/profiler, and record should be paired with the matching stop/disable command when the task is done.
|
|
157
|
+
- For non-core families, pass current upstream commands through the native tool directly: network route/requests/har (including request filters like --type/--method/--status), diff snapshot/screenshot/url with scoped/baseline options, trace/profiler/record, console/errors/highlight/inspect/clipboard, stream enable/disable/status, dashboard start/stop, device list for iOS simulator inventory, and chat. For compact network requests output, prefer details.nextActions for request detail, route-mock diagnostics, actionable failed-request networkSourceLookup, filtering, or HAR capture follow-ups instead of guessing request-id syntax. Artifact-producing commands report details.artifacts and verification state. Long-running starts such as stream, dashboard, trace/profiler, and record should be paired with the matching stop/disable command when the task is done. When stream enable reports that streaming is already enabled, the result is treated as an idempotent success with status/disable follow-ups.
|
|
158
158
|
- For Electron desktop apps, prefer top-level electron for wrapper-owned discovery, isolated launch, status, compact probe, and cleanup: list first, treat likely-sensitive annotations as hints rather than enforcement, launch with the default snapshot handoff unless handoff: "tabs" is the safer diagnostic starting point, use electron.probe or snapshot -i/qa.attached for current-session state, and always cleanup the returned launchId when done. electron.launch uses an isolated temporary profile; it does not reuse the app's normal signed-in profile or attach to an already-running authenticated app. For signed-in local app state, host-launch the normal app with --remote-debugging-port when appropriate, then use raw args connect <port|url>; after connect, inspect tab list, select the stable tab id such as tab t2, then run a condition wait or snapshot -i before using refs. close commands (`close`, `quit`, or `exit`) only close the browser/CDP session; leave manually launched app shutdown, profile cleanup, and explicit artifacts to the host owner.
|
|
159
159
|
- For provider or specialized app workflows, load version-matched upstream guidance with skills get agentcore|electron|slack|dogfood|vercel-sandbox through the native tool; add --full when you need references/templates, and use skills get --all only for broad skill audits. Provider launches such as -p ios, --provider browserbase/kernel/browseruse/browserless/agentcore, and iOS --device are upstream-owned setup paths; use sessionMode fresh when switching providers and expect external credentials or local Appium/Xcode setup to be required.
|
|
160
160
|
- For dialogs and frames, use dialog status/accept/dismiss and frame <selector|main> through native args; when --confirm-actions produces a pending confirmation, use details.nextActions or exact confirm <id> / deny <id> calls instead of inventing ids.
|
|
@@ -218,8 +218,9 @@ Examples:
|
|
|
218
218
|
- optional; mutually exclusive with `args`, `job`, `qa`, `sourceLookup`, `networkSourceLookup`, and `electron` (omit all of them when using this field)
|
|
219
219
|
- top-level tool input only: `batch` stdin remains upstream argv arrays; express find steps inside batch as string arrays such as `["find","role","button","click","--name","Export"]`, not nested `semanticAction` objects
|
|
220
220
|
- thin intent schema compiled by this wrapper into existing upstream commands; locator actions compile to `find`, while native dropdown selection compiles to `select <selector> <value...>`; behavior and locator/selector semantics stay upstream-owned
|
|
221
|
-
- supported actions: `click`, `fill`, `check`, `select
|
|
222
|
-
- supported locators for `click` / `fill` / `check
|
|
221
|
+
- supported actions: `click`, `fill`, `check`, `select`
|
|
222
|
+
- supported locators for `click` / `fill` / `check`: `role`, `text`, `label`, `placeholder`, `alt`, `title`, `testid`
|
|
223
|
+
- `semanticAction` does not expose `uncheck` while upstream `find ... uncheck` is not runtime-supported; use raw `args: ["uncheck", <selector-or-ref>]` once you have a stable selector or current snapshot ref
|
|
223
224
|
- for locator actions, `value` is the locator argument (for example ARIA role token `"button"`, label text, or visible substring), must be a non-empty string after trim; for `locator: "role"`, callers may provide `role` instead of redundant `value`
|
|
224
225
|
- `fill` requires non-empty `text` (compiled as the trailing value argument to `find`)
|
|
225
226
|
- `select` requires non-empty `selector` plus either `value` (single option value) or `values` (non-empty array of option values). `select` does not accept `locator`, `role`, `name`, or `text`; upstream `find` does not expose a verified `select` action, so role/name/label dropdown targeting must first be resolved to a stable selector or current `@ref`.
|
|
@@ -231,15 +232,15 @@ Compilation (then `--json` and session handling apply like any other call):
|
|
|
231
232
|
|
|
232
233
|
| Fields | Compiled `args` (conceptually) |
|
|
233
234
|
| --- | --- |
|
|
234
|
-
| `click
|
|
235
|
-
| `click` / `check` / `
|
|
235
|
+
| `click` or `check` + non-`role` locator | `["find", <locator>, <value>, <action>]` |
|
|
236
|
+
| `click` / `check` / `fill` + `role` locator (`role` or `value` field) + optional `name` | `["find","role",<role-or-value>,<action>]` plus `["--name",<name>]` when `name` is set; active-session fill may pre-resolve to `fill @ref <text>` when one exact editable current ref matches |
|
|
236
237
|
| `fill` | `["find",<locator>,<value>,"fill",<text>]` plus optional `["--name",<name>]` after `text` when `locator` is `role` and `name` is set |
|
|
237
238
|
| `select` + `selector` + `value` / `values` | `["select",<selector>,<value...>]` |
|
|
238
239
|
| any supported action + `session` | prepends `["--session",<session>]` before the compiled argv |
|
|
239
240
|
|
|
240
|
-
When `semanticAction` compiles successfully, `details.compiledSemanticAction` echoes `{ action, locator, args }` for `find` actions or `{ action: "select", selector, values, args }` for `select`, with `args` redacted the same way as other invocation details. Expect it on the initial wrapper validation return (when that path still builds the early `details` object) and on the unified result after `agent-browser` runs. It is omitted when the call used `args` only, when compilation never produced argv, and on some in-`execute` error returns that attach a slimmer `details` shape before the unified merge (for example certain session-plan, stdin-contract, tab-pinning, or missing-binary guard paths); compare `extensions/agent-browser/index.ts` where `compiledSemanticAction` is assigned. For active sessions, role/name `click`, `check`, and `
|
|
241
|
+
When `semanticAction` compiles successfully, `details.compiledSemanticAction` echoes `{ action, locator, args }` for `find` actions or `{ action: "select", selector, values, args }` for `select`, with `args` redacted the same way as other invocation details. Expect it on the initial wrapper validation return (when that path still builds the early `details` object) and on the unified result after `agent-browser` runs. It is omitted when the call used `args` only, when compilation never produced argv, and on some in-`execute` error returns that attach a slimmer `details` shape before the unified merge (for example certain session-plan, stdin-contract, tab-pinning, or missing-binary guard paths); compare `extensions/agent-browser/index.ts` where `compiledSemanticAction` is assigned. For active sessions, role/name `click`, `check`, and guarded `fill` semantic actions may be resolved through one fresh `snapshot -i` to a current visible `@ref` before execution; fill only resolves when one exact editable `combobox`, `searchbox`, or `textbox` ref matches. This avoids hidden duplicate matches stealing an upstream `find` action. In that case `details.compiledSemanticAction` still records the original semantic target while `details.effectiveArgs` shows the executed ref action.
|
|
241
242
|
|
|
242
|
-
If a raw `find` or compiled `semanticAction` fails with `failureCategory: "selector-not-found"`, the wrapper may run one fresh session-scoped `snapshot -i` and add visible `Current snapshot ref fallback` plus `details.visibleRefFallback` when that snapshot contains exact role/name matches for the failed target. Non-fill matches can also add `try-current-visible-ref` / `try-current-visible-ref-N` next actions. The matcher is bounded to current snapshot refs and exact normalized role/name matches: role locators require `--name`, text-click falls back only to exact-name `button`/`link` refs, label-fill to exact-name `textbox`, and placeholder-fill to exact-name `searchbox`/`textbox`. It never fuzzy-matches names such as prefixes; when several exact refs match, each action carries safety copy telling agents to inspect the snapshot and choose only if unambiguous. For `fill` matches, `visibleRefFallback.candidates[].args` and `visibleRefFallback.target.text` are omitted so recovery details do not repeat the fill text.
|
|
243
|
+
If a raw `find` or compiled `semanticAction` fails with `failureCategory: "selector-not-found"`, the wrapper may run one fresh session-scoped `snapshot -i` and add visible `Current snapshot ref fallback` plus `details.visibleRefFallback` when that snapshot contains exact role/name matches for the failed target. Non-fill matches can also add `try-current-visible-ref` / `try-current-visible-ref-N` next actions. The matcher is bounded to current snapshot refs and exact normalized role/name matches: role locators require `--name`, text-click falls back only to exact-name `button`/`link` refs, label-fill to exact-name `textbox`, and placeholder-fill to exact-name `searchbox`/`textbox`. It never fuzzy-matches names such as prefixes; when several exact refs match, each action carries safety copy telling agents to inspect the snapshot and choose only if unambiguous. For post-failure `fill` matches, `visibleRefFallback.candidates[].args` and `visibleRefFallback.target.text` are omitted so recovery details do not repeat the fill text.
|
|
243
244
|
|
|
244
245
|
If a compiled `semanticAction` fails with `failureCategory: "selector-not-found"`, visible content can also include an `Agent-browser candidate fallbacks` block when the wrapper has bounded role/name retries for that locator and action, and `details.nextActions` includes the normal `refresh-interactive-refs` snapshot step plus those entries. When `session` was provided, candidate retry args preserve the same `--session <session>` prefix. Today `buildSemanticActionCandidateActions` in `extensions/agent-browser/index.ts` only appends click candidates for `click` + `text` → `try-button-name-candidate` and `try-link-name-candidate`. Fill misses no longer emit `find … fill <text>` retry actions because those would repeat potentially sensitive text. Instead, when the same selector-miss snapshot finds exact current editable refs (`searchbox` or `textbox`), the wrapper emits `details.richInputRecovery`, visible `Rich input recovery`, and `focus-current-editable-ref` / `click-current-editable-ref` (numbered when ambiguous) next actions. Those actions carry only focus/click argv for the candidate ref; they do not copy fill text, press `Enter`, or submit. Use `keyboard inserttext` or `keyboard type` with the intended text only after focusing the right current ref, and submit only when the user flow explicitly calls for it. Candidate fallbacks are heuristics, not proof that an element exists; inspect the page when several controls could share the same name.
|
|
245
246
|
|
|
@@ -257,7 +258,6 @@ Examples:
|
|
|
257
258
|
{ "semanticAction": { "action": "select", "selector": "#flavor", "value": "chocolate" } }
|
|
258
259
|
{ "semanticAction": { "action": "select", "selector": "#multi", "values": ["dark", "compact"] } }
|
|
259
260
|
{ "semanticAction": { "action": "check", "locator": "label", "value": "Remember me" } }
|
|
260
|
-
{ "semanticAction": { "action": "uncheck", "locator": "label", "value": "Remember me" } }
|
|
261
261
|
{ "semanticAction": { "action": "click", "locator": "text", "value": "Close", "session": "named-browser" } }
|
|
262
262
|
```
|
|
263
263
|
|
|
@@ -325,7 +325,7 @@ On app pages that expose a native dropdown, add a `select` step such as `{ "acti
|
|
|
325
325
|
|
|
326
326
|
Use raw `args` plus `stdin` for upstream `batch` when a flow needs commands, flags, stdin forms, or failure policies outside this constrained schema.
|
|
327
327
|
|
|
328
|
-
Because `job` still executes as upstream `batch` with generated stdin, the same wrapper page-scoped `@e…` preflight applies: if you pass `@refs` in `click`/`fill`/`select` selectors after an `open`, `click`,
|
|
328
|
+
Because `job` still executes as upstream `batch` with generated stdin, the same wrapper page-scoped `@e…` preflight applies: if you pass `@refs` in `click`/`fill`/`select` selectors after an `open`, non-form `click`, or another step that can navigate or mutate the page, split the work across tool calls or switch to raw `batch` and insert your own `snapshot -i` rows between steps—the constrained `job` vocabulary does not emit snapshot steps for you. Multiple same-snapshot `fill @e…` rows may run before the first click/submit-style step. Raw `args:["batch"]` stdin can also batch native form-control rows (`check`/`uncheck` checkbox or radio refs and `select` combobox refs) before that click.
|
|
329
329
|
|
|
330
330
|
### `qa`
|
|
331
331
|
|
|
@@ -645,31 +645,33 @@ Recommended details:
|
|
|
645
645
|
Stable category fields are part of the machine-readable contract:
|
|
646
646
|
|
|
647
647
|
- `resultCategory`: always either `"success"` or `"failure"`.
|
|
648
|
-
- `successCategory`: present on successful results. Current values are `"completed"`, `"artifact-saved"`, `"artifact-unverified"`, and `"inspection"`. `artifact-unverified` means upstream reported success but the merged `artifactVerification` summary still
|
|
649
|
-
- `failureCategory`: present on failed results. Current values are `"aborted"`, `"cleanup-failed"`, `"confirmation-required"`, `"download-not-verified"`, `"missing-binary"`, `"parse-failure"`, `"policy-blocked"`, `"qa-failure"`, `"selector-not-found"`, `"selector-unsupported"`, `"stale-ref"`, `"tab-drift"`, `"timeout"`, `"upstream-error"`, and `"validation-error"`.
|
|
648
|
+
- `successCategory`: present on successful results. Current values are `"completed"`, `"artifact-saved"`, `"artifact-unverified"`, and `"inspection"`. `artifact-unverified` means upstream reported success but the merged `artifactVerification` summary still has unverified non-missing rows (for example bounded spill rows) or the legacy artifact classifier still sees a non-pending file without confirmed disk presence; inspect `artifactVerification` (counts and per-entry `state` / optional `limitation`) before treating paths as durable.
|
|
649
|
+
- `failureCategory`: present on failed results. Current values are `"aborted"`, `"artifact-missing"`, `"cleanup-failed"`, `"confirmation-required"`, `"download-not-verified"`, `"missing-binary"`, `"parse-failure"`, `"policy-blocked"`, `"qa-failure"`, `"selector-not-found"`, `"selector-unsupported"`, `"stale-ref"`, `"tab-drift"`, `"timeout"`, `"upstream-error"`, and `"validation-error"`. `artifact-missing` means upstream reported a saved/completed artifact path, but the wrapper verified the non-pending file is absent and failed closed.
|
|
650
650
|
|
|
651
651
|
These categories are intentionally bounded and stable so agents can branch on them instead of parsing prose. They do not replace raw diagnostics: `details.error`, `details.stderr`, `details.parseError`, `details.validationError`, and visible content still preserve the specific upstream or wrapper message after normal redaction.
|
|
652
652
|
|
|
653
|
-
|
|
653
|
+
For argv-supplied `--allowed-domains`, the wrapper preserves the allowlist for the managed session and checks the final observed `http(s)` page URL after successful-looking browser commands. If a click, redirect, or other post-load navigation ends outside the allowlist, the wrapper reclassifies the result as `failureCategory: "policy-blocked"` and includes the requested allowlist plus observed URL/host in the failure text. Direct upstream blocks may still surface as upstream errors when upstream rejects navigation before a page target is available.
|
|
654
|
+
|
|
655
|
+
Real Pi custom tools only mark a tool result failed when the tool throws during `execute`; returned `isError` fields are not authoritative. The extension therefore also registers a `tool_result` handler that treats any `agent_browser` result with `details.resultCategory: "failure"` as a real Pi tool error. For normal prose output, it appends `Result category: failure; failureCategory: …; Pi tool isError: true.` to model-visible text. For caller-requested `--json` output, it only patches `isError` and preserves the visible JSON string unchanged so the content remains parseable. The hook treats `--json` as requested when echoed `details.args` or the original tool `input.args` includes that flag; it skips appending the prose notice only when a text content item is non-empty parseable JSON (so mixed or invalid JSON bodies still get the visible line). Implementation: `buildAgentBrowserToolResultPatch` in `extensions/agent-browser/index.ts`. This keeps Pi transcript semantics aligned with the machine-readable result contract, including wrapper-side reclassifications such as `qa-failure` after an upstream-successful batch and `artifact-missing` after an upstream-successful artifact command whose requested file is absent.
|
|
654
656
|
|
|
655
657
|
For `batch`, top-level `details` still carries `resultCategory` plus `successCategory` or `failureCategory` for the **aggregate** tool outcome: if any step fails, the overall result is a failure (`resultCategory: "failure"`) even when later steps succeed—inspect `batchSteps[]` for per-step outcomes. Each `batchSteps[]` entry includes its own `resultCategory` and either `successCategory` or `failureCategory` for that step. `batchFailure.failedStep` duplicates the first failing step’s details, including its `failureCategory` and any `nextActions`.
|
|
656
658
|
|
|
657
|
-
Top-level `details.data` on `batch` is a compact per-step roll-up (not a verbatim replay of raw upstream batch JSON): each element is `{ success, command, result? | error? }` where `command` is argv-redacted the same way as echoed invocation args (including `cookies set` cookie values, `storage local|session set` values, and other sensitive flags/positionals), `result` is the presentation-layer data for that step after the same structured redaction as non-batch commands, and `error` is failure text with cookie/storage/password literals stripped when those values appeared in argv. Prefer `batchSteps[]` for full per-step `details` (artifacts, categories, spill paths); use the roll-up when you only need a redacted matrix of what ran.
|
|
659
|
+
Top-level `details.data` on `batch` is a compact per-step roll-up (not a verbatim replay of raw upstream batch JSON): each element is `{ success, command, result? | error? }` where `command` is argv-redacted the same way as echoed invocation args (including `clipboard write` text, `cookies set` cookie values, `storage local|session set` values, and other sensitive flags/positionals), `result` is the presentation-layer data for that step after the same structured redaction as non-batch commands, and `error` is failure text with clipboard-write/cookie/storage/password literals stripped when those values appeared in argv. Prefer `batchSteps[]` for full per-step `details` (artifacts, categories, spill paths); use the roll-up when you only need a redacted matrix of what ran.
|
|
658
660
|
|
|
659
661
|
`details.refSnapshot` may appear after successful `snapshot` calls and subsequent same-session calls. It records the latest page-scoped ref ids known to the wrapper, optional per-ref accessible `role`/`name` metadata from the same snapshot, and the page target they came from so mutation-prone `@e…` commands can fail fast instead of silently hitting recycled refs after navigation. For wrapper-tracked Electron sessions, `details.electronRefFreshness` may also appear after a successful `@e…` mutation as a softer same-URL rerender warning: run `snapshot -i` before reusing old refs even if the URL did not change.
|
|
660
662
|
|
|
661
663
|
Ref preflight details (command taxonomy in `extensions/agent-browser/lib/command-taxonomy.ts`, orchestration in `extensions/agent-browser/lib/orchestration/browser-run/session-state.ts`):
|
|
662
664
|
|
|
663
665
|
- **URL alignment:** `refSnapshot.target.url` and the session’s current tab URL are compared via `targetsMatch` / `normalizeComparableUrl` in `extensions/agent-browser/index.ts`: values are trimmed, parsed as URLs when possible, compared **after dropping the `#fragment`**, and the query string remains significant. If either side lacks a `url`, `targetsMatch` treats the pair as matching so early-session calls are not blocked.
|
|
664
|
-
- **Batch stdin ordering:** user `batch` JSON is scanned in order. Any step whose first token satisfies `isRefInvalidatingBatchCommand` sets a latch that blocks later steps whose first token satisfies `isRefGuardedCommand` and that mention `@e…` refs. A step whose first token is `snapshot` clears that latch for subsequent steps (pre-spawn intent only; it does not wait for upstream success). These predicates read explicit command capability flags from `command-taxonomy.ts`: navigation/mutation verbs such as `open` / `goto`, `reload`, `click`, and related upstream commands have `invalidatesBatchRefs`; same-snapshot `fill` rows
|
|
666
|
+
- **Batch stdin ordering:** user `batch` JSON is scanned in order. Any step whose first token satisfies `isRefInvalidatingBatchCommand` sets a latch that blocks later steps whose first token satisfies `isRefGuardedCommand` and that mention `@e…` refs, except for same-snapshot native form-control steps whose current snapshot role metadata identifies all refs as safe controls (`check`/`uncheck` on checkbox or radio refs and `select` on combobox refs). A step whose first token is `snapshot` clears that latch for subsequent steps (pre-spawn intent only; it does not wait for upstream success). These predicates read explicit command capability flags from `command-taxonomy.ts`: navigation/mutation verbs such as `open` / `goto`, `reload`, non-form `click`, and related upstream commands have `invalidatesBatchRefs`; same-snapshot `fill` rows and the role-checked native form-control rows stay guarded against missing/stale refs but do not set the latch, allowing ordinary form batches before a click/submit step; direct `click @e…` remains invalidating even when the snapshot role is checkbox or radio because role metadata alone does not prove native element semantics. Ref-guarded commands accept page-scoped refs for interaction (`click`, `fill`, `download`, `scrollintoview` / `scrollinto`, and others centralized in the command taxonomy). Changing either capability requires updating this contract, [`docs/SUPPORT_MATRIX.md`](SUPPORT_MATRIX.md) `RQ-0072`/`RQ-0087` notes, README and command-reference pitfalls, and `test/agent-browser.extension-validation.test.ts`.
|
|
665
667
|
|
|
666
|
-
**Presentation redaction (implementation map):** Successful non-`batch` tool calls and each successful `batchSteps[]` row run upstream `data` through `redactPresentationData` in `extensions/agent-browser/lib/results/presentation/diagnostics.ts`: `cookies`
|
|
668
|
+
**Presentation redaction (implementation map):** Successful non-`batch` tool calls and each successful `batchSteps[]` row run upstream `data` through `redactPresentationData` in `extensions/agent-browser/lib/results/presentation/diagnostics.ts`: `cookies` results still walk objects/arrays and replace the values under case-insensitive `value` keys with `"[REDACTED]"`; `storage` redacts values when the key or value looks credential-like (token, cookie, auth, secret, JWT, bearer/basic credential, high-entropy token-like string, or nested sensitive JSON) but keeps low-risk primitive QA values such as booleans, numbers, and short strings visible. Redacted storage entries add `valueRedacted` plus `valueRedactionReason` in `details.data`; diagnostic formatters mirror the same decision. Every other command’s payload is recursively scrubbed with `redactStructuredPresentationValue`, which redacts known sensitive key names and applies string-level sensitivity heuristics so network, diff, trace/profiler, stream, dashboard, chat, and other structured results do not echo bearer tokens, proxy credentials, or similar fields verbatim into `details.data`. Echoed `command` arrays in `details` and in batch roll-ups use `redactInvocationArgs` from `extensions/agent-browser/lib/runtime.ts` to mask trailing values for sensitive global flags (including `--body`, `--headers`, `--password`, and `--proxy`), preserve the special positional rules for `cookies set`, `storage local|session set`, and `set credentials`, and scrub other argv tokens for URLs and inline secrets. Failed batch steps additionally run `redactExactValues` on structured step errors so literals taken from that step’s argv (cookie value, storage set value, `--password` / `--password=` tokens) cannot reappear inside formatted error blobs.
|
|
667
669
|
|
|
668
670
|
`nextActions` is an optional machine-readable list of exact native `agent_browser` follow-ups. Each entry includes `tool: "agent_browser"`, an `id`, a short `reason`, optional `safety`, and either `params` (`args`, optional `stdin`, optional `sessionMode`, optional `networkSourceLookup`, optional `electron`) or an `artifactPath` for saved-file workflows. Agents should prefer these payloads over prose when present. Tab/session recovery id strings are centralized in `AGENT_BROWSER_RECOVERY_NEXT_ACTION_IDS`, while rich-input focus/click recovery ids are centralized in `AGENT_BROWSER_RICH_INPUT_RECOVERY_NEXT_ACTION_IDS` plus `getAgentBrowserRichInputRecoveryNextActionId(s)` in `extensions/agent-browser/lib/results/recovery-actions.ts` (both registries are also re-exported from `shared.ts`); docs and tests mirror those registries/helpers rather than inventing recovery ids in prose. Current recommendations include: raw `connect` success → session-scoped `list-connected-session-tabs` only, then the agent should inspect/select a stable `tab t<N>` target and run `snapshot -i` explicitly; `snapshot` failures whose upstream error says `No active page` and whose wrapper result has a known session → `list-tabs-after-no-active-page` only, because this path has no wrapper-observed safe tab id to select atomically; browser profile/user-data-dir resolution failures → `inspect-browser-profiles` (`profiles`) and `run-agent-browser-doctor` (`doctor`) before retrying opens; Electron launches → wrapper-tracked `electron.status` / `electron.probe` / `electron.cleanup` actions plus session-scoped tab/snapshot inspection when attached; Electron status/probe mismatch diagnostics → `reattach-electron-launch` plus fresh tab/snapshot inspection; Electron post-command health failures → status/probe/cleanup for the same `launchId`; Electron fill verification mismatches → `inspect-after-fill-verification` and `verify-filled-value`; Electron same-URL ref freshness warnings → 
`refresh-electron-refs-after-rerender`; packaged-Electron `sourceLookup` no-candidate diagnostics → session snapshot, launch probe, and tab list; Electron cleanup partial failures → status plus retry-cleanup for the same wrapper-owned `launchId`; `open` success → `snapshot -i`; mutating/navigation commands (see `buildAgentBrowserNextActions` in source for the exact command set) → `snapshot -i`; stale refs and selector failures → `snapshot -i` via `refresh-interactive-refs` (prefixed with `--session <name>` when the failed call ran in a named or managed session); selector misses with exact current snapshot role/name matches → direct ref retries via `try-current-visible-ref` or bounded `try-current-visible-ref-N` for non-fill targets; semantic `fill` selector misses with exact current editable refs → `focus-current-editable-ref` / `click-current-editable-ref` or numbered variants that do not include fill text or submit; unknown getter shortcuts such as `title` / `url` → exact read-only retries like `get title` / `get url` with ids `use-get-title` / `use-get-url`; compact `network requests` results with safe request IDs → bounded read-only request detail, `networkSourceLookup`, path filter, or HAR-capture follow-ups; semantic `selector-not-found` failures that compiled from `semanticAction` may append `try-button-name-candidate` or `try-link-name-candidate` after presentation `nextActions` only for the bounded click pair enumerated under `semanticAction`; semantic `stale-ref` failures that compiled from `semanticAction` `find` argv may also include `retry-semantic-action-after-stale-ref` after that snapshot step; qualifying same-URL non-Electron top-level clicks (see `overlayBlockers` below) with fresh snapshot evidence of likely overlay/banner/dialog close controls may append `inspect-overlay-state` and bounded `try-overlay-blocker-candidate-*` entries; successful top-level `scroll` calls whose pre/post viewport and sampled scroll-container positions do not change 
may append `inspect-after-noop-scroll` and `verify-noop-scroll-visually`; explicit combobox-targeted actions that focus a combobox without visible options may append `inspect-focused-combobox`, `try-open-combobox-with-arrow`, and `try-open-combobox-with-enter`; `get text <selector>` calls with hidden/multiple CSS matches may append `inspect-visible-text-candidates` with a read-only `eval --stdin` probe (each prefixed with `--session <name>` when `details.sessionName` is set, same `sessionPrefixArgs` rule as other session-scoped follow-ups); confirmations → exact `confirm <id>` and `deny <id>` choices; generic tab drift → `list-tabs-for-recovery` with `tab list` first, then select or confirm the stable target before running `snapshot -i`; about:blank or tab-drift recovery with a wrapper-known target → `list-tabs-for-about-blank-recovery` or `list-tabs-for-tab-drift-recovery`, plus `select-intended-tab-after-drift` and `snapshot-after-tab-recovery` when the wrapper already observed the stable `t<N>` tab id; `wait --text` assertion failures → `inspect-after-text-assertion-failure` with a read-only snapshot; download verification failures or missing successful download artifacts → `wait --download [path]`; saved artifacts → the artifact path to inspect/consume after checking `artifactVerification`/metadata; missing non-download artifacts → `verify-artifact-path` so agents do not trust an absent file. When nothing applies, the field is omitted.
|
|
669
671
|
|
|
670
672
|
**Unknown-command getter hints (failure presentation):** `buildErrorPresentation` in `extensions/agent-browser/lib/results/presentation/errors.ts` only runs this path when upstream error text (after model-facing redaction) matches `unknown command`, `unknown subcommand`, or `unrecognized command` (case-insensitive) **and** the failed invocation’s primary command token is one of `attr`, `count`, `html`, `text`, `title`, `url`, or `value`. Visible text then includes a grouped-`get` hint line plus per-token guidance (`get text <selector>`, `get html …`, `get attr …`, `get count …`, `get value …`, `get title`, `get url`). Machine `nextActions` with ids `use-get-title` / `use-get-url` are emitted only for `title` / `url`, with `params.args` optionally prefixed by `--session <name>` when the failed call targeted a named session. If the error string already contains `Agent-browser hint:` from selector recovery (stale-ref or unsupported selector dialect appendages), the getter block is skipped so two stacked `Agent-browser hint:` headers are not emitted.
|
|
671
673
|
|
|
672
|
-
For `network requests`, `details.nextActions` is bounded to one selected safe request ID, preferring actionable failed rows, then API/fetch-like rows, then benign failed rows, then the first request with a safe ID. Detail/filter/HAR actions use `params.args` and preserve a known `--session <name>` prefix when the current presentation has `details.sessionName`; source-candidate actions use `params.networkSourceLookup` with the selected `requestId` plus `session` when known and are only emitted for actionable failed rows that the failed-request analyzer can correlate. URLs and query strings are not copied into action params; path filters are skipped when they look sensitive or too large.
|
|
674
|
+
For `network requests`, `details.nextActions` is bounded to one selected safe request ID, preferring actionable failed rows, then API/fetch-like rows, then benign failed rows, then the first request with a safe ID. Detail/filter/HAR actions use `params.args` and preserve a known `--session <name>` prefix when the current presentation has `details.sessionName`; source-candidate actions use `params.networkSourceLookup` with the selected `requestId` plus `session` when known and are only emitted for actionable failed rows that the failed-request analyzer can correlate. URLs and query strings are not copied into action params; path filters are skipped when they look sensitive or too large. If the wrapper has observed `network route` in the same session, matching pending fetch/XHR rows or CORS-looking errors also add `details.networkRouteDiagnostics[]` with `{ reason, routePattern, mode, requestId?, requestUrl?, summary }` and prepend executable route-mock next actions (`inspect-pending-routed-network-request`, `start-network-har-capture-for-route-mock`) before generic request follow-ups; same-origin/CORS fixture retry guidance stays in visible prose. The route tracker is wrapper-session-local, updated on successful `network route`/`network unroute`, and cleared when that session closes or is replaced.
|
|
673
675
|
|
|
674
676
|
For `batch`, each `batchSteps[]` entry can carry its own `nextActions` for that step’s success or failure. Top-level `details.nextActions` on a failed batch duplicates `batchFailure.failedStep.nextActions` so callers can read one aggregate object. On a fully successful batch, top-level `nextActions` may still list artifact follow-ups derived from the combined step artifacts.
|
|
675
677
|
|
|
@@ -730,9 +732,9 @@ When `semanticAction` produced compiled `find` argv and the unified result is `f
|
|
|
730
732
|
Implementation and precedence:
|
|
731
733
|
|
|
732
734
|
- Shared machine-readable types are centralized in `extensions/agent-browser/lib/results/contracts.ts` (including re-exports such as `AgentBrowserNextAction` from `next-actions.ts`). Classifiers live in `categories.ts` (`classifyAgentBrowserSuccessCategory`, `classifyAgentBrowserFailureCategory`, `buildAgentBrowserResultCategoryDetails`—the last prefers an explicit `failureCategory` when the caller already knows the bucket, otherwise it runs the classifier). Generic follow-up assembly lives in `action-recommendations.ts` (`buildAgentBrowserNextActions`). Tab/session recovery ids live in `recovery-actions.ts` (`AGENT_BROWSER_RECOVERY_NEXT_ACTION_IDS`, `AGENT_BROWSER_RICH_INPUT_RECOVERY_NEXT_ACTION_IDS`, `getAgentBrowserRichInputRecoveryNextActionId`, `getAgentBrowserRichInputRecoveryNextActionIds`, `buildRecoveryNextActions`) and session-aware wrappers live in `recovery-next-actions.ts`. Selector miss and rich-input diagnostic shapes/actions live in `selector-recovery.ts`. `extensions/agent-browser/lib/results/shared.ts` re-exports focused modules for compatibility only. Failed upstream `network requests` rows flow through `classifyNetworkRequestFailure` / `summarizeNetworkFailures` in `network.ts` for QA analysis (`analyzeQaPresetResults` in `extensions/agent-browser/index.ts`) and for actionable-vs-benign lines plus request-specific nextActions in `network requests` presentation (`extensions/agent-browser/lib/results/presentation/diagnostics.ts`).
|
|
733
|
-
- Artifact verification: `ArtifactVerificationSummary` / `ArtifactVerificationEntry` types live in `contracts.ts`. `buildArtifactVerificationSummary`, `getArtifactVerificationEntry`, and `getManifestVerificationEntry` in `presentation/artifacts.ts` merge each resolved file artifact with manifest rows whose `storageScope` is not `explicit-path` (those rows duplicate file artifacts) and whose `path` is in the current result’s spill path set.
|
|
734
|
-
- Inner success categories (`classifyAgentBrowserSuccessCategory` in `categories.ts`, after verification counts are clear): if `inspection` is true → `"inspection"`; else if any non-pending artifact lacks confirmed on-disk presence (`exists !== true`) → `"artifact-unverified"`; else if there is a `savedFile` or any `artifacts` → `"artifact-saved"`; else → `"completed"`.
|
|
735
|
-
- Failure: the classifier walks a single ordered chain (first match wins): `confirmation-required` → `timeout` → `missing-binary` → `parse-failure` → `aborted` → `policy-blocked` → `cleanup-failed` → `tab-drift` → `stale-ref` (including “unknown ref” text and a narrow `@eN` plus “element not found” heuristic) → `selector-unsupported` → `selector-not-found` → `download-not-verified` (download / wait-download style failures) → `validation-error` when a wrapper `validationError` is present → default `upstream-error`.
|
|
735
|
+
- Artifact verification: `ArtifactVerificationSummary` / `ArtifactVerificationEntry` types live in `contracts.ts`. `buildArtifactVerificationSummary`, `getArtifactVerificationEntry`, and `getManifestVerificationEntry` in `presentation/artifacts.ts` merge each resolved file artifact with manifest rows whose `storageScope` is not `explicit-path` (those rows duplicate file artifacts) and whose `path` is in the current result’s spill path set. Presentation then fails closed with `failureCategory: "artifact-missing"` when any resolved non-pending file artifact has `exists: false`; pending recording-start artifacts and non-explicit spill rows do not trigger this failure. If no hard-missing file artifact exists, successful presentation merges run `classifyPresentationSuccessCategory`, which can still force `successCategory: "artifact-unverified"` for unverified counts before delegating to `classifyAgentBrowserSuccessCategory`.
|
|
736
|
+
- Inner success categories (`classifyAgentBrowserSuccessCategory` in `categories.ts`, after verification counts are clear): if `inspection` is true → `"inspection"`; else if any non-pending artifact lacks confirmed on-disk presence (`exists !== true`) and was not upgraded to an `artifact-missing` failure → `"artifact-unverified"`; else if there is a `savedFile` or any `artifacts` → `"artifact-saved"`; else → `"completed"`.
|
|
737
|
+
- Failure: the classifier walks a single ordered chain (first match wins): `confirmation-required` → `timeout` → `missing-binary` → `parse-failure` → `aborted` → `policy-blocked` → `cleanup-failed` → `tab-drift` → `stale-ref` (including “unknown ref” text and a narrow `@eN` plus “element not found” heuristic) → `selector-unsupported` → `selector-not-found` → `download-not-verified` (download / wait-download style failures) → `validation-error` when a wrapper `validationError` is present → default `upstream-error`. Wrapper-known missing artifact checks pass an explicit `artifact-missing` category rather than relying on this text classifier.
|
|
736
738
|
- The main tool implementation merges these fields into Pi-facing `details` from `extensions/agent-browser/index.ts` and from `extensions/agent-browser/lib/results/presentation.ts` for presentation-time failures.
|
|
737
739
|
|
|
738
740
|
Additional structured fields can appear when relevant:
|
|
@@ -743,6 +745,7 @@ Additional structured fields can appear when relevant:
|
|
|
743
745
|
- `sourceLookup` when the call used `sourceLookup`: `{ status, candidates, limitations, summary, workspaceRoot?, electronContext? }`; wrapper-tracked packaged Electron no-candidate diagnostics may carry `workspaceRoot` plus `electronContext` and live Electron nextActions without marking the successful batch as a tool failure.
|
|
744
746
|
- `compiledNetworkSourceLookup` / `networkSourceLookup` when the call used `networkSourceLookup`: the generated batch plan plus bounded failed-request/candidate evidence as described above.
|
|
745
747
|
- `qaPreset` when the call used `qa`: `{ passed, failedChecks, warnings, summary }`. Network rows inside the `network requests` batch step use `summarizeNetworkFailures` / `classifyNetworkRequestFailure` in `network.ts`: actionable failures appear in `failedChecks` (and fail the tool when the upstream batch still succeeded); benign icon-classified failures appear only in `warnings` and in `summary` as `QA preset passed with warnings: …` when nothing else failed.
|
|
748
|
+
- `networkRouteDiagnostics` after successful `network requests` when the wrapper has observed active `network route` patterns for that session and a matching request row is pending/no-status or carries CORS/preflight-looking error text. Each row includes `reason` (`"pending-routed-request"` or `"cors-likely-routed-request"`), `routePattern`, `mode`, optional `requestId`, optional `requestUrl`, and `summary`; visible text starts with `Network route diagnostics`, and `details.nextActions` prepends executable route-mock inspection/HAR follow-ups before generic request follow-ups.
|
|
746
749
|
- `compiledElectron` when the call used `electron`: redacted action plan for `list`, `launch`, `status`, `cleanup`, or `probe`.
|
|
747
750
|
- `electron` when the call used `electron`: action-specific lifecycle, discovery, probe, and cleanup data; see the `electron` section below.
|
|
748
751
|
- `batchFailure` and `batchSteps` for `batch` rendering, including mixed-success runs
|
|
@@ -758,7 +761,7 @@ Additional structured fields can appear when relevant:
|
|
|
758
761
|
- `comboboxFocus` after a successful explicit combobox-targeted `click` / `fill` / `find … click|fill` (for example `semanticAction` with role `combobox`, including when that semantic action resolves through a current visible `@ref` before execution) when a read-only probe sees the active element is combobox-like, `aria-expanded` is explicitly present (`false` or `true`), and no visible `listbox` / `option` / menu option elements are open. Shape: `{ reason: "focused-combobox-without-visible-options", message, activeElement, visibleListboxCount, visibleOptionCount, recommendations }`; `activeElement` includes bounded role/tag/expanded/hasPopup/name metadata with normal text redaction. Visible text appends `Combobox diagnostic: focused combobox did not expose visible options`, and `details.nextActions` gains `inspect-focused-combobox` (`snapshot -i`), `try-open-combobox-with-arrow` (`press ArrowDown`), and `try-open-combobox-with-enter` (`press Enter`), session-prefixed when applicable. The diagnostic is deliberately gated to explicit combobox-targeted calls to avoid extra probes or false positives on ordinary clicks/textboxes.
|
|
759
762
|
- `recordingDependencyWarning` after a successful `record start` or `record restart` when the wrapper cannot find an executable `ffmpeg` on the Pi process `PATH`. Shape: `{ reason: "ffmpeg-missing-for-recording", dependency: "ffmpeg", command, message, recommendations }`. Visible text appends `Recording dependency warning: ffmpeg not found on PATH`. This is a non-blocking preflight warning: upstream may start recording, but `record stop` needs `ffmpeg` to encode the WebM.
|
|
760
763
|
- `selectorTextVisibility` after a **successful** upstream `get text <selector>` (standalone or inside a successful `batch`) when the wrapper’s follow-up probe finds a hazard: more than one DOM match (upstream reads the first `querySelectorAll` hit, which may be the wrong tab/panel), or the first match is hidden while at least one other match is visible (requires multiple DOM nodes so a visible peer exists; a lone hidden match is not flagged). The probe is a read-only `eval --stdin` script (`buildVisibleTextProbeScript` in `extensions/agent-browser/lib/orchestration/browser-run/diagnostics.ts`) that counts matches, applies a small visibility heuristic (`display`/`visibility`/`opacity` plus non-zero client rects), may include a redacted `firstVisibleTextPreview`, and may include up to eight `visibleCandidates` entries (`index` in `querySelectorAll`, `tagName`, optional `role`, optional redacted `textPreview`). It is **not** run for page-scoped `@e…` selectors or when the selector string is withheld because `selectorMayExposeSensitiveLiteral` would risk echoing secrets in probe output. `details.selectorTextVisibility` mirrors the primary diagnostic (first sorted entry); when several selectors in one `batch` qualify, `selectorTextVisibilityAll` lists every diagnostic sorted so hidden-first cases precede generic multi-match ambiguity. Appended visible warning text names the matching `details.nextActions` id and may list visible candidate previews. Appended `details.nextActions` use ids `inspect-visible-text-candidates` and `inspect-visible-text-candidates-2`, … with the probe replayed via `eval --stdin` for each hazardous selector. If the probe still leaves more than one visible candidate, it is only ambiguity evidence; agents should narrow the selector, use a current visible `@ref`, or run a targeted visible-element `eval --stdin` rather than trusting the broad selector.
|
|
761
|
-
- `electronGetTextScopeWarning` after a successful attached Electron `get text <selector>` (standalone or successful `batch`) when a broad non-ref CSS selector such as `body`, `html`, `main`, `div`, or `[role=application]` may read the whole app shell. Shape: `{ selector, summary, electronContext: { launchId?, sessionName?, url? } }`; multiple batched diagnostics use `electronGetTextScopeWarnings`. Visible text appends `Broad Electron get text selector warning`, and next actions use `snapshot-for-electron-text-scope` ids with session-scoped `snapshot -i` payloads.
|
|
764
|
+
- `electronGetTextScopeWarning` after a successful wrapper-tracked attached Electron `get text <selector>` (standalone or successful `batch`) when a broad non-ref CSS selector such as `body`, `html`, `main`, `div`, or `[role=application]` may read the whole app shell. Ordinary browser pages, including `file://` fixtures, do not qualify without wrapper-owned Electron launch provenance. Shape: `{ selector, summary, electronContext: { launchId?, sessionName?, url? } }`; multiple batched diagnostics use `electronGetTextScopeWarnings`. Visible text appends `Broad Electron get text selector warning`, and next actions use `snapshot-for-electron-text-scope` ids with session-scoped `snapshot -i` payloads.
|
|
762
765
|
- `evalStdinHint` after a successful `eval --stdin` when caller stdin (trimmed) looks function-shaped to the wrapper’s lightweight detector (in `extensions/agent-browser/lib/orchestration/browser-run/diagnostics.ts`: leading `function` / `async function`, parenthesized arrow `(…) =>`, or a concise `name =>` / `async name =>` form) **and** upstream JSON `data` is an object whose `result` field is a plain empty object (`{}`). Arrays such as `[]` do not qualify. It includes `reason` and `suggestion`; visible output appends `Eval stdin hint` with the same guidance. This is a heuristic for the common mistake of returning a function object instead of invoking it or passing a plain expression, not a JavaScript parser or proof that the page returned no useful data. Before this diagnostic path runs, the wrapper also recovers the common malformed native-tool call `args: ["eval", "--stdin", "..."]` with no top-level `stdin` by moving trailing `args` tokens after `--stdin` into the process stdin stream.
|
|
763
766
|
- `evalResultWarning` after a successful `eval --stdin` when the current or prior page URL is `file:` (from navigation summary, session tab target, or persisted session page state), upstream JSON `data.result` is strictly `null`, and stdin is non-empty and not a trivial literal `null`/`undefined`. Fields: `reason`, `suggestion`. Visible output appends `Eval result warning` without failing the tool. Use `snapshot -i`, ref-based getters, screenshots, or http(s) fixtures when `file://` null results are inconclusive.
|
|
764
767
|
- `timeoutPartialProgress` after `runAgentBrowserProcess` reports `timedOut` (wrapper child-process watchdog) when best-effort recovery finds useful context. `summary` is a short sentence counting how many declared artifact paths exist on disk versus how many were scanned, and whether page context came from live session reads or only from a planned URL (when nothing in the plan declares an artifact path, the fraction may read `0/0` while `currentPage` can still carry session or planned URL context). `steps` lists planned argv from the compiled `job` or `qa` batch plan (`compiledJob` in `extensions/agent-browser/index.ts`, which is only populated for those top-level modes) or, when that object is absent, from the same JSON-array `batch` stdin the tool sends upstream—whether caller-authored or wrapper-generated for `sourceLookup` / `networkSourceLookup` (1-based indices; only JSON-array stdin whose elements are string[] argv arrays is parsed); timeouts on other argv shapes may still emit `currentPage` / summary evidence without `steps`. `currentPage` comes from session-scoped `get url` / `get title` when the session answers, otherwise a fallback URL may be inferred from the last `open` / `navigate` / `pushstate` step in the plan. `artifacts` covers declared output paths on `screenshot`, `pdf`, `download`, and `wait --download` steps (absolute path, existence, optional `sizeBytes`, `stepIndex`). Visible text repeats the same block under `Timeout partial progress`, applying URL and path-segment redaction; the prose `Planned steps` list shows at most six steps, then an omitted-count line when the plan is longer. This is recovery evidence only; missing entries do not prove the upstream step never ran or that no other side effects occurred.
|
|
@@ -791,15 +794,15 @@ Worth doing in v1:
|
|
|
791
794
|
- file artifacts such as PDFs, downloads, `wait --download` files, `state save` state files, diff screenshot output images, traces, CPU profiles, completed WebM recordings, and path-bearing HAR captures → concise saved-path summaries plus metadata in `details.artifacts` and bounded recent metadata in `details.artifactManifest`; `record start` reports recording lifecycle state and the future output path without adding a missing manifest entry; upstream needs `ffmpeg` on `PATH` for `record stop` to encode the WebM, and successful `record start` / `record restart` calls may also expose `details.recordingDependencyWarning` when the wrapper cannot find `ffmpeg`; direct saved-file workflows also expose `details.savedFilePath` / `details.savedFile`; large or binary artifacts are not inlined into model context; the recent manifest cap can age out explicit-file metadata but does not remove explicit saved files from disk
|
|
792
795
|
- `diff screenshot` → same file-artifact pattern as above for the **diff** image path only (summary text uses “Saved diff image”); baseline paths and other fields stay in the structured payload but are not echoed as separate saved artifacts in the visible artifact block, and there is no Pi inline image attachment for the diff output
|
|
793
796
|
- `state load` → completion text may mention the loaded path, but the wrapper does **not** treat that path as a new saved artifact (`artifacts` / `artifactManifest` stay unset) the way `state save` does
|
|
794
|
-
- auth, cookies, storage, dialog, frame, state, network, debug, diff, stream, dashboard, chat, and other structured results → concise summaries that avoid expanding secret-bearing payloads; credential-like keys, values, URLs, body snippets, bearer/basic credentials, and
|
|
797
|
+
- auth, cookies, storage, clipboard, dialog, frame, state, network, debug, diff, stream, dashboard, chat, and other structured results → concise summaries that avoid expanding secret-bearing payloads; credential-like keys, values, URLs, body snippets, bearer/basic credentials, clipboard write text, cookie values, and likely secret storage values are redacted before model-facing output and `details.data`, while benign primitive storage values may remain visible for local QA
|
|
795
798
|
- TUI display → custom `agent_browser` call/result rendering with colorized command/output text and a built-in-style collapsed view for long visible output; `ctrl+o` expansion reveals the full rendered tool result without changing the model-facing content
|
|
796
799
|
- snapshots → origin + ref count + main-content-first compact preview, with the raw snapshot spill path printed directly in content and kept in `details.fullOutputPath` plus `details.artifactManifest` when the inline result would otherwise be too large
|
|
797
800
|
- oversized generic outputs such as large `eval --stdin` payloads → compact preview plus the actual spill file path instead of dumping the whole payload into model context
|
|
798
801
|
- extraction-style commands like `eval --stdin` and `get title` → scalar-first text with lightweight origin context when available
|
|
799
802
|
- navigation actions like `click`, `back`, `forward`, and `reload` → lightweight post-action title/url summary when available
|
|
800
803
|
- tab lists → compact summary/table
|
|
801
|
-
- stream status → enabled/connected/port summary plus WebSocket URL and frame format when a port is known; if the caller explicitly passed `--json`, visible text is valid JSON instead of a prose summary
|
|
802
|
-
- diagnostic/status families (`session`, `session list`, `profiles`, `doctor`, `auth list`/`show`, `cookies`, `storage`, `dialog`, `frame`, `state`, `network requests`, `console`, `errors`, and dashboard start/stop/status outputs) → compact readable summaries with counts and stable fields; network request lists include an actionable-vs-benign failed-request summary and mark low-impact browser icon failures separately; request-detail URLs from `network request` remain diagnostic-only rather than session page targets; large log/request/error outputs use previews plus `fullOutputPath` spill files; sensitive nested auth/header/token fields are not expanded in the model-facing text
|
|
804
|
+
- stream status → enabled/connected/port summary plus WebSocket URL and frame format when a port is known; `stream enable` errors that only say streaming is already enabled are normalized to a successful idempotent no-op with `details.data.alreadyEnabled: true` and status/disable nextActions; if the caller explicitly passed `--json`, visible text is valid JSON instead of a prose summary
|
|
805
|
+
- diagnostic/status families (`session`, `session list`, `profiles`, `doctor`, `auth list`/`show`, `cookies`, `storage`, `dialog`, `frame`, `state`, `network requests`, `console`, `errors`, and dashboard start/stop/status outputs) → compact readable summaries with counts and stable fields; network request lists include an actionable-vs-benign failed-request summary and mark low-impact browser icon failures separately; active route mocks can add pending/CORS route diagnostics; request-detail URLs from `network request` remain diagnostic-only rather than session page targets; large log/request/error outputs use previews plus `fullOutputPath` spill files; sensitive nested auth/header/token fields are not expanded in the model-facing text
|
|
803
806
|
- trace/profiler owner conflicts → when the wrapper has observed one owner active for a session, block conflicting starts/stops with "wrapper believes ..." wording because upstream or external CLI use can desynchronize wrapper-local state
|
|
804
807
|
|
|
805
808
|
## Missing binary behavior
|
|
@@ -84,6 +84,7 @@ import {
|
|
|
84
84
|
type CompiledAgentBrowserSemanticAction,
|
|
85
85
|
type CompiledAgentBrowserSourceLookup,
|
|
86
86
|
} from "./lib/input-modes.js";
|
|
87
|
+
import { parseAllowedDomainsPolicyFromArgs, type AllowedDomainsPolicy } from "./lib/navigation-policy.js";
|
|
87
88
|
import { closeManagedSession, runAgentBrowserTool, type BrowserRunState, type TraceOwner } from "./lib/orchestration/browser-run.js";
|
|
88
89
|
import { findElectronLaunchRecordForSession, getActiveElectronRecords } from "./lib/orchestration/browser-run/session-state.js";
|
|
89
90
|
import { parseBatchStdinJsonArray } from "./lib/orchestration/batch-stdin.js";
|
|
@@ -96,6 +97,7 @@ import {
|
|
|
96
97
|
type ElectronLaunchRecord,
|
|
97
98
|
} from "./lib/orchestration/electron-host/index.js";
|
|
98
99
|
import { buildValidationFailureResult, resolveAgentBrowserInput } from "./lib/orchestration/input-plan.js";
|
|
100
|
+
import type { NetworkRouteRecord } from "./lib/results/contracts.js";
|
|
99
101
|
import type { SessionArtifactManifest } from "./lib/results/contracts.js";
|
|
100
102
|
import {
|
|
101
103
|
buildEvictedSessionArtifactEntries,
|
|
@@ -597,6 +599,57 @@ function restoreArtifactManifestFromBranch(branch: unknown[]): SessionArtifactMa
|
|
|
597
599
|
return restoredManifest;
|
|
598
600
|
}
|
|
599
601
|
|
|
602
|
+
/**
 * Picks the argv recorded on a persisted tool result.
 *
 * `details.args` is tried first and `details.effectiveArgs` second; the first
 * one that is an array made up solely of strings is returned as-is (same array
 * instance). When neither qualifies the result is an empty array.
 *
 * @param details - The `details` record of a stored tool result.
 * @returns The recorded argv, or `[]` when no valid argv is present.
 */
function getToolResultArgs(details: Record<string, unknown>): string[] {
	// Order matters: `args` wins over `effectiveArgs` whenever both are valid.
	for (const key of ["args", "effectiveArgs"]) {
		const candidate = details[key];
		if (!Array.isArray(candidate)) continue;
		if (candidate.every((token) => typeof token === "string")) return candidate;
	}
	return [];
}
|
|
607
|
+
|
|
608
|
+
/**
 * Rebuilds the session-name → allowed-domains policy map by replaying the
 * `agent_browser` tool results found in a branch, in order.
 *
 * Only record entries with `type === "message"` whose message has
 * `toolName === "agent_browser"` and a record-shaped `details` are considered.
 * For each such entry, policies are first dropped for sessions that the entry
 * shows as gone, and then the entry's own policy (if any) is recorded:
 *
 * - a succeeded managed-session outcome with status `"closed"` drops the
 *   attempted, else current, else `details.sessionName` session;
 * - a succeeded outcome with status `"replaced"` drops `replacedSessionName`;
 * - a successful close command drops the explicit `--session`, else
 *   `details.sessionName`, else attempted, else current session;
 * - every session name reported by `getCleanupResultClosedManagedSessionNames`
 *   for `details.electron.cleanup.results` is dropped;
 * - finally, when the call succeeded (or its outcome keeps `details.sessionName`
 *   as the active current session) and the command is not a close command, the
 *   policy parsed from the recorded args is stored under `details.sessionName`.
 *
 * @param branch - Branch entries to replay; non-matching entries are ignored.
 * @returns The policies that remain after replaying every entry.
 */
function restoreAllowedDomainsBySessionFromBranch(branch: unknown[]): Map<string, AllowedDomainsPolicy> {
	const policiesBySession = new Map<string, AllowedDomainsPolicy>();

	const asString = (value: unknown): string | undefined => (typeof value === "string" ? value : undefined);
	const forget = (name: string | undefined): void => {
		if (name) policiesBySession.delete(name);
	};

	for (const entry of branch) {
		if (!isRecord(entry) || entry.type !== "message") continue;
		const message = isRecord(entry.message) ? entry.message : undefined;
		if (message?.toolName !== "agent_browser") continue;
		const details = isRecord(message.details) ? message.details : undefined;
		if (details === undefined) continue;

		const succeeded = getSuccessfulToolResult(details, message);
		const args = getToolResultArgs(details);
		const command = asString(details.command) ?? extractCommandTokens(args)[0];
		const sessionName = asString(details.sessionName);
		const explicitSessionName = extractExplicitSessionName(args);

		const outcome = getManagedSessionOutcome(details);
		const status = asString(outcome?.status);
		const currentName = asString(outcome?.currentSessionName);
		const attemptedName = asString(outcome?.attemptedSessionName);

		// Managed-session lifecycle: a closed or replaced session loses its policy.
		if (outcome?.succeeded === true) {
			if (status === "closed") {
				forget(attemptedName ?? currentName ?? sessionName);
			} else if (status === "replaced") {
				forget(asString(outcome.replacedSessionName));
			}
		}

		// A successful close command also drops the policy of the session it targeted.
		if (succeeded && isCloseCommand(command)) {
			forget(explicitSessionName ?? sessionName ?? attemptedName ?? currentName);
		}

		// Electron cleanup may have closed further managed sessions.
		const electron = isRecord(details.electron) ? details.electron : undefined;
		const cleanup = isRecord(electron?.cleanup) ? electron.cleanup : undefined;
		const cleanupResults = Array.isArray(cleanup?.results) ? cleanup.results : [];
		for (const cleanupResult of cleanupResults) {
			const closedNames = getCleanupResultClosedManagedSessionNames(cleanupResult);
			for (const closedName of closedNames) policiesBySession.delete(closedName);
		}

		// Record this call's policy only for a session that is still in use afterwards.
		if (!sessionName) continue;
		const statusKeepsSession = status === "created" || status === "replaced" || status === "unchanged";
		const outcomeKeepsSessionCurrent = outcome?.activeAfter === true && statusKeepsSession && currentName === sessionName;
		if (!succeeded && !outcomeKeepsSessionCurrent) continue;
		if (isCloseCommand(command)) continue;

		const policy = parseAllowedDomainsPolicyFromArgs(args);
		if (policy) policiesBySession.set(sessionName, policy);
	}

	return policiesBySession;
}
|
|
652
|
+
|
|
600
653
|
function trackOwnedManagedSession(
|
|
601
654
|
sessions: Map<string, OwnedManagedSession>,
|
|
602
655
|
sessionName: string | undefined,
|
|
@@ -634,7 +687,7 @@ function syncOwnedManagedSessionsFromResult(sessions: Map<string, OwnedManagedSe
|
|
|
634
687
|
const status = typeof outcome.status === "string" ? outcome.status : undefined;
|
|
635
688
|
const currentSessionName = typeof outcome.currentSessionName === "string" ? outcome.currentSessionName : undefined;
|
|
636
689
|
const attemptedSessionName = typeof outcome.attemptedSessionName === "string" ? outcome.attemptedSessionName : undefined;
|
|
637
|
-
if (
|
|
690
|
+
if (outcome.activeAfter === true && (status === "created" || status === "replaced" || status === "unchanged")) {
|
|
638
691
|
trackOwnedManagedSession(sessions, currentSessionName, cwd);
|
|
639
692
|
}
|
|
640
693
|
if (succeeded && status === "closed") {
|
|
@@ -966,6 +1019,8 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
|
|
|
966
1019
|
let sessionPageState = new SessionPageState();
|
|
967
1020
|
let traceOwners = new Map<string, TraceOwner>();
|
|
968
1021
|
let artifactManifest: SessionArtifactManifest | undefined;
|
|
1022
|
+
let allowedDomainsBySession = new Map<string, AllowedDomainsPolicy>();
|
|
1023
|
+
let networkRoutesBySession = new Map<string, NetworkRouteRecord[]>();
|
|
969
1024
|
let electronLaunchRecords = new Map<string, ElectronLaunchRecord>();
|
|
970
1025
|
let ownedElectronLaunchRecords = new Map<string, ElectronLaunchRecord>();
|
|
971
1026
|
let branchOwnedElectronLaunchIds = new Set<string>();
|
|
@@ -974,6 +1029,12 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
|
|
|
974
1029
|
const managedSessionExecutionQueue = new AsyncExecutionQueue();
|
|
975
1030
|
let branchStateGeneration = 0;
|
|
976
1031
|
|
|
1032
|
+
const clearSessionScopedBrowserState = (sessionName: string): void => {
|
|
1033
|
+
allowedDomainsBySession.delete(sessionName);
|
|
1034
|
+
networkRoutesBySession.delete(sessionName);
|
|
1035
|
+
sessionPageState.clearSession(sessionName);
|
|
1036
|
+
};
|
|
1037
|
+
|
|
977
1038
|
const restoreBranchBackedState = (ctx: ExtensionContext, options: { resetRuntimeOwnership: boolean }): void => {
|
|
978
1039
|
branchStateGeneration += 1;
|
|
979
1040
|
const previousManagedSessionActive = managedSessionActive;
|
|
@@ -1007,6 +1068,8 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
|
|
|
1007
1068
|
sessionPageState = SessionPageState.fromBranch(branch);
|
|
1008
1069
|
traceOwners = new Map<string, TraceOwner>();
|
|
1009
1070
|
artifactManifest = restoreArtifactManifestFromBranch(branch);
|
|
1071
|
+
allowedDomainsBySession = restoreAllowedDomainsBySessionFromBranch(branch);
|
|
1072
|
+
networkRoutesBySession = new Map<string, NetworkRouteRecord[]>();
|
|
1010
1073
|
electronLaunchRecords = restoreElectronLaunchRecordsFromBranch(branch);
|
|
1011
1074
|
if (options.resetRuntimeOwnership) {
|
|
1012
1075
|
ownedManagedSessions.clear();
|
|
@@ -1085,6 +1148,8 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
|
|
|
1085
1148
|
sessionPageState.reset();
|
|
1086
1149
|
traceOwners = new Map<string, TraceOwner>();
|
|
1087
1150
|
artifactManifest = undefined;
|
|
1151
|
+
allowedDomainsBySession = new Map<string, AllowedDomainsPolicy>();
|
|
1152
|
+
networkRoutesBySession = new Map<string, NetworkRouteRecord[]>();
|
|
1088
1153
|
electronLaunchRecords = new Map<string, ElectronLaunchRecord>();
|
|
1089
1154
|
ownedElectronLaunchRecords = new Map<string, ElectronLaunchRecord>();
|
|
1090
1155
|
branchOwnedElectronLaunchIds = new Set<string>();
|
|
@@ -1191,7 +1256,7 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
|
|
|
1191
1256
|
const closedSessionNames = getCleanupResultsClosedManagedSessionNames(cleanupRecords);
|
|
1192
1257
|
syncElectronCleanupManagedSessions(ownedManagedSessions, cleanupRecords);
|
|
1193
1258
|
for (const closedSessionName of closedSessionNames) {
|
|
1194
|
-
|
|
1259
|
+
clearSessionScopedBrowserState(closedSessionName);
|
|
1195
1260
|
if (closedSessionName === managedSessionName) {
|
|
1196
1261
|
managedSessionActive = false;
|
|
1197
1262
|
freshSessionOrdinal += 1;
|
|
@@ -1219,6 +1284,7 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
|
|
|
1219
1284
|
const generationAtStart = branchStateGeneration;
|
|
1220
1285
|
const sessionPageStateUpdate = sessionPageState.beginUpdate();
|
|
1221
1286
|
const browserRunState: BrowserRunState = {
|
|
1287
|
+
allowedDomainsBySession,
|
|
1222
1288
|
artifactManifest,
|
|
1223
1289
|
closedManagedSessionNames: new Set<string>(),
|
|
1224
1290
|
electronChildProcesses,
|
|
@@ -1229,6 +1295,7 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
|
|
|
1229
1295
|
managedSessionBaseName,
|
|
1230
1296
|
managedSessionCwd,
|
|
1231
1297
|
managedSessionName,
|
|
1298
|
+
networkRoutesBySession,
|
|
1232
1299
|
sessionPageState,
|
|
1233
1300
|
traceOwners,
|
|
1234
1301
|
};
|
|
@@ -1249,6 +1316,8 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
|
|
|
1249
1316
|
});
|
|
1250
1317
|
const branchStateStillCurrent = generationAtStart === branchStateGeneration;
|
|
1251
1318
|
if (serializeBrowserCommand || branchStateStillCurrent) {
|
|
1319
|
+
allowedDomainsBySession = browserRunState.allowedDomainsBySession;
|
|
1320
|
+
networkRoutesBySession = browserRunState.networkRoutesBySession;
|
|
1252
1321
|
artifactManifest = browserRunState.artifactManifest;
|
|
1253
1322
|
freshSessionOrdinal = Math.max(freshSessionOrdinal, browserRunState.freshSessionOrdinal);
|
|
1254
1323
|
managedSessionActive = browserRunState.managedSessionActive;
|
|
@@ -36,7 +36,7 @@ export const AGENT_BROWSER_PARAMS = Type.Object({
|
|
|
36
36
|
description: "Intent action to compile to an existing agent-browser find command, or to upstream select when action=select.",
|
|
37
37
|
}),
|
|
38
38
|
locator: Type.Optional(StringEnum(AGENT_BROWSER_SEMANTIC_LOCATORS, {
|
|
39
|
-
description: "Upstream find locator family to use for check/click/fill
|
|
39
|
+
description: "Upstream find locator family to use for check/click/fill actions.",
|
|
40
40
|
})),
|
|
41
41
|
value: Type.Optional(Type.String({ description: "Locator value for find actions, or a single option value for select actions. For locator=role, role may be supplied instead." })),
|
|
42
42
|
values: Type.Optional(Type.Array(Type.String({ description: "Option value for select actions." }), { description: "One or more option values for select actions.", minItems: 1 })),
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
|
|
7
7
|
export const DEFAULT_SESSION_MODE = "auto" as const;
|
|
8
8
|
|
|
9
|
-
export const AGENT_BROWSER_SEMANTIC_ACTIONS = ["check", "click", "fill", "select"
|
|
9
|
+
export const AGENT_BROWSER_SEMANTIC_ACTIONS = ["check", "click", "fill", "select"] as const;
|
|
10
10
|
|
|
11
11
|
export const AGENT_BROWSER_SEMANTIC_LOCATORS = ["alt", "label", "placeholder", "role", "testid", "text", "title"] as const;
|
|
12
12
|
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Purpose: Keep wrapper-side navigation policy parsing and evaluation small and explicit.
|
|
3
|
+
* Responsibilities: Parse allowed-domain argv values and detect final-page host escapes.
|
|
4
|
+
* Scope: Wrapper diagnostics only; upstream remains responsible for browser-time enforcement.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/** Allowed-domain policy derived from `--allowed-domains` argv values. */
export interface AllowedDomainsPolicy {
  /** Domain entries the policy permits; parseAllowedDomainsPolicyFromArgs stores them normalized and de-duplicated. */
  allowedDomains: string[];
  /** Human-readable rendering of the entries; parseAllowedDomainsPolicyFromArgs joins them with ", ". */
  display: string;
}
|
|
11
|
+
|
|
12
|
+
/** Describes an observed URL whose host is not covered by an allowed-domains policy. */
export interface AllowedDomainsViolation {
  /** The policy's allowed domain entries. */
  allowedDomains: string[];
  /** The policy's display string. */
  allowedDisplay: string;
  /** Host extracted from the observed URL. */
  observedHost: string;
  /** The observed URL as it was supplied. */
  observedUrl: string;
  /** One-line, human-readable description of the violation. */
  summary: string;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Normalize one user-supplied allowed-domain entry to a bare, lowercase host.
 *
 * Accepted shapes include plain hosts (`example.com`), wildcard hosts
 * (`*.example.com`), hosts with a port and/or path (`example.com:8080/x`),
 * fully qualified hosts with a trailing dot, bracketed IPv6 literals
 * (`[::1]:8080`), and full URLs (`https://example.com/path`).
 *
 * Path and port are removed BEFORE the wildcard prefix and trailing dot are
 * stripped, so entries such as `example.com.:443` normalize to `example.com`
 * instead of keeping a trailing dot that could never match an observed host.
 * Bracketed IPv6 literals are kept intact rather than being cut at their
 * first colon.
 *
 * @param value - Raw entry taken from an `--allowed-domains` value.
 * @returns The normalized host, or `undefined` when the entry is empty,
 *   is an unparsable URL, or reduces to nothing.
 */
function normalizeDomainEntry(value: string): string | undefined {
  let candidate = value.trim().toLowerCase();
  if (!candidate) return undefined;

  // Entries that look like absolute URLs are reduced to their hostname.
  if (/^[a-z][a-z0-9+.-]*:\/\//.test(candidate)) {
    try {
      candidate = new URL(candidate).hostname;
    } catch {
      return undefined;
    }
  }

  // Drop any path component.
  candidate = candidate.split("/")[0] ?? "";

  if (candidate.startsWith("[")) {
    // Bracketed IPv6 literal: keep "[...]" and discard a trailing ":port".
    const closing = candidate.indexOf("]");
    if (closing === -1) return undefined;
    candidate = candidate.slice(0, closing + 1);
  } else {
    // Drop any port.
    candidate = candidate.split(":")[0] ?? "";
  }

  // Strip a leading wildcard label and a fully-qualified trailing dot last,
  // once nothing can follow the host any more.
  candidate = candidate.replace(/^\*\./, "").replace(/\.$/, "");
  return candidate.length > 0 ? candidate : undefined;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Break one `--allowed-domains` value into its individual entries.
 *
 * Entries may be separated by commas, whitespace, or any run of both;
 * empty pieces are discarded.
 *
 * @param value - Raw flag value.
 * @returns The non-empty entries in their original order.
 */
function splitAllowedDomainsValue(value: string): string[] {
  const entries: string[] = [];
  for (const piece of value.split(/[,\s]+/)) {
    const trimmed = piece.trim();
    if (trimmed) entries.push(trimmed);
  }
  return entries;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Collect every `--allowed-domains` value from an argv list into one policy.
 *
 * Both `--allowed-domains <value>` and `--allowed-domains=<value>` are
 * recognized, and the flag may appear more than once. In the two-token form
 * the next token is consumed as the value only when it exists and does not
 * start with "-". Entries are normalized and de-duplicated, keeping
 * first-seen order.
 *
 * @param args - Command argv to scan.
 * @returns The combined policy, or `undefined` when no usable entry was found.
 */
export function parseAllowedDomainsPolicyFromArgs(args: readonly string[]): AllowedDomainsPolicy | undefined {
  const flag = "--allowed-domains";
  const inlinePrefix = "--allowed-domains=";
  const rawEntries: string[] = [];

  let cursor = 0;
  while (cursor < args.length) {
    const token = args[cursor];
    cursor += 1;
    if (token === flag) {
      const next = args[cursor];
      if (next && !next.startsWith("-")) {
        rawEntries.push(...splitAllowedDomainsValue(next));
        cursor += 1; // the value token has been consumed
      }
    } else if (token?.startsWith(inlinePrefix)) {
      rawEntries.push(...splitAllowedDomainsValue(token.slice(inlinePrefix.length)));
    }
  }

  // A Set keeps insertion order, so duplicates collapse onto their first occurrence.
  const unique = new Set<string>();
  for (const raw of rawEntries) {
    const normalized = normalizeDomainEntry(raw);
    if (normalized) unique.add(normalized);
  }
  if (unique.size === 0) return undefined;

  const allowedDomains = [...unique];
  return { allowedDomains, display: allowedDomains.join(", ") };
}
|
|
63
|
+
|
|
64
|
+
/**
 * Extract the comparable host from an observed page URL.
 *
 * @param url - URL reported for the page.
 * @returns The lowercase hostname without a trailing dot, or `undefined`
 *   when the URL cannot be parsed or its protocol is not http/https.
 */
function normalizeObservedHost(url: string): string | undefined {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return undefined;
  }
  const isWebProtocol = parsed.protocol === "http:" || parsed.protocol === "https:";
  if (!isWebProtocol) return undefined;
  const lowered = parsed.hostname.toLowerCase();
  return lowered.replace(/\.$/, "");
}
|
|
73
|
+
|
|
74
|
+
/**
 * Decide whether a host is covered by a list of allowed domains.
 *
 * The host is lowercased and stripped of one trailing dot, then compared
 * against each entry as given: it is allowed when it equals an entry or is a
 * subdomain of one (ends with "." + entry).
 *
 * @param host - Host to check.
 * @param allowedDomains - Domain entries to compare against, used verbatim.
 * @returns `true` when at least one entry covers the host.
 */
export function isHostAllowedByDomains(host: string, allowedDomains: readonly string[]): boolean {
  const candidate = host.toLowerCase().replace(/\.$/, "");
  for (const domain of allowedDomains) {
    if (candidate === domain) return true;
    if (candidate.endsWith(`.${domain}`)) return true;
  }
  return false;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Report whether an observed URL falls outside an allowed-domains policy.
 *
 * No violation is reported when the policy or URL is missing, when the URL
 * yields no comparable host (see normalizeObservedHost), or when the host is
 * covered by the policy.
 *
 * @param options.policy - Policy to evaluate against, if any.
 * @param options.url - Observed page URL, if any.
 * @returns A violation record with a ready-made summary, or `undefined`.
 */
export function getAllowedDomainsViolation(options: {
  policy?: AllowedDomainsPolicy;
  url?: string;
}): AllowedDomainsViolation | undefined {
  const { policy, url } = options;
  if (!policy || !url) return undefined;

  const observedHost = normalizeObservedHost(url);
  if (!observedHost || isHostAllowedByDomains(observedHost, policy.allowedDomains)) return undefined;

  return {
    allowedDomains: policy.allowedDomains,
    allowedDisplay: policy.display,
    observedHost,
    observedUrl: url,
    summary: `Navigation policy blocked: --allowed-domains ${policy.display} does not allow ${observedHost} (${url}).`,
  };
}
|
|
@@ -403,10 +403,6 @@ export function buildSelectorTextVisibilityNextActions(options: { diagnostics: S
|
|
|
403
403
|
return options.diagnostics.map((diagnostic, index) => ({ id: index === 0 ? "inspect-visible-text-candidates" : `inspect-visible-text-candidates-${index + 1}`, params: { args: withOptionalSessionArgs(options.sessionName, ["eval", "--stdin"]), stdin: buildVisibleTextProbeScript(diagnostic.selector) }, reason: "Inspect selector match count and visible text before trusting get text on tabbed or hidden DOM content.", safety: "Read-only DOM inspection; use a more specific visible selector or current @ref before acting on hidden-tab text.", tool: "agent_browser" as const }));
|
|
404
404
|
}
|
|
405
405
|
|
|
406
|
-
function isElectronLikeRendererUrl(url: string | undefined): boolean {
|
|
407
|
-
return !!url && /^(?:app|file|vscode-file|vscode|chrome-extension):/i.test(url);
|
|
408
|
-
}
|
|
409
|
-
|
|
410
406
|
/**
 * Canonicalize a selector for comparison: trim it, collapse every whitespace
 * run to a single space, and lowercase the result.
 *
 * @param selector - Selector text as supplied.
 * @returns The canonical form.
 */
function normalizeSelectorForScopeHeuristic(selector: string): string {
  const trimmed = selector.trim();
  const singleSpaced = trimmed.replace(/\s+/g, " ");
  return singleSpaced.toLowerCase();
}
|
|
@@ -419,10 +415,9 @@ function isBroadGetTextSelector(selector: string | undefined): selector is strin
|
|
|
419
415
|
|
|
420
416
|
function getElectronTextScopeContext(options: { currentTarget?: SessionTabTarget; electronLaunchRecords: Map<string, ElectronLaunchRecord>; priorTarget?: SessionTabTarget; sessionName?: string }): ElectronBroadGetTextScopeDiagnostic["electronContext"] | undefined {
|
|
421
417
|
const record = findElectronLaunchRecordForSession(options.sessionName, options.electronLaunchRecords);
|
|
418
|
+
if (!record) return undefined;
|
|
422
419
|
const url = options.currentTarget?.url ?? options.priorTarget?.url;
|
|
423
|
-
|
|
424
|
-
if (isElectronLikeRendererUrl(url)) return { sessionName: options.sessionName, url };
|
|
425
|
-
return undefined;
|
|
420
|
+
return { launchId: record.launchId, sessionName: record.sessionName ?? options.sessionName, url };
|
|
426
421
|
}
|
|
427
422
|
|
|
428
423
|
export function getSourceLookupElectronContext(options: { currentTarget?: SessionTabTarget; electronLaunchRecords: Map<string, ElectronLaunchRecord>; priorTarget?: SessionTabTarget; sessionName?: string }): AgentBrowserSourceLookupAnalysis["electronContext"] | undefined {
|
|
@@ -396,6 +396,7 @@ function buildAgentBrowserResultDetails(options: FinalResultInput, nextActions:
|
|
|
396
396
|
savedFilePath: options.presentation.savedFilePath,
|
|
397
397
|
sourceLookup: options.sourceLookup,
|
|
398
398
|
networkSourceLookup: options.networkSourceLookup,
|
|
399
|
+
networkRouteDiagnostics: options.presentation.networkRouteDiagnostics,
|
|
399
400
|
sessionMode: options.sessionMode,
|
|
400
401
|
sessionTabCorrection: options.sessionTabCorrection,
|
|
401
402
|
sessionTabTarget: options.currentSessionTabTarget,
|
|
@@ -323,9 +323,9 @@ export async function resolveSemanticActionVisibleRefArgs(options: {
|
|
|
323
323
|
sessionName?: string;
|
|
324
324
|
signal?: AbortSignal;
|
|
325
325
|
}): Promise<SemanticActionVisibleRefResolution | undefined> {
|
|
326
|
-
if (!options.compiled || !options.sessionName || options.compiled.locator !== "role" || !["check", "click", "
|
|
326
|
+
if (!options.compiled || !options.sessionName || options.compiled.locator !== "role" || !["check", "click", "fill"].includes(options.compiled.action)) return undefined;
|
|
327
327
|
const snapshotData = await runSessionCommandData({ args: ["snapshot", "-i"], cwd: options.cwd, sessionName: options.sessionName, signal: options.signal });
|
|
328
|
-
const resolution = resolveVisibleRefActionFromSnapshot({ compiledAction: options.compiled, snapshotData });
|
|
328
|
+
const resolution = resolveVisibleRefActionFromSnapshot({ allowFill: true, compiledAction: options.compiled, snapshotData });
|
|
329
329
|
if (!resolution) return undefined;
|
|
330
330
|
return { args: [...getCompiledSemanticActionSessionPrefix(options.compiled), ...resolution.args], snapshot: resolution.snapshot };
|
|
331
331
|
}
|