pi-agent-browser-native 0.2.27 → 0.2.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/README.md +15 -1
- package/docs/COMMAND_REFERENCE.md +8 -4
- package/docs/RELEASE.md +25 -2
- package/docs/SUPPORT_MATRIX.md +14 -6
- package/docs/TOOL_CONTRACT.md +7 -3
- package/extensions/agent-browser/index.ts +416 -5
- package/extensions/agent-browser/lib/playbook.ts +24 -4
- package/extensions/agent-browser/lib/runtime.ts +2 -2
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,18 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.2.28 - 2026-05-15
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- Compact runtime guidance now points agents to the installed package's `README.md`, `docs/COMMAND_REFERENCE.md`, and `docs/TOOL_CONTRACT.md` for on-demand detail instead of injecting the full browser playbook into every browser-oriented turn.
|
|
7
|
+
- Successful top-level `scroll` calls can now report `details.scrollNoop`, visible no-op scroll diagnostics, and exact snapshot/screenshot recovery `nextActions` when wrapper-side probes show the viewport and sampled scroll containers did not move.
|
|
8
|
+
- Successful explicit combobox-targeted actions can now report `details.comboboxFocus` and exact `snapshot -i`, `press ArrowDown`, and `press Enter` recovery `nextActions` when a focused combobox has explicit `aria-expanded` state but no visible options, including after active-session semanticAction role/name clicks resolve through current visible `@ref`s.
|
|
9
|
+
- Successful `record start` / `record restart` calls now warn early with `details.recordingDependencyWarning` when executable `ffmpeg` is missing from the Pi process `PATH`, so agents can fix recording prerequisites before `record stop` needs to encode the WebM.
|
|
10
|
+
- `docs/RELEASE.md` now includes a repeatable public Grafana Play stress checklist for dense-dashboard release dogfood without bundling private dogfood/VFR skills or adding a recipe runtime.
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
- Network request redaction now treats secret-like query and field names such as `sentry_key` and `writeKey` as sensitive in model-visible summaries and details.
|
|
14
|
+
- README and command-reference setup notes now call out `ffmpeg` as the external dependency required for recording workflows.
|
|
15
|
+
|
|
3
16
|
## 0.2.27 - 2026-05-14
|
|
4
17
|
|
|
5
18
|
### Fixed
|
package/README.md
CHANGED
|
@@ -65,6 +65,9 @@ The result is optimized for agent work:
|
|
|
65
65
|
| Stale `@eN` refs fail mysteriously | Records per-session `details.refSnapshot`, rejects mismatched URLs / unknown refs / unsafe `batch` stdin ordering before spawn, adds recovery guidance to rerun `snapshot -i` or use stable `find` locators | `extensions/agent-browser/index.ts`, `test/agent-browser.results.test.ts`, `test/agent-browser.extension-validation.test.ts` |
|
|
66
66
|
| Agents need stable success/failure buckets | Exposes bounded `resultCategory`, `successCategory`, and `failureCategory` on tool `details` for branching without parsing prose | [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md#details), `extensions/agent-browser/lib/results/shared.ts`, `test/agent-browser.results.test.ts` |
|
|
67
67
|
| Models re-snapshot after every click without new URL/title context | Adds optional `details.pageChangeSummary` (and per-batch-step summaries) with `changeType`, compact text, optional `title`/`url`, artifact hints, and `nextActionIds` aligned to `nextActions`; no-navigation clicks can also surface evidence-backed `details.overlayBlockers` candidates | [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md#details), `extensions/agent-browser/lib/results/presentation.ts`, `test/agent-browser.presentation.test.ts` |
|
|
68
|
+
| Dashboard scroll commands can look successful while nothing moves | Samples viewport and prominent scroll-container positions around top-level `scroll` calls; unchanged positions produce `details.scrollNoop`, visible recovery guidance, and exact `nextActions` for snapshot/screenshot verification | [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md#details), [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md#core-page-and-element-commands), `test/agent-browser.extension-validation.test.ts` |
|
|
69
|
+
| Combobox clicks can focus the field without opening options | For explicit combobox-targeted actions, detects focused combobox-like controls with explicit `aria-expanded` state but no visible options and returns `details.comboboxFocus` plus exact `nextActions` for snapshot, ArrowDown, and Enter recovery | [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md#details), [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md#core-page-and-element-commands), `test/agent-browser.extension-validation.test.ts` |
|
|
70
|
+
| Recording workflows fail late when `ffmpeg` is missing | After successful `record start` / `record restart`, warns when `ffmpeg` is not on `PATH` so agents can install or fix PATH before `record stop` | [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md#details), [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md#diagnostics-performance-and-recording), `test/agent-browser.extension-validation.test.ts` |
|
|
68
71
|
| Direct binary help may be blocked in agent sessions | Publishes a repo-readable command reference and verifies it against the target upstream version | `npm run verify` |
|
|
69
72
|
| Agents need bundled `skills` text without touching the live session | Treats `skills list`, `skills get …`, and `skills path …` as stateless JSON reads: no implicit managed `--session` under default `sessionMode: "auto"` (same session-ownership goal as plain-text `--help` / `--version`), while provider workflows stay thin passthroughs that require upstream setup and credentials | [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md#built-in-skills), `extensions/agent-browser/lib/runtime.ts` |
|
|
70
73
|
|
|
@@ -75,6 +78,17 @@ Install upstream `agent-browser` first and make sure it is on `PATH`:
|
|
|
75
78
|
- https://agent-browser.dev/
|
|
76
79
|
- https://github.com/vercel-labs/agent-browser
|
|
77
80
|
|
|
81
|
+
External tools unlock the full command surface (`agent-browser` is required; `ffmpeg` is optional and only needed for recording):
|
|
82
|
+
|
|
83
|
+
| Dependency | Required for | macOS install example |
|
|
84
|
+
| --- | --- | --- |
|
|
85
|
+
| `agent-browser` | All browser automation through this extension | See upstream install docs |
|
|
86
|
+
| `ffmpeg` | `record stop` WebM encoding after `record start` / `record restart` | `brew install ffmpeg` or `brew install ffmpeg-full` |
|
|
87
|
+
|
|
88
|
+
Keep both binaries on `PATH`. `record start` can begin without a file on disk, but `record stop` needs `ffmpeg` to encode the WebM.
|
|
89
|
+
|
|
90
|
+
The native tool also gives agents absolute installed-package doc paths in its compact runtime guidance. Agents should read `README.md` for setup/dependencies, `docs/COMMAND_REFERENCE.md` for targeted command workflows, and `docs/TOOL_CONTRACT.md` for result/detail contracts only when deeper guidance is needed.
|
|
91
|
+
|
|
78
92
|
Then install this Pi package:
|
|
79
93
|
|
|
80
94
|
```bash
|
|
@@ -421,7 +435,7 @@ pi --no-extensions -e .
|
|
|
421
435
|
|
|
422
436
|
This bypasses Pi settings and configured extensions. After editing extension code, restart that Pi process to test the new checkout.
|
|
423
437
|
|
|
424
|
-
For a concrete expanded native-tool smoke matrix (version/help/skills through dashboard/chat families), see [Local development validation](docs/RELEASE.md#local-development-validation) in `docs/RELEASE.md`.
|
|
438
|
+
For a concrete expanded native-tool smoke matrix (version/help/skills through dashboard/chat families), see [Local development validation](docs/RELEASE.md#local-development-validation) in `docs/RELEASE.md`. When changes affect dense dashboards, diagnostics, artifacts, recording, scroll, or combobox behavior, use the public [Grafana stress checklist](docs/RELEASE.md#public-grafana-stress-checklist) for repeatable release dogfood without bundling private skills or recipes.
|
|
425
439
|
|
|
426
440
|
Configured-source lifecycle validation:
|
|
427
441
|
|
package/docs/COMMAND_REFERENCE.md
CHANGED
|
@@ -246,7 +246,7 @@ Prefer `download <selector> <path>` when the target element itself is the downlo
|
|
|
246
246
|
Wrapper result rendering is metadata-first for saved files:
|
|
247
247
|
- screenshots return a saved-path summary, visible artifact metadata, structured `details.artifacts` metadata, and an inline image attachment when safe; the visible block includes artifact type, requested path, absolute path, existence, size, cwd, session, and repair/copy status when applicable
|
|
248
248
|
- downloads, PDFs, `wait --download` files, `state save` state files, diff screenshot output images, traces, CPU profiles, completed WebM recordings from `record stop`, and path-bearing HAR captures return concise saved-path summaries plus structured `details.artifacts` metadata without inlining large files
|
|
249
|
-
- `record start <path>` reports that recording started and that output will be written on `record stop`; the target file may not exist until recording stops
|
|
249
|
+
- `record start <path>` reports that recording started and that output will be written on `record stop`; the target file may not exist until recording stops, and upstream needs `ffmpeg` on `PATH` at stop time to encode the WebM. If `ffmpeg` is missing after a successful `record start` / `record restart`, the wrapper appends `Recording dependency warning: ffmpeg not found on PATH` and sets `details.recordingDependencyWarning` without blocking the upstream command.
|
|
250
250
|
- `batch` keeps each step's artifacts in `details.batchSteps[].artifacts` and aggregates them in top-level `details.artifacts` in step order
|
|
251
251
|
|
|
252
252
|
`diff screenshot` follows the file-artifact path above for the **diff** image: model-visible text and `details.artifacts` focus on that output, while baseline paths stay out of the artifact summary block, and Pi does **not** auto-inline the diff the way it inlines trusted `screenshot` captures. `state load` may print the loaded path in prose but does not add a saved-file artifact entry the way `state save` does.
|
|
@@ -379,6 +379,10 @@ Session note: `skills list`, `skills get …`, and `skills path …` are **state
|
|
|
379
379
|
| `connect <port|url>` | Connect to a browser through CDP. |
|
|
380
380
|
| `close [--all]` | Close the current browser or all sessions. |
|
|
381
381
|
|
|
382
|
+
On dashboards and other apps with nested scroll containers, `scroll <dir> [px]` may report a successful wheel action while the viewport appears unchanged because the page-level scroller was not the one containing the content. For top-level `scroll` calls without startup-scoped launch flags, the wrapper samples viewport and prominent scroll-container positions before and after the command; when nothing changes it appends `Scroll diagnostic: no observed scroll movement`, exposes `details.scrollNoop`, and adds exact `details.nextActions` for a fresh `snapshot -i` and screenshot. Use those before repeating page scrolls; when you need a specific panel, prefer `scrollintoview <@ref>` or a scoped interaction with the actual scrollable region.
|
|
383
|
+
|
|
384
|
+
Comboboxes vary by app. A `click` or `semanticAction` role/name click may focus a searchable combobox without opening its option list. For explicit combobox-targeted actions such as `semanticAction` role `combobox`, the wrapper checks whether a combobox-like element is focused, has explicit `aria-expanded` state, and has no visible listbox/options open; this still applies when the semantic action first resolves to a current visible `@ref` before execution. When that happens it appends `Combobox diagnostic: focused combobox did not expose visible options`, exposes `details.comboboxFocus`, and adds exact `details.nextActions` for a fresh `snapshot -i`, `press ArrowDown`, and `press Enter`. Use those instead of assuming click alone expanded the control; prefer visible option refs or `select` when options are exposed.
|
|
385
|
+
|
|
382
386
|
### Navigation
|
|
383
387
|
|
|
384
388
|
| Command | Purpose |
|
|
@@ -479,8 +483,8 @@ When a snapshot is too large for inline output, the Pi wrapper renders a compact
|
|
|
479
483
|
| `diff url <u1> <u2>` | Compare two pages. |
|
|
480
484
|
| `trace start|stop [path]` | Record a Chrome DevTools trace. |
|
|
481
485
|
| `profiler start|stop [path]` | Record a Chrome DevTools profile. |
|
|
482
|
-
| `record start <path> [url]` | Start WebM video recording; output is written on `record stop`. |
|
|
483
|
-
| `record stop` | Stop and save video. |
|
|
486
|
+
| `record start <path> [url]` | Start WebM video recording; output is written on `record stop`. Requires `ffmpeg` on `PATH` for the final encode. |
|
|
487
|
+
| `record stop` | Stop and save video. If this fails with `ffmpeg not found`, install `ffmpeg` / `ffmpeg-full` and rerun the recording. |
|
|
484
488
|
| `record restart <path> [url]` | Stop any current recording and start a new WebM recording. |
|
|
485
489
|
| `console [--clear]` | View or clear console logs. |
|
|
486
490
|
| `errors [--clear]` | View or clear page errors. |
|
|
@@ -499,7 +503,7 @@ When a snapshot is too large for inline output, the Pi wrapper renders a compact
|
|
|
499
503
|
| `pushstate <url>` | Perform SPA client-side navigation; detects Next.js router pushes and falls back to history navigation events. |
|
|
500
504
|
| `removeinitscript <id>` | Remove an init script registered through upstream init-script mechanisms. |
|
|
501
505
|
|
|
502
|
-
When these diagnostic commands are invoked through the native `agent_browser` tool, structured console, page-error, React, Web Vitals, and SPA outputs render as compact summaries when possible, with large outputs previewed and spilled instead of dumped into context. Large outputs are previewed with a `Full output path:` spill file instead of dumping the entire payload into context. Artifact-producing commands such as `network har stop`, `diff screenshot`, `trace stop`, `profiler stop`, and `record stop` report `details.artifacts[]` plus `details.artifactVerification`; `record start` is reported as pending until `record stop` completes.
|
|
506
|
+
When these diagnostic commands are invoked through the native `agent_browser` tool, structured console, page-error, React, Web Vitals, and SPA outputs render as compact summaries when possible. Large outputs are previewed with a `Full output path:` spill file instead of dumping the entire payload into context. Artifact-producing commands such as `network har stop`, `diff screenshot`, `trace stop`, `profiler stop`, and `record stop` report `details.artifacts[]` plus `details.artifactVerification`; `record start` is reported as pending until `record stop` completes. For video workflows, make sure `ffmpeg` is on `PATH` first; on macOS with Homebrew, `brew install ffmpeg` or `brew install ffmpeg-full` is sufficient. Successful `record start` / `record restart` results warn early with `details.recordingDependencyWarning` when the wrapper cannot find `ffmpeg`, so fix PATH before `record stop` instead of discovering the missing encoder after the capture. The README install section keeps the concise external-dependency list for maximal extension use.
|
|
503
507
|
|
|
504
508
|
Long-running or lifecycle commands should be explicitly paired with cleanup calls: `stream enable` → `stream disable`, `dashboard start` → `dashboard stop`, `trace start` → `trace stop`, `profiler start` → `profiler stop`, and `record start` → `record stop`. The wrapper keeps each subprocess bounded by its normal timeout; it does not keep an interactive `chat` REPL open, so prefer `chat <message>` with `--model` or `AI_GATEWAY_MODEL` for single-shot AI use.
|
|
505
509
|
|
package/docs/RELEASE.md
CHANGED
|
@@ -36,7 +36,7 @@ npm run verify -- release
|
|
|
36
36
|
|
|
37
37
|
`prepublishOnly` intentionally does **not** run `npm run verify -- lifecycle`, `npm run verify -- real-upstream`, or `npm run verify -- benchmark`; those are separate `npm run verify` modes in [`scripts/project.mjs`](../scripts/project.mjs). Treat the bullets below as the full pre-publish contract even though only the `release` slice is automated at publish time.
|
|
38
38
|
|
|
39
|
-
Every release also requires interactive `tmux`-driven Pi dogfood with the native `agent_browser` tool against real sites. Use `pi --no-extensions -e .` from the checkout before publish, drive prompts with `tmux send-keys`, exercise at least one simple static site and one real documentation/product site, include the higher-level `qa` or `job`/`batch` surfaces when they changed, close every opened browser session, remove screenshots/temp artifacts, and record the outcome in the release notes or support-matrix evidence. Automated localhost and fake-upstream gates do not replace this human-readable live-site transcript evidence.
|
|
39
|
+
Every release also requires interactive `tmux`-driven Pi dogfood with the native `agent_browser` tool against real sites. Use `pi --no-extensions -e .` from the checkout before publish, drive prompts with `tmux send-keys`, exercise at least one simple static site and one real documentation/product site, include the higher-level `qa` or `job`/`batch` surfaces when they changed, close every opened browser session, remove screenshots/temp artifacts, and record the outcome in the release notes or support-matrix evidence. Automated localhost and fake-upstream gates do not replace this human-readable live-site transcript evidence. For dense-dashboard stress coverage, use the [public Grafana stress checklist](#public-grafana-stress-checklist) below; it is a maintainer workflow, not a bundled product skill or recipe runtime.
|
|
40
40
|
|
|
41
41
|
The configured-source lifecycle regression harness is required before release because it launches an interactive `pi` process under `tmux` and validates `/reload` plus restart/`/resume` behavior:
|
|
42
42
|
|
|
@@ -46,6 +46,29 @@ npm run verify -- lifecycle
|
|
|
46
46
|
|
|
47
47
|
Use `npm run verify -- lifecycle --keep-artifacts` when debugging failures, then remove retained artifacts after inspection.
|
|
48
48
|
|
|
49
|
+
## Public Grafana stress checklist
|
|
50
|
+
|
|
51
|
+
Use this optional-but-recommended checklist when a release touches dashboard behavior, snapshots, refs, scroll, comboboxes, artifacts, network diagnostics, recording, or prompt guidance. It keeps the useful public Grafana dogfood target repeatable without bundling private dogfood/VFR skills or adding a reusable browser recipe layer.
|
|
52
|
+
|
|
53
|
+
Target:
|
|
54
|
+
|
|
55
|
+
```text
|
|
56
|
+
https://play.grafana.org/d/rYdddlPWk/node-exporter-full?orgId=1&from=now-6h&to=now&timezone=browser&var-datasource=default&var-job=node&var-node=All
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Minimum pass:
|
|
60
|
+
|
|
61
|
+
1. Open the URL with the native `agent_browser` tool in a fresh session.
|
|
62
|
+
2. Run `snapshot -i`; confirm the output is useful on a dense dashboard, including high-value controls and bounded spill behavior when needed.
|
|
63
|
+
3. Exercise one dashboard scroll path. If page-level `scroll` does not move visible content, confirm `details.scrollNoop` / next actions or equivalent guidance points to snapshot/screenshot verification and nested-scroll recovery.
|
|
64
|
+
4. Exercise one explicit combobox-targeted action such as a role/name `semanticAction` on a dashboard variable. If it only focuses the field, confirm `details.comboboxFocus` / next actions point to `snapshot -i`, `press ArrowDown`, and `press Enter` when the closed-state evidence qualifies.
|
|
65
|
+
5. Capture at least one screenshot artifact and verify `details.artifactVerification` before using the file.
|
|
66
|
+
6. If `ffmpeg` is on `PATH`, run a short `record start` / visible interaction / `record stop` cycle and verify the WebM artifact. If `ffmpeg` is absent, confirm `details.recordingDependencyWarning` appears after `record start`, then end the recording check there instead of relying on recording evidence.
|
|
67
|
+
7. Inspect `network requests`, `console`, and `errors` summaries. Treat Grafana Play-side noise such as analytics/Sentry requests, public-demo 403s, and console errors as site noise unless the wrapper leaks secrets, hides actionable failed rows, misclassifies artifacts, or suggests unsafe follow-ups.
|
|
68
|
+
8. Close the browser session and delete temporary screenshots, HARs, recordings, and scratch reports after extracting any release evidence.
|
|
69
|
+
|
|
70
|
+
Record release evidence as a short note with: date, package/checkout source, target URL, browser command families exercised, artifacts collected and cleaned up, known Grafana-side noise observed, and any product findings converted into CueLoop tasks. Do not commit private dogfood scripts, VFR harness files, raw browser profiles, HARs, videos, or `.dogfood/` run output as product docs.
|
|
71
|
+
|
|
49
72
|
## Deterministic agent efficiency benchmark
|
|
50
73
|
|
|
51
74
|
[`scripts/agent-browser-efficiency-benchmark.mjs`](../scripts/agent-browser-efficiency-benchmark.mjs) is an accounting-only benchmark: it does not shell out to `agent-browser`, launch a browser, or read or write Pi sessions. It models representative `agent_browser` call shapes (including optional `stdin` for `batch` and top-level `job`, `qa`, or experimental `sourceLookup` / `networkSourceLookup` objects that compile to batch) and aggregates success rate, tool-call counts, UTF-8 size of model-visible strings, stale-ref failure and recovery counts, artifact success, distinct failure-category coverage, and summed elapsed-time estimates. When extending scenarios, keep them aligned with the closed `RQ-0068` “no reusable recipe layer” rationale in [`ARCHITECTURE.md`](ARCHITECTURE.md#no-reusable-recipe-layer-yet) (benchmark ids cited there are the canonical inventory for that evidence bar).
|
|
@@ -206,7 +229,7 @@ Before publishing:
|
|
|
206
229
|
- run `npm run doctor` and confirm any duplicate-source remediation matches the active package/checkout setup
|
|
207
230
|
- run `npm run verify -- real-upstream` for upstream runtime, result-presentation, or managed-session changes
|
|
208
231
|
- confirm both local-checkout modes still work for pre-release validation: isolated `pi --no-extensions -e .` smoke testing and configured-source lifecycle validation
|
|
209
|
-
- complete interactive `tmux` live-site dogfood with `pi --no-extensions -e .` and the native `agent_browser` tool (at least one simple static site and one real documentation/product site; include `qa` or `job`/`batch` when those surfaces changed; close sessions and remove screenshots/temp artifacts; record evidence)—see [Pre-release checks](#pre-release-checks); automated gates are not a substitute
|
|
232
|
+
- complete interactive `tmux` live-site dogfood with `pi --no-extensions -e .` and the native `agent_browser` tool (at least one simple static site and one real documentation/product site; include `qa` or `job`/`batch` when those surfaces changed; use the [public Grafana stress checklist](#public-grafana-stress-checklist) when dashboard/diagnostic/artifact behavior changed; close sessions and remove screenshots/temp artifacts; record evidence)—see [Pre-release checks](#pre-release-checks); automated gates are not a substitute
|
|
210
233
|
- rerun `npm run verify -- release`
|
|
211
234
|
- run `npm run verify -- lifecycle` for configured-source `/reload` plus restart/`/resume` regression coverage (required before publish; see [Pre-release checks](#pre-release-checks))
|
|
212
235
|
- confirm [`SUPPORT_MATRIX.md`](SUPPORT_MATRIX.md) still maps every current baseline inventory section to docs, runtime handling, tests, and validation status
|
package/docs/SUPPORT_MATRIX.md
CHANGED
|
@@ -28,7 +28,7 @@ When upstream ships a new `agent-browser` or the inventory changes:
|
|
|
28
28
|
- Source of truth: `CAPABILITY_BASELINE.inventorySections` in the same file (stable `id` keys: `skills`, `core-commands`, `state-tabs-frames-dialogs`, `network-storage-artifacts-diagnostics`, `batch-auth-setup-ai`, `options-and-env`).
|
|
29
29
|
- Status: supported for the current wrapper contract.
|
|
30
30
|
- High-priority support gaps: none identified in the baseline audit.
|
|
31
|
-
- Remaining queued work:
|
|
31
|
+
- Remaining queued work: only `RQ-0084` remains active, covering the `0.2.28` npm/GitHub release after npm authentication is restored. Dogfood-driven improvements `RQ-0080` through `RQ-0083` and `RQ-0085` are implemented and are beyond the current baseline support promise for thin upstream command coverage. Constrained `job` (`RQ-0064`), the lightweight `qa` preset (`RQ-0065`), the experimental `sourceLookup` helper (`RQ-0066`), and the experimental `networkSourceLookup` helper (`RQ-0067`) are implemented; see [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#job), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#qa), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#sourcelookup), and [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#networksourcelookup). Reusable browser recipes (`RQ-0068`) are intentionally not adopted as a runtime surface; see [`ARCHITECTURE.md`](ARCHITECTURE.md#no-reusable-recipe-layer-yet).
|
|
32
32
|
|
|
33
33
|
## Verification evidence
|
|
34
34
|
|
|
@@ -36,10 +36,10 @@ Re-run the gates below before each release; this table records what the closure
|
|
|
36
36
|
|
|
37
37
|
| Gate | Evidence | Status |
|
|
38
38
|
| --- | --- | --- |
|
|
39
|
-
| Default local gate | `npm run verify` checks generated playbook drift, `tsc --noEmit`, unit/fake tests, generated command-reference blocks, and live command-reference sampling. | Pass on 2026-05-
|
|
39
|
+
| Default local gate | `npm run verify` checks generated playbook drift, `tsc --noEmit`, unit/fake tests, generated command-reference blocks, and live command-reference sampling. | Pass on 2026-05-15 (`npm run verify`, `agent-browser 0.27.0` on `PATH`). |
|
|
40
40
|
| Real upstream contract | `npm run verify -- real-upstream` runs the localhost fixture matrix against the real installed `agent-browser` matching the baseline. | Pass on 2026-05-14 (`npm run verify -- real-upstream`). |
|
|
41
|
-
| Packaged Pi smoke | `npm run verify -- package-pi` validates package contents, loads exactly one packaged `agent_browser` tool, and executes fake-upstream `--version`. | Pass on 2026-05-
|
|
42
|
-
| `verify -- release` / `prepublishOnly` | `npm run verify -- release` chains the default gate with packaged Pi smoke (`verifySteps` `release` in [`scripts/project.mjs`](../scripts/project.mjs)). `package.json` `prepublishOnly` runs that compose before `npm pack --dry-run` during `npm publish`. It intentionally omits lifecycle, real-upstream, and benchmark modes—see [`RELEASE.md`](RELEASE.md#pre-release-checks). |
|
|
41
|
+
| Packaged Pi smoke | `npm run verify -- package-pi` validates package contents, loads exactly one packaged `agent_browser` tool, and executes fake-upstream `--version`. | Pass on 2026-05-15 as part of `npm run verify -- release`. |
|
|
42
|
+
| `verify -- release` / `prepublishOnly` | `npm run verify -- release` chains the default gate with packaged Pi smoke (`verifySteps` `release` in [`scripts/project.mjs`](../scripts/project.mjs)). `package.json` `prepublishOnly` runs that compose before `npm pack --dry-run` during `npm publish`. It intentionally omits lifecycle, real-upstream, and benchmark modes—see [`RELEASE.md`](RELEASE.md#pre-release-checks). | Pass on 2026-05-15; `prepublishOnly` also passed during the blocked `npm publish` attempt before npm returned `ENEEDAUTH`. |
|
|
43
43
|
| Configured-source lifecycle | `npm run verify -- lifecycle` (`scripts/verify-lifecycle.mjs`) drives `/reload`, restart, `/resume`, session continuity, slash-command sentinel tokens (`v1` then `v2` after rewriting the packaged extension to simulate pickup), and persisted spill reachability with a fake upstream on `PATH`. Passthrough flags are defined in `validatePassthrough` in [`scripts/project.mjs`](../scripts/project.mjs): `--keep-artifacts`, `--verbose`, and `--timeout-ms` plus a separate positive integer value (for example `npm run verify -- lifecycle --keep-artifacts --verbose --timeout-ms 600000`). | Pass on 2026-05-14 (`npm run verify -- lifecycle --keep-artifacts --verbose --timeout-ms 600000`) during release cleanup; retained temp artifacts were removed after inspection. Treat any future unexplained red lifecycle gate as a release blocker. |
|
|
44
44
|
| Quick isolated Pi smoke | `pi --no-extensions -e .` from repo root; native `agent_browser` only. | Covered version/help/skills, open/snapshot/click, eval stdin, batch stdin, screenshot, explicit session, `sessionMode: "fresh"`, network requests, console/errors, diff snapshot, stream status/disable, dashboard start/stop, and chat credential-failure pass-through during RQ-0055; RQ-0056 cleanup spot-check found no lingering tmux or repo-local smoke artifacts. |
|
|
45
45
|
|
|
@@ -48,9 +48,9 @@ Re-run the gates below before each release; this table records what the closure
|
|
|
48
48
|
| Baseline section | Baseline items | Documentation | Runtime handling | Test coverage | Validation status |
|
|
49
49
|
| --- | --- | --- | --- | --- | --- |
|
|
50
50
|
| Built-in skills | `skills list`, `skills get core`, `skills get core --full`, `skills get <name>`, `skills get electron`, `skills get slack`, `skills get dogfood`, `skills get vercel-sandbox`, `skills get agentcore`, `skills path [name]` | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#built-in-skills), generated baseline block, README proof section, release docs. | `isStatelessInspectionCommand` keeps read-only `skills list` / `skills get` / `skills path` JSON inspection stateless while preserving thin upstream passthrough. | `test/agent-browser.runtime.test.ts`; `test/agent-browser.extension-validation.test.ts` skills/provider matrix; real-upstream inspection/skills group. | Supported. Real upstream covers `skills list`, `skills get core --full`, `skills path core`; fake matrix covers specialized skills. |
|
|
51
|
-
| Core page, element, navigation, and extraction commands | `open <url>`, `click <sel>`, `dblclick <sel>`, `type <sel> <text>`, `fill <sel> <text>`, `press <key>`, `keyboard type <text>`, `keyboard inserttext <text>`, `keydown Shift`, `keyup Shift`, `hover <sel>`, `focus <sel>`, `check <sel>`, `uncheck <sel>`, `select <sel> <val...>`, `drag <src> <dst>`, `upload <sel> <files...>`, `download <sel> <path>`, `scroll <dir> [px]`, `scrollintoview <sel>`, `wait <sel|ms>`, `screenshot [path]`, `screenshot --full`, `screenshot --annotate`, `pdf <path>`, `snapshot`, `eval <js>`, `connect <port|url>`, `close [--all]`, `back`, `forward`, `reload`, `pushstate <url>`, `get <what> [selector]`, `is <what> <selector>`, `find <locator> <value> <action>`, `mouse <action> [args]`, `set <setting> [value]` | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#core-page-and-element-commands), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md), README quick start. | Thin upstream passthrough with wrapper-owned `--json`, managed-session planning, stale-ref guidance, artifact verification, page-change summaries, and redaction. | Real-upstream core matrix covers representative interactions/navigation/extraction/artifacts; fake core matrix covers additional passthrough and ordering; presentation/results/runtime tests lock wrapper behavior. | Supported. Some upstream semantics remain upstream-owned; wrapper contract and artifact metadata are tested. |
|
|
51
|
+
| Core page, element, navigation, and extraction commands | `open <url>`, `click <sel>`, `dblclick <sel>`, `type <sel> <text>`, `fill <sel> <text>`, `press <key>`, `keyboard type <text>`, `keyboard inserttext <text>`, `keydown Shift`, `keyup Shift`, `hover <sel>`, `focus <sel>`, `check <sel>`, `uncheck <sel>`, `select <sel> <val...>`, `drag <src> <dst>`, `upload <sel> <files...>`, `download <sel> <path>`, `scroll <dir> [px]`, `scrollintoview <sel>`, `wait <sel|ms>`, `screenshot [path]`, `screenshot --full`, `screenshot --annotate`, `pdf <path>`, `snapshot`, `eval <js>`, `connect <port|url>`, `close [--all]`, `back`, `forward`, `reload`, `pushstate <url>`, `get <what> [selector]`, `is <what> <selector>`, `find <locator> <value> <action>`, `mouse <action> [args]`, `set <setting> [value]` | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#core-page-and-element-commands), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md), README quick start. | Thin upstream passthrough with wrapper-owned `--json`, managed-session planning, stale-ref guidance, artifact verification, page-change summaries, no-op scroll diagnostics, focused-combobox diagnostics, and redaction. | Real-upstream core matrix covers representative interactions/navigation/extraction/artifacts; fake core matrix covers additional passthrough and ordering plus no-op scroll and combobox-focus diagnostics; presentation/results/runtime tests lock wrapper behavior. | Supported. Some upstream semantics remain upstream-owned; wrapper contract and artifact metadata are tested. |
|
|
52
52
|
| Sessions, state, tabs, frames, dialogs, and windows | `session`, `session list`, `state save <path>`, `state load <path>`, `tab list`, `tab new --label <name> [url]`, `tab <t<N>|label>`, `frame <selector|main>`, `dialog accept [text]`, `dialog dismiss`, `dialog status`, `window new` | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#session-state-frames-dialogs-windows-and-inspection-commands) (session/state/tabs/frames/dialogs/windows), stateful workflow notes, [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details). | Stateful presentation summaries/redaction; state save artifact handling; explicit/implicit session restore; tab target pinning; frame/dialog/window passthrough. | `test/agent-browser.extension-validation.test.ts` stateful matrix; runtime session/resume tests; presentation stateful redaction tests; lifecycle harness for reload/resume. | Supported. External profile/auth state remains operator-owned and documented. |
|
|
53
|
-
| Network, storage, artifacts, diagnostics, and performance | `network <action>`, `network route <url> [--abort|--body <json>] [--resource-type <csv>]`, `network request <requestId>`, `cookies [get|set|clear]`, `cookies set --curl <file>`, `storage <local|session>`, `diff snapshot`, `diff screenshot --baseline`, `diff url <u1> <u2>`, `trace start|stop [path]`, `profiler start|stop [path]`, `record start <path> [url]`, `record restart <path> [url]`, `record stop`, `console [--clear]`, `errors [--clear]`, `highlight <sel>`, `inspect`, `clipboard <op> [text]`, `stream enable [--port <n>]`, `stream disable`, `stream status`, `react tree`, `react inspect <id>`, `react renders start`, `react renders stop [--json]`, `react suspense [--only-dynamic] [--json]`, `vitals [url] [--json]`, `removeinitscript <id>` | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#page-state-finding-mouse-settings-network-and-storage) and diagnostic sections; [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details). | Thin passthrough plus command-specific compact diagnostic summaries, artifact metadata for HAR/diff/trace/profile/record, sensitive-data redaction, timeout bounds, and cleanup-pair guidance. | Fake non-core matrix covers network/diff/trace/profiler/record/console/errors/highlight/inspect/clipboard/stream/dashboard/chat JSON shapes and redaction; real-upstream covers safe network requests/HAR, diff, trace/profiler, console/errors/highlight, stream, vitals, and React missing-renderer. | Supported. Browser-opening or environment-sensitive operations (`inspect`, OS clipboard, full React app inspection) are delegated thinly and documented as needing suitable local/browser state. |
|
|
53
|
+
| Network, storage, artifacts, diagnostics, and performance | `network <action>`, `network route <url> [--abort|--body <json>] [--resource-type <csv>]`, `network request <requestId>`, `cookies [get|set|clear]`, `cookies set --curl <file>`, `storage <local|session>`, `diff snapshot`, `diff screenshot --baseline`, `diff url <u1> <u2>`, `trace start|stop [path]`, `profiler start|stop [path]`, `record start <path> [url]`, `record restart <path> [url]`, `record stop`, `console [--clear]`, `errors [--clear]`, `highlight <sel>`, `inspect`, `clipboard <op> [text]`, `stream enable [--port <n>]`, `stream disable`, `stream status`, `react tree`, `react inspect <id>`, `react renders start`, `react renders stop [--json]`, `react suspense [--only-dynamic] [--json]`, `vitals [url] [--json]`, `removeinitscript <id>` | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#page-state-finding-mouse-settings-network-and-storage) and diagnostic sections; [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details). | Thin passthrough plus command-specific compact diagnostic summaries, artifact metadata for HAR/diff/trace/profile/record, early missing-ffmpeg recording warnings, sensitive-data redaction, timeout bounds, and cleanup-pair guidance. | Fake non-core matrix covers network/diff/trace/profiler/record/console/errors/highlight/inspect/clipboard/stream/dashboard/chat JSON shapes and redaction; real-upstream covers safe network requests/HAR, diff, trace/profiler, console/errors/highlight, stream, vitals, and React missing-renderer. | Supported. Browser-opening or environment-sensitive operations (`inspect`, OS clipboard, full React app inspection) are delegated thinly and documented as needing suitable local/browser state. |
|
|
54
54
|
| Batch, auth, confirmations, setup, dashboard, and AI commands | `batch [--bail]`, `auth save <name>`, `auth save <name> --password-stdin`, `auth login <name>`, `auth list`, `auth show <name>`, `auth delete <name>`, `confirm <id>`, `deny <id>`, `chat <message>`, `dashboard start --port <n>`, `dashboard stop`, `install`, `install --with-deps`, `upgrade`, `doctor [--fix]`, `doctor --offline --quick`, `doctor --json`, `profiles` | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#batch-auth-confirmations-sessions-chat-dashboard-and-setup), README security notes, release docs. | Batch stdin is native-tool-only; top-level `job`, `qa`, and experimental `sourceLookup` / `networkSourceLookup` compile to `batch` with generated stdin (caller `stdin` rejected for those modes); auth/confirmation details are redacted; dashboard/chat/setup/doctor are passed through thinly with timeout/cleanup guidance; package doctor remains separate and read-only. | Unit/fake tests cover batch, auth password stdin, confirmations, dashboard/chat summaries, and doctor diagnostics; extension-validation covers `job`, `qa`, `sourceLookup`, and `networkSourceLookup` compilation plus `details.sourceLookup` / `details.networkSourceLookup` evidence; [`scripts/agent-browser-efficiency-benchmark.mjs`](../scripts/agent-browser-efficiency-benchmark.mjs) includes `source-lookup-visible-element` and `network-source-lookup-failed-request` scenarios; quick isolated Pi smoke covered dashboard start/stop and chat credential-failure pass-through. | Supported. `install`, `upgrade`, `doctor --fix`, and interactive auth/chat/setup flows are upstream-owned and should be run only when the operator intends those side effects. |
|
|
55
55
|
| Global flags, config, providers, policy, and environment | `--profile <name|path>`, `AGENT_BROWSER_PROFILE`, `--session <name>`, `AGENT_BROWSER_SESSION`, `--session-name <name>`, `AGENT_BROWSER_SESSION_NAME`, `--state <path>`, `AGENT_BROWSER_STATE`, `--auto-connect`, `AGENT_BROWSER_AUTO_CONNECT`, `--headers <json>`, `--init-script <path>`, `AGENT_BROWSER_INIT_SCRIPTS`, `--enable <feature>`, `AGENT_BROWSER_ENABLE`, `--executable-path <path>`, `AGENT_BROWSER_EXECUTABLE_PATH`, `--extension <path>`, `AGENT_BROWSER_EXTENSIONS`, `--args <args>`, `AGENT_BROWSER_ARGS`, `--user-agent <ua>`, `AGENT_BROWSER_USER_AGENT`, `--proxy <server>`, `AGENT_BROWSER_PROXY`, `HTTP_PROXY`, `HTTPS_PROXY`, `ALL_PROXY`, `--proxy-bypass <hosts>`, `AGENT_BROWSER_PROXY_BYPASS`, `NO_PROXY`, `--ignore-https-errors`, `AGENT_BROWSER_IGNORE_HTTPS_ERRORS`, `--allow-file-access`, `AGENT_BROWSER_ALLOW_FILE_ACCESS`, `--headed`, `AGENT_BROWSER_HEADED`, `--cdp <port>`, `--color-scheme <scheme>`, `AGENT_BROWSER_COLOR_SCHEME`, `--download-path <path>`, `AGENT_BROWSER_DOWNLOAD_PATH`, `--engine <name>`, `AGENT_BROWSER_ENGINE`, `--no-auto-dialog`, `AGENT_BROWSER_NO_AUTO_DIALOG`, `--json`, `AGENT_BROWSER_JSON`, `--annotate`, `AGENT_BROWSER_ANNOTATE`, `--screenshot-dir <path>`, `AGENT_BROWSER_SCREENSHOT_DIR`, `--screenshot-quality <n>`, `AGENT_BROWSER_SCREENSHOT_QUALITY`, `--screenshot-format <fmt>`, `AGENT_BROWSER_SCREENSHOT_FORMAT`, `--content-boundaries`, `AGENT_BROWSER_CONTENT_BOUNDARIES`, `--max-output <chars>`, `AGENT_BROWSER_MAX_OUTPUT`, `--allowed-domains <list>`, `AGENT_BROWSER_ALLOWED_DOMAINS`, `--action-policy <path>`, `AGENT_BROWSER_ACTION_POLICY`, `--confirm-actions <list>`, `AGENT_BROWSER_CONFIRM_ACTIONS`, `--confirm-interactive`, `AGENT_BROWSER_CONFIRM_INTERACTIVE`, `-p, --provider <name>`, `AGENT_BROWSER_PROVIDER`, `browserbase`, `kernel`, `browseruse`, `browserless`, `agentcore`, `--device <name>`, `AGENT_BROWSER_IOS_DEVICE`, `agent-browser -p ios device list`, `agent-browser -p ios swipe up`, 
`agent-browser -p ios tap @e1`, `--model <name>`, `AI_GATEWAY_MODEL`, `-v, --verbose`, `-q, --quiet`, `--debug`, `AGENT_BROWSER_DEBUG`, `AGENT_BROWSER_CONFIG`, `AGENT_BROWSER_DEFAULT_TIMEOUT`, `AGENT_BROWSER_STREAM_PORT`, `AGENT_BROWSER_IDLE_TIMEOUT_MS`, `AGENT_BROWSER_ENCRYPTION_KEY`, `AGENT_BROWSER_STATE_EXPIRE_DAYS`, `AGENT_BROWSER_IOS_UDID`, `AI_GATEWAY_URL`, `AI_GATEWAY_API_KEY` | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#important-global-flags-config-and-environment), README provider/setup notes, [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#sessionmode), architecture/runtime docs. | Runtime handles value flags, launch-scoped flags, redacted invocation echoes, `sessionMode: "fresh"` recovery hints, explicit sessions, and provider/device launch-scoping. Process env forwards a curated allowlist/prefix set for upstream/provider credentials without cloning the whole parent env. | Runtime tests cover launch-scoped flags, provider/device planning, redaction, stateless inspections, and explicit/fresh sessions. Process tests cover provider env prefixes. Fake provider/specialized-skill matrix covers provider argv/env passthrough. Package doctor checks version/source drift. | Supported. Provider clouds, iOS/Appium, Browserbase/Kernel/BrowserUse/Browserless/AgentCore, proxies, profiles, and credentials require external setup; the wrapper documents and forwards them thinly rather than emulating provider behavior. |
|
|
56
56
|
|
|
@@ -83,3 +83,11 @@ Native `job`, `qa`, experimental `sourceLookup`, and experimental `networkSource
|
|
|
83
83
|
`RQ-0078` improves getter/eval discoverability: `extensions/agent-browser/lib/results/presentation.ts` matches upstream failure text containing `unknown command`, `unknown subcommand`, or `unrecognized command` (case-insensitive) when the failed command token is one of `attr`, `count`, `html`, `text`, `title`, `url`, or `value`, then adds grouped-`get` prose; only `title` / `url` also emit read-only `nextActions` (`use-get-title` / `use-get-url`, with `--session` when the failed call named a session). The getter block is skipped when selector recovery already injected an `Agent-browser hint:` line into the same error string. `extensions/agent-browser/index.ts` adds `details.evalStdinHint` plus visible `Eval stdin hint` when `looksLikeFunctionEvalStdin` matches trimmed stdin and upstream JSON carries an empty-object `data.result`. Contract: [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details) (`nextActions`, `evalStdinHint`); human workflow: [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md) extraction note and README quick start; fake coverage: `buildToolPresentation suggests grouped getter commands for common unknown getter shortcuts` and `agentBrowserExtension warns when eval stdin returns an empty object from a function-shaped snippet`.
|
|
84
84
|
|
|
85
85
|
`RQ-0079` clarifies artifact lifecycle and cleanup ownership: `extensions/agent-browser/index.ts` adds `details.artifactCleanup` and visible `Artifact lifecycle` copy on successful `close` when `artifactManifest.entries` is non-empty (`getArtifactCleanupGuidance`), stating that close does not delete explicit artifacts; `explicitArtifactPaths` carries up to ten distinct existing `explicit-path` manifest paths after a filesystem existence check, skipping stale paths already removed by host tools (possibly empty when the recent window has no existing explicit rows). Contract: [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details) (`artifactCleanup`); human workflow: [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md) artifact retention section and README artifact notes; fake coverage: `agentBrowserExtension reports artifact lifecycle guidance on close`.
|
|
86
|
+
|
|
87
|
+
`RQ-0080` adds no-op scroll recovery for dense dashboards and nested panes: for successful top-level `scroll`, `extensions/agent-browser/index.ts` samples viewport and prominent scroll-container positions before and after execution with read-only session-scoped `eval --stdin` probes. If no sampled position changes, it emits `details.scrollNoop`, appends visible `Scroll diagnostic: no observed scroll movement`, appends exact `inspect-after-noop-scroll` / `verify-noop-scroll-visually` next actions, and updates `pageChangeSummary.nextActionIds` so agents can branch without parsing prose. Contract: [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details) (`scrollNoop`, `nextActions`); human workflow: [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md) scroll note; fake coverage: `agentBrowserExtension reports no-op scroll diagnostics with recovery next actions`.
|
|
88
|
+
|
|
89
|
+
`RQ-0081` adds focused-combobox recovery for dense dashboard controls: after successful explicit combobox-targeted actions (for example `semanticAction` role `combobox` click), `extensions/agent-browser/index.ts` runs a read-only focused-element probe and emits `details.comboboxFocus` plus visible `Combobox diagnostic` text when a combobox-like control is focused, has explicit `aria-expanded` state, and has no visible listbox or options open. It appends exact `inspect-focused-combobox`, `try-open-combobox-with-arrow`, and `try-open-combobox-with-enter` next actions, all session-prefixed when applicable. The probe is gated to explicit combobox targets to avoid ordinary-click false positives and preserves the original combobox semantic target even when active-session visible-ref resolution rewrites execution to `click @ref`. Contract: [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details) (`comboboxFocus`, `nextActions`); human workflow: [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md) combobox note; fake coverage: `agentBrowserExtension reports focused combobox diagnostics with option-opening next actions` and `agentBrowserExtension preserves combobox diagnostics after semanticAction visible-ref resolution`.
|
|
90
|
+
|
|
91
|
+
`RQ-0082` adds early recording dependency warnings: after successful `record start` / `record restart`, `extensions/agent-browser/index.ts` checks whether an executable `ffmpeg` is visible on the Pi process `PATH`. If not, it emits a non-blocking `details.recordingDependencyWarning` plus visible `Recording dependency warning: ffmpeg not found on PATH` text so agents can install `ffmpeg` or fix `PATH` before `record stop` needs to encode the WebM. Contract: [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details) (`recordingDependencyWarning`); human workflow: [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md) recording notes and README dependency table; fake coverage: `agentBrowserExtension warns after record start when ffmpeg is missing`.
|
|
92
|
+
|
|
93
|
+
`RQ-0083` documents a repeatable public Grafana stress checklist in [`RELEASE.md`](RELEASE.md#public-grafana-stress-checklist) instead of bundling private dogfood/VFR skills or adding a recipe runtime. The checklist uses Grafana Play Node Exporter Full to manually exercise dense snapshots, no-op scroll diagnostics, combobox recovery, screenshots/artifacts, optional short recording, network/console/error summaries, and cleanup. Treat known Grafana Play noise (analytics/Sentry requests, public-demo 403s, console errors) as site noise unless the wrapper leaks secrets, hides actionable rows, mishandles artifacts, or suggests unsafe follow-ups. Evidence should be a short release note or CueLoop task, not committed `.dogfood/` outputs, raw HARs, videos, or private scripts. Validation on 2026-05-15 used the native tool against Grafana Play: fresh open, dense `snapshot -i`, scroll, combobox semantic click, screenshots with verified artifacts, `network requests`, `console`, `close`, and host cleanup of `/tmp/pi-agent-browser-grafana-rq0083*.png`; observed 11 public-demo 403 request rows and Grafana console noise as expected site noise.
|
package/docs/TOOL_CONTRACT.md
CHANGED
|
@@ -26,7 +26,7 @@ It also keeps the main UX where it belongs: the agent invokes the tool directly
|
|
|
26
26
|
|
|
27
27
|
The tool guidance should be written for task discovery first, not wrapper implementation first. That means the description should emphasize browser use cases like web research, reading live docs, clicking, filling, screenshots, extraction, and authenticated/profile-based workflows. Low-level wrapper details like `stdin` and exact CLI args belong in the schema and guidelines, not the lead description.
|
|
28
28
|
|
|
29
|
-
The tool also needs an operating playbook, not just a capability list. The model should not have to rediscover basics each session. The canonical agent-facing playbook lives in `extensions/agent-browser/lib/playbook.ts`;
|
|
29
|
+
The tool also needs an operating playbook, not just a capability list. The model should not have to rediscover basics each session, but always-on guidance must stay concise. The canonical agent-facing playbook lives in `extensions/agent-browser/lib/playbook.ts`; it provides compact runtime rules plus absolute installed-package paths to `README.md`, `docs/COMMAND_REFERENCE.md`, and this contract so agents with file tools can read targeted guidance on demand instead of receiving the full docs in prompt context. Generated Markdown fragments are updated by `npm run docs -- playbook write`, and `npm run docs -- playbook check` fails when checked-in documentation drifts.
|
|
30
30
|
|
|
31
31
|
The native command reference in `docs/COMMAND_REFERENCE.md` is driven by the same pattern: canonical metadata lives in `scripts/agent-browser-capability-baseline.mjs`, selected regions are generated into the Markdown by `npm run docs -- command-reference write`, and `npm run docs` plus `npm run verify -- command-reference` catch drift (the latter also samples the installed `agent-browser` on `PATH`). Maintainer workflow details live in `AGENTS.md` under upstream capability baseline.
|
|
32
32
|
|
|
@@ -53,6 +53,7 @@ Agent-facing efficiency claims are measured with `npm run benchmark:agent-browse
|
|
|
53
53
|
- For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.
|
|
54
54
|
- For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.
|
|
55
55
|
- For downloads, prefer download <selector> <path> when an element click should save a file. Do not rely on click alone when you need the downloaded file on disk.
|
|
56
|
+
- On dashboards with nested scroll containers, verify scroll with a screenshot or fresh snapshot -i; if the viewport did not move, prefer scrollintoview <@ref> or target the actual scrollable region. For comboboxes, a click/semanticAction may only focus the field; re-snapshot and fall back to type, press Enter/arrow keys, select, or visible option refs.
|
|
56
57
|
- When using eval --stdin, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics.
|
|
57
58
|
- When using eval --stdin for extraction, return the value you want instead of relying on console.log as the primary result channel. Prefer plain expressions like ({ title: document.title }) or explicitly invoked functions like (() => ({ title: document.title }))(); if a function-shaped snippet returns {}, details.evalStdinHint may warn that the function was serialized instead of called. If get text on a CSS selector surfaces details.selectorTextVisibility or selectorTextVisibilityAll, prefer a visible @ref, a more specific selector, or the inspect-visible-text-candidates nextAction over hidden tab content.
|
|
58
59
|
- When details.pageChangeSummary is present, use changeType and summary as a compact signal for navigation, DOM mutation, confirmations, or artifacts; when nextActionIds is set, match those ids to entries in details.nextActions (or per-step nextActions inside batch) for concrete follow-up payloads instead of inferring from prose alone. If a no-navigation click surfaces details.overlayBlockers, inspect the fresh snapshot evidence before using a close/dismiss candidate nextAction; ordinary page chrome without dialog/alertdialog evidence should not trigger this diagnostic.
|
|
@@ -368,7 +369,7 @@ Ref preflight details (implementation in `extensions/agent-browser/index.ts`):
|
|
|
368
369
|
|
|
369
370
|
**Presentation redaction (implementation map):** Successful non-`batch` tool calls and each successful `batchSteps[]` row run upstream `data` through `redactPresentationData` in `extensions/agent-browser/lib/results/presentation.ts`: `cookies` and `storage` walk objects/arrays and replace case-insensitive `value` keys with `"[REDACTED]"` (diagnostic formatters still describe rows without expanding secrets); every other command’s payload is recursively scrubbed with `redactStructuredPresentationValue`, which redacts known sensitive key names and applies string-level sensitivity heuristics so network, diff, trace/profiler, stream, dashboard, chat, and other structured results do not echo bearer tokens, proxy credentials, or similar fields verbatim into `details.data`. Echoed `command` arrays in `details` and in batch roll-ups use `redactInvocationArgs` from `extensions/agent-browser/lib/runtime.ts` to mask trailing values for sensitive global flags (including `--body`, `--headers`, `--password`, and `--proxy`), preserve the special positional rules for `cookies set`, `storage local|session set`, and `set credentials`, and scrub other argv tokens for URLs and inline secrets. Failed batch steps additionally run `redactExactValues` on structured step errors so literals taken from that step’s argv (cookie value, storage set value, `--password` / `--password=` tokens) cannot reappear inside formatted error blobs.
|
|
370
371
|
|
|
371
|
-
`nextActions` is an optional machine-readable list of exact native `agent_browser` follow-ups. Each entry includes `tool: "agent_browser"`, an `id`, a short `reason`, optional `safety`, and either `params` (`args`, optional `stdin`, optional `sessionMode`) or an `artifactPath` for saved-file workflows. Agents should prefer these payloads over prose when present. Current recommendations include: `open` success → `snapshot -i`; mutating/navigation commands (see `buildAgentBrowserNextActions` in source for the exact command set) → `snapshot -i`; stale refs and selector failures → `snapshot -i` via `refresh-interactive-refs` (prefixed with `--session <name>` when the failed call ran in a named or managed session); unknown getter shortcuts such as `title` / `url` → exact read-only retries like `get title` / `get url` with ids `use-get-title` / `use-get-url`; semantic `selector-not-found` failures that compiled from `semanticAction` may append `try-searchbox-name-candidate`, `try-textbox-name-candidate`, `try-button-name-candidate`, `try-link-name-candidate`, or `try-labeled-textbox-candidate` after presentation `nextActions` only for the bounded fill/click pairs enumerated under `semanticAction` (not for `select`); semantic `stale-ref` failures that compiled from `semanticAction` may also include `retry-semantic-action-after-stale-ref` after that snapshot step; qualifying same-URL top-level clicks (see `overlayBlockers` below) with fresh snapshot evidence of likely overlay/banner/dialog close controls may append `inspect-overlay-state` and bounded `try-overlay-blocker-candidate-*` entries; `get text <selector>` calls with hidden/multiple CSS matches may append `inspect-visible-text-candidates` with a read-only `eval --stdin` probe (each prefixed with `--session <name>` when `details.sessionName` is set, same `sessionPrefixArgs` rule as other session-scoped follow-ups); confirmations → exact `confirm <id>` and `deny <id>` choices; tab drift → `tab list` then `snapshot 
-i`; download verification failures or missing successful download artifacts → `wait --download [path]`; saved artifacts → the artifact path to inspect/consume after checking `artifactVerification`/metadata; missing non-download artifacts → `verify-artifact-path` so agents do not trust an absent file. When nothing applies, the field is omitted.
|
|
372
|
+
`nextActions` is an optional machine-readable list of exact native `agent_browser` follow-ups. Each entry includes `tool: "agent_browser"`, an `id`, a short `reason`, optional `safety`, and either `params` (`args`, optional `stdin`, optional `sessionMode`) or an `artifactPath` for saved-file workflows. Agents should prefer these payloads over prose when present. Current recommendations include: `open` success → `snapshot -i`; mutating/navigation commands (see `buildAgentBrowserNextActions` in source for the exact command set) → `snapshot -i`; stale refs and selector failures → `snapshot -i` via `refresh-interactive-refs` (prefixed with `--session <name>` when the failed call ran in a named or managed session); unknown getter shortcuts such as `title` / `url` → exact read-only retries like `get title` / `get url` with ids `use-get-title` / `use-get-url`; semantic `selector-not-found` failures that compiled from `semanticAction` may append `try-searchbox-name-candidate`, `try-textbox-name-candidate`, `try-button-name-candidate`, `try-link-name-candidate`, or `try-labeled-textbox-candidate` after presentation `nextActions` only for the bounded fill/click pairs enumerated under `semanticAction` (not for `select`); semantic `stale-ref` failures that compiled from `semanticAction` may also include `retry-semantic-action-after-stale-ref` after that snapshot step; qualifying same-URL top-level clicks (see `overlayBlockers` below) with fresh snapshot evidence of likely overlay/banner/dialog close controls may append `inspect-overlay-state` and bounded `try-overlay-blocker-candidate-*` entries; successful top-level `scroll` calls whose pre/post viewport and sampled scroll-container positions do not change may append `inspect-after-noop-scroll` and `verify-noop-scroll-visually`; explicit combobox-targeted actions that focus a combobox without visible options may append `inspect-focused-combobox`, `try-open-combobox-with-arrow`, and `try-open-combobox-with-enter`; `get text 
<selector>` calls with hidden/multiple CSS matches may append `inspect-visible-text-candidates` with a read-only `eval --stdin` probe (each prefixed with `--session <name>` when `details.sessionName` is set, same `sessionPrefixArgs` rule as other session-scoped follow-ups); confirmations → exact `confirm <id>` and `deny <id>` choices; tab drift → `tab list` then `snapshot -i`; download verification failures or missing successful download artifacts → `wait --download [path]`; saved artifacts → the artifact path to inspect/consume after checking `artifactVerification`/metadata; missing non-download artifacts → `verify-artifact-path` so agents do not trust an absent file. When nothing applies, the field is omitted.
|
|
372
373
|
|
|
373
374
|
**Unknown-command getter hints (failure presentation):** `buildToolPresentation` in `extensions/agent-browser/lib/results/presentation.ts` only runs this path when upstream error text (after model-facing redaction) matches `unknown command`, `unknown subcommand`, or `unrecognized command` (case-insensitive) **and** the failed invocation’s primary command token is one of `attr`, `count`, `html`, `text`, `title`, `url`, or `value`. Visible text then includes a grouped-`get` hint line plus per-token guidance (`get text <selector>`, `get html …`, `get attr …`, `get count …`, `get value …`, `get title`, `get url`). Machine `nextActions` with ids `use-get-title` / `use-get-url` are emitted only for `title` / `url`, with `params.args` optionally prefixed by `--session <name>` when the failed call targeted a named session. If the error string already contains `Agent-browser hint:` from selector recovery (stale-ref or unsupported selector dialect appendages), the getter block is skipped so two stacked `Agent-browser hint:` headers are not emitted.
|
|
374
375
|
|
|
@@ -441,6 +442,9 @@ Additional structured fields can appear when relevant:
|
|
|
441
442
|
- `navigationSummary` for navigation-style commands like `click`, `back`, `forward`, and `reload`
|
|
442
443
|
- `pageChangeSummary` for compact mutation/artifact/navigation summaries on commands that can change browser state
|
|
443
444
|
- `overlayBlockers` for conservative post-click overlay/banner/dialog blocker candidates when a direct click stays on the same URL and a fresh snapshot provides evidence (`candidates`, `summary`, and `snapshot` per `OverlayBlockerDiagnostic` in `extensions/agent-browser/index.ts`)
|
|
445
|
+
- `scrollNoop` after a successful **top-level** `scroll` when wrapper-side read-only probes before and after the command show no change in `window.scrollX` / `window.scrollY` and no change in the sampled prominent scrollable containers. To avoid pre-launching a session without caller startup state, this probe is skipped when the invocation includes startup-scoped flags such as `--profile`, `--state`, `--session-name`, `--cdp`, providers, init scripts, or similar launch settings. Shape: `{ reason: "no-observed-scroll-position-change", message, before, after, recommendations }`; `before` / `after` include viewport dimensions, document scroll dimensions, and up to ten sampled container descriptors plus scroll offsets. Container descriptors use only sample index, tag name, and ARIA role; DOM ids/classes are intentionally not stored. This diagnostic is conservative evidence that the page-level scroll likely missed a nested pane, not proof that every app-specific region is unchanged. Visible text appends `Scroll diagnostic: no observed scroll movement`, and `details.nextActions` gains `inspect-after-noop-scroll` (`snapshot -i`) plus `verify-noop-scroll-visually` (`screenshot`), session-prefixed when applicable.
|
|
446
|
+
- `comboboxFocus` after a successful explicit combobox-targeted `click` / `fill` / `find … click|fill|select` (for example `semanticAction` with role `combobox`, including when that semantic action resolves through a current visible `@ref` before execution) when a read-only probe sees the active element is combobox-like, `aria-expanded` is explicitly present (`false` or `true`), and no visible `listbox` / `option` / menu option elements are open. Shape: `{ reason: "focused-combobox-without-visible-options", message, activeElement, visibleListboxCount, visibleOptionCount, recommendations }`; `activeElement` includes bounded role/tag/expanded/hasPopup/name metadata with normal text redaction. Visible text appends `Combobox diagnostic: focused combobox did not expose visible options`, and `details.nextActions` gains `inspect-focused-combobox` (`snapshot -i`), `try-open-combobox-with-arrow` (`press ArrowDown`), and `try-open-combobox-with-enter` (`press Enter`), session-prefixed when applicable. The diagnostic is deliberately gated to explicit combobox-targeted calls to avoid extra probes or false positives on ordinary clicks/textboxes.
|
|
447
|
+
- `recordingDependencyWarning` after a successful `record start` or `record restart` when the wrapper cannot find an executable `ffmpeg` on the Pi process `PATH`. Shape: `{ reason: "ffmpeg-missing-for-recording", dependency: "ffmpeg", command, message, recommendations }`. Visible text appends `Recording dependency warning: ffmpeg not found on PATH`. This is a non-blocking preflight warning: upstream may start recording, but `record stop` needs `ffmpeg` to encode the WebM.
|
|
444
448
|
- `selectorTextVisibility` after a **successful** upstream `get text <selector>` (standalone or inside a successful `batch`) when the wrapper’s follow-up probe finds a hazard: more than one DOM match (upstream reads the first `querySelectorAll` hit, which may be the wrong tab/panel), or the first match is hidden while at least one other match is visible (requires multiple DOM nodes so a visible peer exists; a lone hidden match is not flagged). The probe is a read-only `eval --stdin` script (`buildVisibleTextProbeScript` in `extensions/agent-browser/index.ts`) that counts matches, applies a small visibility heuristic (`display`/`visibility`/`opacity` plus non-zero client rects), and may include a redacted `firstVisibleTextPreview`. It is **not** run for page-scoped `@e…` selectors or when the selector string is withheld because `selectorMayExposeSensitiveLiteral` would risk echoing secrets in probe output. `details.selectorTextVisibility` mirrors the primary diagnostic (first sorted entry); when several selectors in one `batch` qualify, `selectorTextVisibilityAll` lists every diagnostic sorted so hidden-first cases precede generic multi-match ambiguity. Appended `details.nextActions` use ids `inspect-visible-text-candidates` and `inspect-visible-text-candidates-2`, … with the probe replayed via `eval --stdin` for each hazardous selector.
|
|
445
449
|
- `evalStdinHint` after a successful `eval --stdin` when caller stdin (trimmed) looks function-shaped to the wrapper’s lightweight detector (`looksLikeFunctionEvalStdin` in `extensions/agent-browser/index.ts`: leading `function` / `async function`, parenthesized arrow `(…) =>`, or a concise `name =>` / `async name =>` form) **and** upstream JSON `data` is an object whose `result` field is an empty object (`{}`). It includes `reason` and `suggestion`; visible output appends `Eval stdin hint` with the same guidance. This is a heuristic for the common mistake of returning a function object instead of invoking it or passing a plain expression, not a JavaScript parser or proof that the page returned no useful data.
|
|
446
450
|
- `timeoutPartialProgress` after `runAgentBrowserProcess` reports `timedOut` (wrapper child-process watchdog) when best-effort recovery finds useful context. `summary` is a short sentence counting how many declared artifact paths exist on disk versus how many were scanned, and whether page context came from live session reads or only from a planned URL (when nothing in the plan declares an artifact path, the fraction may read `0/0` while `currentPage` can still carry session or planned URL context). `steps` lists planned argv from the compiled `job` or `qa` batch plan (`compiledJob` in `extensions/agent-browser/index.ts`, which is only populated for those top-level modes) or, when that object is absent, from the same JSON-array `batch` stdin the tool sends upstream—whether caller-authored or wrapper-generated for `sourceLookup` / `networkSourceLookup` (1-based indices; only JSON-array stdin whose elements are string[] argv arrays is parsed); timeouts on other argv shapes may still emit `currentPage` / summary evidence without `steps`. `currentPage` comes from session-scoped `get url` / `get title` when the session answers, otherwise a fallback URL may be inferred from the last `open` / `navigate` / `pushstate` step in the plan. `artifacts` covers declared output paths on `screenshot`, `pdf`, `download`, and `wait --download` steps (absolute path, existence, optional `sizeBytes`, `stepIndex`). Visible text repeats the same block under `Timeout partial progress`, applying URL and path-segment redaction; the prose `Planned steps` list shows at most six steps, then an omitted-count line when the plan is longer. This is recovery evidence only; missing entries do not prove the upstream step never ran or that no other side effects occurred.
|
|
@@ -470,7 +474,7 @@ The TUI renderer is user-facing only. It may compact or colorize what the human
|
|
|
470
474
|
|
|
471
475
|
Worth doing in v1:
|
|
472
476
|
- screenshots → saved-path summary, visible artifact metadata, `details.artifacts` metadata, and inline image attachment when safe; screenshot paths that upstream would treat ambiguously, such as `.dogfood/run/foo.png`, are normalized to absolute paths before launch and repaired from upstream temp output when possible
|
|
473
|
-
- file artifacts such as PDFs, downloads, `wait --download` files, `state save` state files, diff screenshot output images, traces, CPU profiles, completed WebM recordings, and path-bearing HAR captures → concise saved-path summaries plus metadata in `details.artifacts` and bounded recent metadata in `details.artifactManifest`; `record start` reports recording lifecycle state and the future output path without adding a missing manifest entry; direct saved-file workflows also expose `details.savedFilePath` / `details.savedFile`; large or binary artifacts are not inlined into model context; the recent manifest cap can age out explicit-file metadata but does not remove explicit saved files from disk
|
|
477
|
+
- file artifacts such as PDFs, downloads, `wait --download` files, `state save` state files, diff screenshot output images, traces, CPU profiles, completed WebM recordings, and path-bearing HAR captures → concise saved-path summaries plus metadata in `details.artifacts` and bounded recent metadata in `details.artifactManifest`; `record start` reports recording lifecycle state and the future output path without adding a missing manifest entry; upstream needs `ffmpeg` on `PATH` for `record stop` to encode the WebM, and successful `record start` / `record restart` calls may also expose `details.recordingDependencyWarning` when the wrapper cannot find `ffmpeg`; direct saved-file workflows also expose `details.savedFilePath` / `details.savedFile`; large or binary artifacts are not inlined into model context; the recent manifest cap can age out explicit-file metadata but does not remove explicit saved files from disk
|
|
474
478
|
- `diff screenshot` → same file-artifact pattern as above for the **diff** image path only (summary text uses “Saved diff image”); baseline paths and other fields stay in the structured payload but are not echoed as separate saved artifacts in the visible artifact block, and there is no Pi inline image attachment for the diff output
|
|
475
479
|
- `state load` → completion text may mention the loaded path, but the wrapper does **not** treat that path as a new saved artifact (`artifacts` / `artifactManifest` stay unset) the way `state save` does
|
|
476
480
|
- auth, cookies, storage, dialog, frame, state, network, debug, diff, stream, dashboard, chat, and other structured results → concise summaries that avoid expanding secret-bearing payloads; credential-like keys, values, URLs, body snippets, bearer/basic credentials, and cookie/storage values are redacted before model-facing output and `details.data`
|
|
@@ -6,8 +6,10 @@
|
|
|
6
6
|
* Invariants/Assumptions: agent-browser is installed separately on PATH, the wrapper targets the current locally installed upstream version only, and no backward-compatibility shims are provided.
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
|
-
import {
|
|
10
|
-
import {
|
|
9
|
+
import { constants as fsConstants } from "node:fs";
|
|
10
|
+
import { access, copyFile, mkdir, readFile, readdir, rm, stat } from "node:fs/promises";
|
|
11
|
+
import { delimiter, dirname, extname, isAbsolute, join, resolve } from "node:path";
|
|
12
|
+
import { fileURLToPath } from "node:url";
|
|
11
13
|
|
|
12
14
|
import { StringEnum } from "@earendil-works/pi-ai";
|
|
13
15
|
import {
|
|
@@ -114,6 +116,48 @@ interface CompiledAgentBrowserSemanticAction {
|
|
|
114
116
|
args: string[];
|
|
115
117
|
}
|
|
116
118
|
|
|
119
|
+
/**
 * Scroll state captured by the wrapper's read-only scroll probe.
 *
 * Two snapshots (before and after a `scroll` command) are compared to decide
 * whether the command produced any observable movement.
 */
interface ScrollPositionSnapshot {
	/** Page-level horizontal scroll offset (`window.scrollX`). */
	scrollX: number;
	/** Page-level vertical scroll offset (`window.scrollY`). */
	scrollY: number;
	/** Viewport width (`window.innerWidth`). */
	innerWidth: number;
	/** Viewport height (`window.innerHeight`). */
	innerHeight: number;
	/** Document scroll width reported by the probe. */
	scrollWidth: number;
	/** Document scroll height reported by the probe. */
	scrollHeight: number;
	/** Container count reported by the probe; the parser falls back to the number of parsed samples when it is absent. */
	containerCount: number;
	/** Sampled scrollable containers: a descriptor id plus the container's scroll offsets. */
	containers: Array<{ id: string; scrollLeft: number; scrollTop: number }>;
}
|
|
129
|
+
|
|
130
|
+
/**
 * Diagnostic attached when a `scroll` command succeeded but the before/after
 * probes showed no change in the compared scroll positions.
 */
interface ScrollNoopDiagnostic {
	/** Fixed machine-readable reason code for this diagnostic. */
	reason: "no-observed-scroll-position-change";
	/** Human-readable explanation of the observation. */
	message: string;
	/** Snapshot taken before the scroll command ran. */
	before: ScrollPositionSnapshot;
	/** Snapshot taken after the scroll command ran. */
	after: ScrollPositionSnapshot;
	/** Suggested follow-up steps, rendered as bullet lines in visible text. */
	recommendations: string[];
}
|
|
137
|
+
|
|
138
|
+
/**
 * Diagnostic attached when a combobox-like control is focused but the probe
 * found no visible listbox/option elements.
 */
interface ComboboxFocusDiagnostic {
	/** Fixed machine-readable reason code for this diagnostic. */
	reason: "focused-combobox-without-visible-options";
	/** Human-readable explanation of the observation. */
	message: string;
	/** Bounded metadata about the focused element, as read by the probe. */
	activeElement: {
		/** Lower-cased tag name of the focused element. */
		tagName?: string;
		/** Value of the element's `role` attribute. */
		role?: string;
		/** Accessible-name approximation (already passed through text redaction). */
		name?: string;
		/** Value of the element's `aria-haspopup` attribute. */
		hasPopup?: string;
		/** Value of the element's `aria-expanded` attribute. */
		expanded?: string;
	};
	/** Number of visible listbox/menu elements the probe counted. */
	visibleListboxCount: number;
	/** Number of visible option/menuitem elements the probe counted. */
	visibleOptionCount: number;
	/** Suggested follow-up steps, rendered as bullet lines in visible text. */
	recommendations: string[];
}
|
|
152
|
+
|
|
153
|
+
/**
 * Non-blocking warning emitted after a successful `record start` /
 * `record restart` when `ffmpeg` could not be found on `PATH`.
 */
interface RecordingDependencyWarning {
	/** Fixed machine-readable reason code for this warning. */
	reason: "ffmpeg-missing-for-recording";
	/** The missing dependency; currently always `ffmpeg`. */
	dependency: "ffmpeg";
	/** Which recording command triggered the warning. */
	command: "record start" | "record restart";
	/** Human-readable explanation of the warning. */
	message: string;
	/** Suggested remediation steps, rendered as bullet lines in visible text. */
	recommendations: string[];
}
|
|
160
|
+
|
|
117
161
|
interface CompiledAgentBrowserJobStep {
|
|
118
162
|
action: AgentBrowserJobStepAction;
|
|
119
163
|
args: string[];
|
|
@@ -2680,6 +2724,140 @@ async function collectNavigationSummary(options: {
|
|
|
2680
2724
|
return { title, url };
|
|
2681
2725
|
}
|
|
2682
2726
|
|
|
2727
|
+
/**
 * Parses the scroll probe's response into a `ScrollPositionSnapshot`.
 *
 * Accepts either the raw probe object or an envelope whose `result` field holds
 * it. Returns `undefined` when the payload is not an object or when any of the
 * six page-level numeric fields is missing. Container entries that are not
 * objects, or that lack numeric `scrollTop` / `scrollLeft`, are dropped.
 *
 * @param data - Untrusted data returned by the probe command.
 * @returns The validated snapshot, or `undefined` when the payload is unusable.
 */
function extractScrollPositionSnapshot(data: unknown): ScrollPositionSnapshot | undefined {
	const payload = isRecord(data) && isRecord(data.result) ? data.result : data;
	if (!isRecord(payload)) return undefined;

	const asNumber = (value: unknown): number | undefined => (typeof value === "number" ? value : undefined);

	const scrollX = asNumber(payload.scrollX);
	const scrollY = asNumber(payload.scrollY);
	const innerHeight = asNumber(payload.innerHeight);
	const innerWidth = asNumber(payload.innerWidth);
	const scrollHeight = asNumber(payload.scrollHeight);
	const scrollWidth = asNumber(payload.scrollWidth);
	if (
		scrollX === undefined ||
		scrollY === undefined ||
		innerHeight === undefined ||
		innerWidth === undefined ||
		scrollHeight === undefined ||
		scrollWidth === undefined
	) {
		return undefined;
	}

	const containers: ScrollPositionSnapshot["containers"] = [];
	if (Array.isArray(payload.containers)) {
		payload.containers.forEach((entry: unknown, index: number) => {
			if (!isRecord(entry)) return;
			const reportedId = typeof entry.id === "string" ? entry.id : undefined;
			// Only ids shaped like "<index>:<tag>[role=<role>]" are kept; anything else
			// is replaced by a positional placeholder so arbitrary page text is not stored.
			const id = reportedId && /^\d+:[a-z][a-z0-9-]*(?:\[role=[a-z-]+\])?$/i.test(reportedId) ? reportedId : `sample-${index}`;
			const scrollTop = asNumber(entry.scrollTop);
			const scrollLeft = asNumber(entry.scrollLeft);
			if (scrollTop === undefined || scrollLeft === undefined) return;
			containers.push({ id, scrollLeft, scrollTop });
		});
	}

	const containerCount = typeof payload.containerCount === "number" ? payload.containerCount : containers.length;
	return { containerCount, containers, innerHeight, innerWidth, scrollHeight, scrollWidth, scrollX, scrollY };
}
|
|
2758
|
+
|
|
2759
|
+
/**
 * Read-only page script (sent via `eval --stdin`) that reports the current
 * scroll state.
 *
 * It returns the window scroll offsets, viewport size, document scroll size,
 * and up to ten scrollable descendants of `<body>`, largest client area first.
 * Each sampled container is described only by its index among the scrollable
 * elements, its tag name, and its `role` attribute.
 *
 * `containerCount` is the total number of scrollable elements found BEFORE the
 * ten-entry cap is applied, so consumers can tell when the sample was truncated
 * (previously it was computed after slicing and always equalled the sample size).
 */
const SCROLL_POSITION_EVAL = `(() => {
	const viewport = {
		scrollX: window.scrollX,
		scrollY: window.scrollY,
		innerHeight: window.innerHeight,
		innerWidth: window.innerWidth,
		scrollHeight: Math.max(document.documentElement?.scrollHeight || 0, document.body?.scrollHeight || 0),
		scrollWidth: Math.max(document.documentElement?.scrollWidth || 0, document.body?.scrollWidth || 0),
	};
	const describe = (element, index) => {
		const role = element.getAttribute("role") || "";
		const id = element.tagName.toLowerCase();
		return {
			id: String(index) + ":" + id + (role ? "[role=" + role + "]" : ""),
			scrollTop: element.scrollTop,
			scrollLeft: element.scrollLeft,
			area: element.clientWidth * element.clientHeight,
		};
	};
	const scrollable = Array.from(document.querySelectorAll("body *"))
		.filter((element) => element instanceof HTMLElement && (element.scrollHeight > element.clientHeight + 1 || element.scrollWidth > element.clientWidth + 1))
		.map(describe);
	const containers = scrollable
		.sort((left, right) => right.area - left.area)
		.slice(0, 10)
		.map(({ area, ...entry }) => entry);
	return { ...viewport, containerCount: scrollable.length, containers };
})()`;
|
|
2786
|
+
|
|
2787
|
+
/**
 * Runs the scroll-position probe script in the given session through
 * `eval --stdin` and parses the response.
 *
 * @param options.cwd - Working directory for the session command.
 * @param options.sessionName - Optional session to target.
 * @param options.signal - Optional abort signal forwarded to the command runner.
 * @returns The parsed snapshot, or `undefined` when the response cannot be parsed.
 */
async function collectScrollPositionSnapshot(options: {
	cwd: string;
	sessionName?: string;
	signal?: AbortSignal;
}): Promise<ScrollPositionSnapshot | undefined> {
	const { cwd, sessionName, signal } = options;
	const probeData = await runSessionCommandData({
		args: ["eval", "--stdin"],
		cwd,
		sessionName,
		signal,
		stdin: SCROLL_POSITION_EVAL,
	});
	return extractScrollPositionSnapshot(probeData);
}
|
|
2800
|
+
|
|
2801
|
+
/**
 * Reports whether two scroll snapshots describe the same scroll state.
 *
 * Compares the page scroll offsets, the document scroll dimensions, and each
 * sampled container (matched by position, then by id and both offsets).
 * Viewport size and `containerCount` are not part of the comparison.
 *
 * @returns `true` when every compared value is identical.
 */
function sameScrollPositionSnapshot(left: ScrollPositionSnapshot, right: ScrollPositionSnapshot): boolean {
	const pageLevelMatches =
		left.scrollX === right.scrollX &&
		left.scrollY === right.scrollY &&
		left.scrollHeight === right.scrollHeight &&
		left.scrollWidth === right.scrollWidth &&
		left.containers.length === right.containers.length;
	if (!pageLevelMatches) return false;

	for (const [index, container] of left.containers.entries()) {
		const counterpart = right.containers[index];
		if (counterpart === undefined) return false;
		if (counterpart.id !== container.id) return false;
		if (counterpart.scrollTop !== container.scrollTop) return false;
		if (counterpart.scrollLeft !== container.scrollLeft) return false;
	}
	return true;
}
|
|
2816
|
+
|
|
2817
|
+
/**
 * Builds the no-op scroll diagnostic when both probes succeeded and report the
 * same scroll state.
 *
 * @param before - Snapshot taken before the scroll command, if available.
 * @param after - Snapshot taken after the scroll command, if available.
 * @returns The diagnostic, or `undefined` when either snapshot is missing or the
 *   snapshots differ.
 */
function buildScrollNoopDiagnostic(before: ScrollPositionSnapshot | undefined, after: ScrollPositionSnapshot | undefined): ScrollNoopDiagnostic | undefined {
	if (before === undefined || after === undefined) return undefined;
	if (!sameScrollPositionSnapshot(before, after)) return undefined;

	const recommendations = [
		"Run snapshot -i or screenshot to confirm what is visible before choosing the next action.",
		"On dashboards and panes with nested scrolling, use scrollintoview <@ref> for a visible target or target the actual scrollable region instead of repeating page scrolls.",
	];
	return {
		after,
		before,
		message: "Scroll reported success, but the viewport and sampled scrollable containers did not change position.",
		reason: "no-observed-scroll-position-change",
		recommendations,
	};
}
|
|
2830
|
+
|
|
2831
|
+
/**
 * Produces the machine-readable follow-ups offered after a no-op scroll: an
 * interactive snapshot and a screenshot.
 *
 * @param sessionName - When set, each action's args are prefixed with
 *   `--session <name>`.
 * @returns The two follow-up actions, in that order.
 */
function buildScrollNoopNextActions(sessionName: string | undefined): AgentBrowserNextAction[] {
	const sessionPrefix: string[] = sessionName ? ["--session", sessionName] : [];

	const inspectRefs: AgentBrowserNextAction = {
		id: "inspect-after-noop-scroll",
		params: { args: [...sessionPrefix, "snapshot", "-i"] },
		reason: "Refresh interactive refs and inspect whether the intended target is inside a nested scroll container.",
		safety: "Do not assume repeated page scrolls will move dashboard panels or nested panes.",
		tool: "agent_browser",
	};
	const captureViewport: AgentBrowserNextAction = {
		id: "verify-noop-scroll-visually",
		params: { args: [...sessionPrefix, "screenshot"] },
		reason: "Capture the current viewport to verify whether the scroll actually changed visible content.",
		safety: "Use screenshot evidence before concluding a dense dashboard did or did not move.",
		tool: "agent_browser",
	};
	return [inspectRefs, captureViewport];
}
|
|
2850
|
+
|
|
2851
|
+
/**
 * Renders the no-op scroll diagnostic as the text block appended to visible
 * output: a headline, the reason, the sampled/total container counts from the
 * `after` snapshot, and one bullet per recommendation.
 *
 * @returns The newline-joined text, or `undefined` when there is no diagnostic.
 */
function formatScrollNoopDiagnosticText(diagnostic: ScrollNoopDiagnostic | undefined): string | undefined {
	if (diagnostic === undefined) return undefined;

	const { after, message, recommendations } = diagnostic;
	const lines: string[] = [];
	lines.push("Scroll diagnostic: no observed scroll movement.");
	lines.push(`Reason: ${message}`);
	lines.push(`Sampled scrollable containers: ${after.containers.length}/${after.containerCount}.`);
	for (const recommendation of recommendations) {
		lines.push(`- ${recommendation}`);
	}
	return lines.join("\n");
}
|
|
2860
|
+
|
|
2683
2861
|
function mergeNavigationSummaryIntoData(data: unknown, navigationSummary: NavigationSummary): unknown {
|
|
2684
2862
|
if (isRecord(data)) {
|
|
2685
2863
|
return { ...data, navigationSummary };
|
|
@@ -2687,6 +2865,182 @@ function mergeNavigationSummaryIntoData(data: unknown, navigationSummary: Naviga
|
|
|
2687
2865
|
return { navigationSummary, result: data };
|
|
2688
2866
|
}
|
|
2689
2867
|
|
|
2868
|
+
/**
 * Read-only page script (sent via `eval --stdin`) that inspects the focused
 * element and counts visible option surfaces.
 *
 * It reports the focused element's role, `aria-haspopup`, `aria-expanded`, tag
 * name, and a name approximation (aria-label, placeholder, title, or text
 * content, trimmed to 80 characters), plus how many listbox/menu and
 * option/menuitem elements are visible. Visibility means: not `display: none`,
 * not `visibility: hidden`, non-zero opacity, and at least one client rect.
 *
 * `comboboxLike` is only true when an HTMLElement is actually focused. Without
 * the explicit `active !== null` guard, `active?.getAttribute(...)` evaluates to
 * `undefined`, which is `!== null`, so an unfocused page was reported as
 * combobox-like.
 */
const COMBOBOX_FOCUS_EVAL = `(() => {
	const isVisible = (element) => {
		if (!(element instanceof HTMLElement)) return false;
		const style = window.getComputedStyle(element);
		if (style.display === "none" || style.visibility === "hidden" || Number(style.opacity) === 0) return false;
		return element.getClientRects().length > 0;
	};
	const active = document.activeElement instanceof HTMLElement ? document.activeElement : null;
	const role = active?.getAttribute("role") || undefined;
	const hasPopup = active?.getAttribute("aria-haspopup") || undefined;
	const expanded = active?.getAttribute("aria-expanded") || undefined;
	const tagName = active?.tagName.toLowerCase();
	const name = (active?.getAttribute("aria-label") || active?.getAttribute("placeholder") || active?.getAttribute("title") || active?.textContent || "").trim().slice(0, 80) || undefined;
	const visibleListboxCount = Array.from(document.querySelectorAll('[role="listbox"], [role="menu"]')).filter(isVisible).length;
	const visibleOptionCount = Array.from(document.querySelectorAll('[role="option"], option, [role="menuitem"]')).filter(isVisible).length;
	const comboboxLike = active !== null && (role === "combobox" || hasPopup === "listbox" || hasPopup === "menu" || tagName === "select" || active.getAttribute("aria-autocomplete") !== null);
	return { activeElement: active ? { expanded, hasPopup, name, role, tagName } : undefined, comboboxLike, visibleListboxCount, visibleOptionCount };
})()`;
|
|
2886
|
+
|
|
2887
|
+
/**
 * Turns the combobox probe response into a diagnostic when the focused control
 * looks like a combobox with explicit expanded state but no visible options.
 *
 * Accepts either the raw probe object or an envelope whose `result` field holds
 * it. A diagnostic is produced only when `comboboxLike` is `true`, an
 * `activeElement` record is present, `aria-expanded` was reported as `"true"`
 * or `"false"`, and both visible counts are zero (non-numeric counts are
 * treated as zero). The element name is passed through `redactSensitiveText`.
 *
 * @param data - Untrusted data returned by the probe command.
 * @returns The diagnostic, or `undefined` when the conditions are not met.
 */
function extractComboboxFocusDiagnostic(data: unknown): ComboboxFocusDiagnostic | undefined {
	const payload = isRecord(data) && isRecord(data.result) ? data.result : data;
	if (!isRecord(payload)) return undefined;
	if (payload.comboboxLike !== true) return undefined;
	const focused = payload.activeElement;
	if (!isRecord(focused)) return undefined;

	const asString = (value: unknown): string | undefined => (typeof value === "string" ? value : undefined);
	const asCount = (value: unknown): number => (typeof value === "number" ? value : 0);

	const visibleListboxCount = asCount(payload.visibleListboxCount);
	const visibleOptionCount = asCount(payload.visibleOptionCount);
	const expanded = asString(focused.expanded);
	const hasExplicitExpandedState = expanded === "false" || expanded === "true";
	if (!hasExplicitExpandedState || visibleListboxCount > 0 || visibleOptionCount > 0) return undefined;

	const rawName = asString(focused.name);
	return {
		activeElement: {
			expanded,
			hasPopup: asString(focused.hasPopup),
			name: rawName === undefined ? undefined : redactSensitiveText(rawName),
			role: asString(focused.role),
			tagName: asString(focused.tagName),
		},
		message: "A combobox-like control is focused, but no listbox or option elements are visibly open.",
		reason: "focused-combobox-without-visible-options",
		recommendations: [
			"Run snapshot -i to inspect whether options appeared under a different role or portal.",
			"Try ArrowDown or Enter to open the option list before selecting, or use select/visible option refs when available.",
		],
		visibleListboxCount,
		visibleOptionCount,
	};
}
|
|
2912
|
+
|
|
2913
|
+
/**
 * Decides whether a raw command explicitly targets a combobox and should be
 * followed by the combobox focus probe.
 *
 * The tokens must contain `combobox` or `listbox` (case-insensitive, exact
 * token). The command must then be `click` or `fill`, or a `find` whose tokens
 * include `click`, `fill`, or `select`.
 *
 * @param command - Primary command name, if one was parsed.
 * @param commandTokens - All tokens of the invocation.
 */
function isComboboxFocusDiagnosticCommand(command: string | undefined, commandTokens: string[]): boolean {
	const comboboxRolePattern = /^(?:combobox|listbox)$/i;
	const mentionsComboboxRole = commandTokens.some((token) => comboboxRolePattern.test(token));
	if (!mentionsComboboxRole) return false;

	switch (command) {
		case "click":
		case "fill":
			return true;
		case "find":
			return commandTokens.some((token) => token === "click" || token === "fill" || token === "select");
		default:
			return false;
	}
}
|
|
2919
|
+
|
|
2920
|
+
/**
 * Reads the role value from a compiled semantic action that uses the `role`
 * locator, i.e. the token following the first `find role` pair in its args.
 *
 * @returns The role token, or `undefined` when the locator is not `role`, when
 *   the args contain no `find` immediately followed by `role`, or when no token
 *   follows that pair.
 */
function getCompiledSemanticActionRoleValue(compiled: CompiledAgentBrowserSemanticAction): string | undefined {
	if (compiled.locator !== "role") return undefined;

	const findAt = compiled.args.indexOf("find");
	const usesRoleLocator = findAt >= 0 && compiled.args[findAt + 1] === "role";
	return usesRoleLocator ? compiled.args[findAt + 2] : undefined;
}
|
|
2926
|
+
|
|
2927
|
+
/**
 * Decides whether a compiled semantic action targets a combobox and should be
 * followed by the combobox focus probe: the action must be `click`, `fill`, or
 * `select`, and its role value must be `combobox` or `listbox`
 * (case-insensitive).
 */
function isComboboxFocusDiagnosticSemanticAction(compiled: CompiledAgentBrowserSemanticAction | undefined): boolean {
	if (compiled === undefined) return false;

	const probedActions: string[] = ["click", "fill", "select"];
	if (!probedActions.includes(compiled.action)) return false;

	const roleValue = getCompiledSemanticActionRoleValue(compiled) ?? "";
	return /^(?:combobox|listbox)$/i.test(roleValue);
}
|
|
2931
|
+
|
|
2932
|
+
/**
 * Runs the combobox focus probe when the invocation explicitly targeted a
 * combobox, either through its raw command tokens or through a compiled
 * semantic action. No session command is issued otherwise.
 *
 * @param options.command - Primary command name, if one was parsed.
 * @param options.commandTokens - All tokens of the invocation.
 * @param options.cwd - Working directory for the session command.
 * @param options.semanticAction - Compiled semantic action, when the call used one.
 * @param options.sessionName - Optional session to target.
 * @param options.signal - Optional abort signal forwarded to the command runner.
 * @returns The diagnostic, or `undefined` when the probe was skipped or found
 *   nothing to report.
 */
async function collectComboboxFocusDiagnostic(options: {
	command?: string;
	commandTokens: string[];
	cwd: string;
	semanticAction?: CompiledAgentBrowserSemanticAction;
	sessionName?: string;
	signal?: AbortSignal;
}): Promise<ComboboxFocusDiagnostic | undefined> {
	const targetsCombobox =
		isComboboxFocusDiagnosticCommand(options.command, options.commandTokens) ||
		isComboboxFocusDiagnosticSemanticAction(options.semanticAction);
	if (!targetsCombobox) return undefined;

	const probeData = await runSessionCommandData({
		args: ["eval", "--stdin"],
		cwd: options.cwd,
		sessionName: options.sessionName,
		signal: options.signal,
		stdin: COMBOBOX_FOCUS_EVAL,
	});
	return extractComboboxFocusDiagnostic(probeData);
}
|
|
2949
|
+
|
|
2950
|
+
/**
 * Produces the machine-readable follow-ups offered when a focused combobox
 * shows no visible options: an interactive snapshot, `press ArrowDown`, and
 * `press Enter`.
 *
 * @param sessionName - When set, each action's args are prefixed with
 *   `--session <name>`.
 * @returns The three follow-up actions, in that order.
 */
function buildComboboxFocusNextActions(sessionName: string | undefined): AgentBrowserNextAction[] {
	const sessionPrefix: string[] = sessionName ? ["--session", sessionName] : [];

	const inspect: AgentBrowserNextAction = {
		id: "inspect-focused-combobox",
		params: { args: [...sessionPrefix, "snapshot", "-i"] },
		reason: "Inspect the focused combobox and any portal/listbox refs before choosing an option.",
		safety: "Prefer visible option refs or select when a native/selectable option list is exposed.",
		tool: "agent_browser",
	};
	const openWithArrow: AgentBrowserNextAction = {
		id: "try-open-combobox-with-arrow",
		params: { args: [...sessionPrefix, "press", "ArrowDown"] },
		reason: "Many searchable comboboxes open their option list with ArrowDown after focus.",
		safety: "Use only when the focused combobox is still the intended control, then re-snapshot before selecting.",
		tool: "agent_browser",
	};
	const openWithEnter: AgentBrowserNextAction = {
		id: "try-open-combobox-with-enter",
		params: { args: [...sessionPrefix, "press", "Enter"] },
		reason: "Some comboboxes open or confirm their option list with Enter after focus.",
		safety: "Enter may select a highlighted/default option; prefer ArrowDown first unless Enter is the app's expected opener.",
		tool: "agent_browser",
	};
	return [inspect, openWithArrow, openWithEnter];
}
|
|
2976
|
+
|
|
2977
|
+
/**
 * Renders the combobox focus diagnostic as the text block appended to visible
 * output: a headline (with the element name in parentheses when known), the
 * reason, and one bullet per recommendation.
 *
 * @returns The newline-joined text, or `undefined` when there is no diagnostic.
 */
function formatComboboxFocusDiagnosticText(diagnostic: ComboboxFocusDiagnostic | undefined): string | undefined {
	if (diagnostic === undefined) return undefined;

	const { activeElement, message, recommendations } = diagnostic;
	const nameSuffix = activeElement.name ? ` (${activeElement.name})` : "";
	const lines: string[] = [];
	lines.push(`Combobox diagnostic: focused combobox did not expose visible options${nameSuffix}.`);
	lines.push(`Reason: ${message}`);
	for (const recommendation of recommendations) {
		lines.push(`- ${recommendation}`);
	}
	return lines.join("\n");
}
|
|
2986
|
+
|
|
2987
|
+
/**
 * Identifies `record start` / `record restart` invocations.
 *
 * @param command - Primary command name; must be exactly `record`.
 * @param commandTokens - All tokens of the invocation; the second token is the
 *   subcommand and is matched case-insensitively.
 * @returns The normalized command label, or `undefined` for anything else.
 */
function getRecordStartLikeCommand(command: string | undefined, commandTokens: string[]): RecordingDependencyWarning["command"] | undefined {
	if (command !== "record") return undefined;

	switch (commandTokens[1]?.toLowerCase()) {
		case "start":
			return "record start";
		case "restart":
			return "record restart";
		default:
			return undefined;
	}
}
|
|
2994
|
+
|
|
2995
|
+
/**
 * Checks whether an executable regular file named `command` exists in any
 * directory listed in `process.env.PATH`.
 *
 * On Windows each `PATHEXT` extension (defaulting to `.EXE;.CMD;.BAT;.COM`) is
 * appended in turn; elsewhere the bare name is used. Empty `PATH` entries are
 * ignored. Candidates are tried in `PATH` order and the search stops at the
 * first match.
 *
 * @param command - Executable name without a directory.
 * @returns `true` when a candidate passes the `X_OK` access check and is a file.
 */
async function executableExistsOnPath(command: string): Promise<boolean> {
	// Any filesystem error (missing file, no permission, ...) simply means
	// "not this candidate"; the caller moves on to the next one.
	const isExecutableFile = async (candidate: string): Promise<boolean> => {
		try {
			await access(candidate, fsConstants.X_OK);
			return (await stat(candidate)).isFile();
		} catch {
			return false;
		}
	};

	const searchDirectories = (process.env.PATH ?? "").split(delimiter).filter(Boolean);
	const suffixes =
		process.platform === "win32" ? (process.env.PATHEXT ?? ".EXE;.CMD;.BAT;.COM").split(";").filter(Boolean) : [""];

	for (const directory of searchDirectories) {
		for (const suffix of suffixes) {
			if (await isExecutableFile(join(directory, `${command}${suffix}`))) return true;
		}
	}
	return false;
}
|
|
3013
|
+
|
|
3014
|
+
/**
 * Builds the ffmpeg preflight warning for a successful `record start` /
 * `record restart` when `ffmpeg` cannot be found on `PATH`.
 *
 * @param options.command - Primary command name, if one was parsed.
 * @param options.commandTokens - All tokens of the invocation.
 * @param options.succeeded - Whether the upstream command succeeded.
 * @returns The warning, or `undefined` when the command failed, is not a
 *   start-like record command, or `ffmpeg` is available.
 */
async function collectRecordingDependencyWarning(options: {
	command: string | undefined;
	commandTokens: string[];
	succeeded: boolean;
}): Promise<RecordingDependencyWarning | undefined> {
	const { command, commandTokens, succeeded } = options;
	const recordCommand = succeeded ? getRecordStartLikeCommand(command, commandTokens) : undefined;
	if (recordCommand === undefined) return undefined;

	const ffmpegAvailable = await executableExistsOnPath("ffmpeg");
	if (ffmpegAvailable) return undefined;

	const warning: RecordingDependencyWarning = {
		command: recordCommand,
		dependency: "ffmpeg",
		message: `${recordCommand} can begin recording, but record stop needs ffmpeg on PATH to encode the WebM output.`,
		reason: "ffmpeg-missing-for-recording",
		recommendations: [
			"Install ffmpeg before relying on this recording workflow; on macOS with Homebrew, brew install ffmpeg or brew install ffmpeg-full.",
			"If ffmpeg was just installed, restart pi or ensure the PATH visible to pi includes the ffmpeg binary before running record stop.",
		],
	};
	return warning;
}
|
|
3034
|
+
|
|
3035
|
+
/**
 * Renders the recording dependency warning as the text block appended to
 * visible output: a headline, the reason, and one bullet per recommendation.
 *
 * @returns The newline-joined text, or `undefined` when there is no warning.
 */
function formatRecordingDependencyWarningText(warning: RecordingDependencyWarning | undefined): string | undefined {
	if (warning === undefined) return undefined;

	const lines: string[] = ["Recording dependency warning: ffmpeg not found on PATH.", `Reason: ${warning.message}`];
	for (const recommendation of warning.recommendations) {
		lines.push(`- ${recommendation}`);
	}
	return lines.join("\n");
}
|
|
3043
|
+
|
|
2690
3044
|
/**
 * Returns the `refs` record from snapshot data.
 *
 * @returns `data.refs` when both `data` and `data.refs` are records, otherwise
 *   `undefined`.
 */
function getSnapshotRefRecord(data: unknown): Record<string, unknown> | undefined {
	if (!isRecord(data)) return undefined;
	const { refs } = data;
	return isRecord(refs) ? refs : undefined;
}
|
|
@@ -3357,10 +3711,19 @@ async function closeManagedSession(options: { cwd: string; sessionName: string;
|
|
|
3357
3711
|
}
|
|
3358
3712
|
}
|
|
3359
3713
|
|
|
3714
|
+
/**
 * Resolves absolute paths to the documentation files shipped with this package.
 *
 * The package root is taken to be two directories above the directory that
 * contains this module (derived from `import.meta.url`).
 *
 * @returns Paths to `README.md`, `docs/COMMAND_REFERENCE.md`, and
 *          `docs/TOOL_CONTRACT.md` under that root.
 */
function getInstalledDocsPaths(): { readmePath: string; commandReferencePath: string; toolContractPath: string } {
	const moduleDirectory = dirname(fileURLToPath(import.meta.url));
	const packageRoot = resolve(moduleDirectory, "..", "..");
	const docsDirectory = join(packageRoot, "docs");

	const readmePath = join(packageRoot, "README.md");
	const commandReferencePath = join(docsDirectory, "COMMAND_REFERENCE.md");
	const toolContractPath = join(docsDirectory, "TOOL_CONTRACT.md");

	return { readmePath, commandReferencePath, toolContractPath };
}
|
|
3722
|
+
|
|
3360
3723
|
export default function agentBrowserExtension(pi: ExtensionAPI) {
|
|
3361
3724
|
const ephemeralSessionSeed = createEphemeralSessionSeed();
|
|
3362
3725
|
const hasBraveApiKey = hasUsableBraveApiKey();
|
|
3363
|
-
const toolPromptGuidelines = buildToolPromptGuidelines({ includeBraveSearch: hasBraveApiKey });
|
|
3726
|
+
const toolPromptGuidelines = buildToolPromptGuidelines({ includeBraveSearch: hasBraveApiKey, docs: getInstalledDocsPaths() });
|
|
3364
3727
|
const implicitSessionIdleTimeoutMs = String(getImplicitSessionIdleTimeoutMs());
|
|
3365
3728
|
const implicitSessionCloseTimeoutMs = getImplicitSessionCloseTimeoutMs();
|
|
3366
3729
|
let managedSessionActive = false;
|
|
@@ -3762,6 +4125,14 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
|
|
|
3762
4125
|
}
|
|
3763
4126
|
}
|
|
3764
4127
|
const redactedProcessArgs = redactInvocationArgs(processArgs);
|
|
4128
|
+
const shouldProbeScrollNoop = executionPlan.commandInfo.command === "scroll" && executionPlan.startupScopedFlags.length === 0;
|
|
4129
|
+
const scrollPositionBefore = shouldProbeScrollNoop
|
|
4130
|
+
? await collectScrollPositionSnapshot({
|
|
4131
|
+
cwd: ctx.cwd,
|
|
4132
|
+
sessionName: executionPlan.sessionName,
|
|
4133
|
+
signal,
|
|
4134
|
+
})
|
|
4135
|
+
: undefined;
|
|
3765
4136
|
|
|
3766
4137
|
onUpdate?.({
|
|
3767
4138
|
content: [{ type: "text", text: `Running agent-browser ${buildInvocationPreview(redactedProcessArgs)}` }],
|
|
@@ -4015,6 +4386,31 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
|
|
|
4015
4386
|
signal,
|
|
4016
4387
|
});
|
|
4017
4388
|
}
|
|
4389
|
+
const comboboxFocusDiagnostic = succeeded
|
|
4390
|
+
? await collectComboboxFocusDiagnostic({
|
|
4391
|
+
command: executionPlan.commandInfo.command,
|
|
4392
|
+
commandTokens,
|
|
4393
|
+
cwd: ctx.cwd,
|
|
4394
|
+
semanticAction: compiledSemanticAction,
|
|
4395
|
+
sessionName: executionPlan.sessionName,
|
|
4396
|
+
signal,
|
|
4397
|
+
})
|
|
4398
|
+
: undefined;
|
|
4399
|
+
const recordingDependencyWarning = await collectRecordingDependencyWarning({
|
|
4400
|
+
command: executionPlan.commandInfo.command,
|
|
4401
|
+
commandTokens,
|
|
4402
|
+
succeeded,
|
|
4403
|
+
});
|
|
4404
|
+
const scrollNoopDiagnostic = succeeded && shouldProbeScrollNoop
|
|
4405
|
+
? buildScrollNoopDiagnostic(
|
|
4406
|
+
scrollPositionBefore,
|
|
4407
|
+
await collectScrollPositionSnapshot({
|
|
4408
|
+
cwd: ctx.cwd,
|
|
4409
|
+
sessionName: executionPlan.sessionName,
|
|
4410
|
+
signal,
|
|
4411
|
+
}),
|
|
4412
|
+
)
|
|
4413
|
+
: undefined;
|
|
4018
4414
|
let currentRefSnapshot: SessionRefSnapshot | undefined;
|
|
4019
4415
|
if (executionPlan.sessionName) {
|
|
4020
4416
|
const activeSessionTabTargetState = sessionTabTargets.get(executionPlan.sessionName);
|
|
@@ -4242,6 +4638,12 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
|
|
|
4242
4638
|
if (selectorTextVisibilityDiagnostics.length > 0) {
|
|
4243
4639
|
(nextActions ??= []).push(...buildSelectorTextVisibilityNextActions({ diagnostics: selectorTextVisibilityDiagnostics, sessionName: executionPlan.sessionName }));
|
|
4244
4640
|
}
|
|
4641
|
+
if (scrollNoopDiagnostic) {
|
|
4642
|
+
(nextActions ??= []).push(...buildScrollNoopNextActions(executionPlan.sessionName));
|
|
4643
|
+
}
|
|
4644
|
+
if (comboboxFocusDiagnostic) {
|
|
4645
|
+
(nextActions ??= []).push(...buildComboboxFocusNextActions(executionPlan.sessionName));
|
|
4646
|
+
}
|
|
4245
4647
|
if (categoryDetails.failureCategory === "stale-ref" && redactedCompiledSemanticAction) {
|
|
4246
4648
|
(nextActions ??= []).push({
|
|
4247
4649
|
id: "retry-semantic-action-after-stale-ref",
|
|
@@ -4251,6 +4653,9 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
|
|
|
4251
4653
|
tool: "agent_browser" as const,
|
|
4252
4654
|
});
|
|
4253
4655
|
}
|
|
4656
|
+
const pageChangeSummary = (scrollNoopDiagnostic || comboboxFocusDiagnostic) && presentation.pageChangeSummary
|
|
4657
|
+
? { ...presentation.pageChangeSummary, nextActionIds: nextActions?.map((action) => action.id) }
|
|
4658
|
+
: presentation.pageChangeSummary;
|
|
4254
4659
|
const details = {
|
|
4255
4660
|
args: redactedArgs,
|
|
4256
4661
|
compiledJob: redactedCompiledJob,
|
|
@@ -4284,8 +4689,11 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
|
|
|
4284
4689
|
imagePath: presentation.imagePath,
|
|
4285
4690
|
imagePaths: presentation.imagePaths,
|
|
4286
4691
|
nextActions,
|
|
4287
|
-
pageChangeSummary
|
|
4692
|
+
pageChangeSummary,
|
|
4288
4693
|
overlayBlockers: overlayBlockerDiagnostic,
|
|
4694
|
+
comboboxFocus: comboboxFocusDiagnostic,
|
|
4695
|
+
recordingDependencyWarning,
|
|
4696
|
+
scrollNoop: scrollNoopDiagnostic,
|
|
4289
4697
|
qaPreset,
|
|
4290
4698
|
selectorTextVisibility: selectorTextVisibilityDiagnostics[0],
|
|
4291
4699
|
selectorTextVisibilityAll: selectorTextVisibilityDiagnostics.length > 1 ? selectorTextVisibilityDiagnostics : undefined,
|
|
@@ -4313,11 +4721,14 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
|
|
|
4313
4721
|
const semanticActionCandidateText = nextActions ? formatSemanticActionCandidateText(nextActions) : undefined;
|
|
4314
4722
|
const overlayBlockerText = overlayBlockerDiagnostic ? formatOverlayBlockerText(overlayBlockerDiagnostic) : undefined;
|
|
4315
4723
|
const selectorTextVisibilityText = formatSelectorTextVisibilityText(selectorTextVisibilityDiagnostics);
|
|
4724
|
+
const scrollNoopDiagnosticText = formatScrollNoopDiagnosticText(scrollNoopDiagnostic);
|
|
4725
|
+
const comboboxFocusDiagnosticText = formatComboboxFocusDiagnosticText(comboboxFocusDiagnostic);
|
|
4726
|
+
const recordingDependencyWarningText = formatRecordingDependencyWarningText(recordingDependencyWarning);
|
|
4316
4727
|
const evalStdinHintText = formatEvalStdinHintText(evalStdinHint);
|
|
4317
4728
|
const artifactCleanupText = formatArtifactCleanupGuidanceText(artifactCleanup);
|
|
4318
4729
|
const timeoutPartialProgressText = timeoutPartialProgress ? formatTimeoutPartialProgressText(timeoutPartialProgress) : undefined;
|
|
4319
4730
|
const managedSessionOutcomeText = formatManagedSessionOutcomeText(managedSessionOutcome);
|
|
4320
|
-
const rawAppendedDiagnosticText = [semanticActionCandidateText, overlayBlockerText, selectorTextVisibilityText, evalStdinHintText, artifactCleanupText, timeoutPartialProgressText, managedSessionOutcomeText].filter((item): item is string => item !== undefined).join("\n\n");
|
|
4731
|
+
const rawAppendedDiagnosticText = [semanticActionCandidateText, overlayBlockerText, selectorTextVisibilityText, scrollNoopDiagnosticText, comboboxFocusDiagnosticText, recordingDependencyWarningText, evalStdinHintText, artifactCleanupText, timeoutPartialProgressText, managedSessionOutcomeText].filter((item): item is string => item !== undefined).join("\n\n");
|
|
4321
4732
|
const appendedDiagnosticText = redactSensitiveText(redactExactSensitiveText(rawAppendedDiagnosticText, exactSensitiveValues));
|
|
4322
4733
|
const shouldAppendDiagnosticText = appendedDiagnosticText.length > 0 && (!userRequestedJson || plainTextInspection);
|
|
4323
4734
|
const content = shouldAppendDiagnosticText && redactedContent[0]?.type === "text"
|
|
@@ -13,6 +13,10 @@ export const TOOL_PROMPT_GUIDELINES_PREFIX = [
|
|
|
13
13
|
"Use agent_browser whenever the task requires a real browser or live web content.",
|
|
14
14
|
] as const;
|
|
15
15
|
|
|
16
|
+
/**
 * Builds the single prompt guideline that points at the installed package docs.
 *
 * @param paths Locations of the README, command reference, and tool contract
 *              documents; each value is interpolated verbatim into the sentence.
 * @returns One guideline string naming all three documents and their purpose.
 */
export function buildInstalledDocsGuideline(paths: { readmePath: string; commandReferencePath: string; toolContractPath: string }): string {
	const { readmePath, commandReferencePath, toolContractPath } = paths;
	const guideline = `For deeper guidance without bloating context, read installed package docs on demand: ${readmePath} for setup/external dependencies, ${commandReferencePath} for command workflows, and ${toolContractPath} for result/details contracts. Do not load the full command reference unless needed; prefer targeted sections.`;
	return guideline;
}
|
|
19
|
+
|
|
16
20
|
export const QUICK_START_GUIDELINES = [
|
|
17
21
|
"Quick start mental model: use exactly one of args (exact agent-browser CLI args after the binary), semanticAction (a thin find-locator shorthand compiled to find argv), job (a constrained short-workflow schema compiled to batch), qa (a lightweight QA preset built on job/batch), or the experimental sourceLookup / networkSourceLookup helpers (each compiled to batch); stdin is only for batch, eval --stdin, auth save --password-stdin, and wrapper-generated batch stdin from job, qa, sourceLookup, or networkSourceLookup, and other command/stdin combinations are rejected before launch; sessionMode=fresh switches the extension-managed pi-scoped session to a fresh upstream launch when you need new --profile, --session-name, --cdp, --state, --auto-connect, --init-script, --enable, -p/--provider, or iOS --device state.",
|
|
18
22
|
"There is no first-class reusable named browser recipe runtime above top-level job, the qa preset, and raw batch stdin; keep recurring flows in documentation examples or those inputs (closed RQ-0068; see docs/ARCHITECTURE.md#no-reusable-recipe-layer-yet).",
|
|
@@ -47,6 +51,7 @@ export const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
|
|
|
47
51
|
"For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.",
|
|
48
52
|
"For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.",
|
|
49
53
|
"For downloads, prefer download <selector> <path> when an element click should save a file. Do not rely on click alone when you need the downloaded file on disk.",
|
|
54
|
+
"On dashboards with nested scroll containers, verify scroll with a screenshot or fresh snapshot -i; if the viewport did not move, prefer scrollintoview <@ref> or target the actual scrollable region. For comboboxes, a click/semanticAction may only focus the field; re-snapshot and fall back to type, press Enter/arrow keys, select, or visible option refs.",
|
|
50
55
|
"When using eval --stdin, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics.",
|
|
51
56
|
"When using eval --stdin for extraction, return the value you want instead of relying on console.log as the primary result channel. Prefer plain expressions like ({ title: document.title }) or explicitly invoked functions like (() => ({ title: document.title }))(); if a function-shaped snippet returns {}, details.evalStdinHint may warn that the function was serialized instead of called. If get text on a CSS selector surfaces details.selectorTextVisibility or selectorTextVisibilityAll, prefer a visible @ref, a more specific selector, or the inspect-visible-text-candidates nextAction over hidden tab content.",
|
|
52
57
|
"When details.pageChangeSummary is present, use changeType and summary as a compact signal for navigation, DOM mutation, confirmations, or artifacts; when nextActionIds is set, match those ids to entries in details.nextActions (or per-step nextActions inside batch) for concrete follow-up payloads instead of inferring from prose alone. If a no-navigation click surfaces details.overlayBlockers, inspect the fresh snapshot evidence before using a close/dismiss candidate nextAction; ordinary page chrome without dialog/alertdialog evidence should not trigger this diagnostic.",
|
|
@@ -83,11 +88,26 @@ export function buildSharedBrowserPlaybookGuidelines(options: { includeBraveSear
|
|
|
83
88
|
];
|
|
84
89
|
}
|
|
85
90
|
|
|
86
|
-
|
|
91
|
+
/**
 * Compact runtime guidance spread into the tool prompt by
 * `buildToolPromptGuidelines`. Each entry is one self-contained guideline.
 */
const RUNTIME_PROMPT_GUIDELINES = [
	// Input modes and stdin usage.
	"Use exactly one input mode: args, semanticAction, job, qa, sourceLookup, or networkSourceLookup. Use stdin only for batch, eval --stdin, auth save --password-stdin, or wrapper-generated batch modes.",
	// Canonical interaction loop.
	"Common flow: open, snapshot -i, interact with current @refs or semanticAction, then re-snapshot after navigation, scrolling, rerenders, or DOM changes.",
	// Locator preferences.
	"Prefer stable locators for visible text/names: semanticAction or upstream find with role/text/label/placeholder/alt/title/testid. Use current @refs only from the latest same-page snapshot.",
	// Session handling.
	"Use sessionMode=fresh for launch-scoped state such as --profile, --session-name, --cdp, --state, --auto-connect, --init-script, --enable, providers, or iOS devices; otherwise let the implicit session carry continuity.",
	// Artifacts and recording prerequisites.
	"For artifacts, read visible metadata and details.artifactVerification before using files. record stop needs ffmpeg on PATH. close does not delete saved files; cleanup is host-owned.",
	// Structured follow-ups.
	"When details.nextActions is present, prefer those exact follow-up payloads over prose or guessed selectors.",
	// Dense snapshot handling.
	"For dense snapshots, check Omitted high-value controls and details.data.highValueControlRefIds before opening large spill files.",
	// Scroll and combobox pitfalls.
	"For dashboards, verify scroll with screenshot/snapshot; if nothing moved, use scrollintoview <@ref> or target the real scroll region. Combobox clicks may only focus; re-snapshot and fall back to type, Enter/arrows, select, or option refs.",
	// Extraction.
	"For extraction, prefer get title/url/text/html/value/attr/count or eval --stdin that returns a value; do not rely on console.log. If selector visibility warnings appear, prefer visible @refs or nextActions.",
	// Pass-through debugging commands.
	"For non-core debugging, pass upstream commands through args: network, diff, trace/profiler/record, console/errors, stream, dashboard, chat, react, vitals, pushstate, dialog, frame, tab.",
] as const;
|
|
103
|
+
|
|
104
|
+
/**
 * Assembles the ordered list of prompt guidelines for the agent_browser tool.
 *
 * Order: the fixed prefix, the installed-docs pointer (only when `docs` is
 * given), the compact runtime guidelines, the Brave search guideline (only
 * when enabled), then the first two suffix entries.
 *
 * @param options.includeBraveSearch Whether to include the Brave search guideline.
 * @param options.docs Optional installed documentation paths to advertise.
 * @returns A new array of guideline strings.
 */
export function buildToolPromptGuidelines(options: { includeBraveSearch: boolean; docs?: { readmePath: string; commandReferencePath: string; toolContractPath: string } }): string[] {
	const guidelines: string[] = [...TOOL_PROMPT_GUIDELINES_PREFIX];

	if (options.docs) {
		guidelines.push(buildInstalledDocsGuideline(options.docs));
	}

	guidelines.push(...RUNTIME_PROMPT_GUIDELINES);

	if (options.includeBraveSearch) {
		guidelines.push(BRAVE_SEARCH_PROMPT_GUIDELINE);
	}

	// Only the first two suffix entries are part of the compact prompt.
	guidelines.push(TOOL_PROMPT_GUIDELINES_SUFFIX[0], TOOL_PROMPT_GUIDELINES_SUFFIX[1]);
	return guidelines;
}
|
|
@@ -123,9 +123,9 @@ const GLOBAL_BOOLEAN_FLAGS_WITH_OPTIONAL_VALUES = new Set([
|
|
|
123
123
|
"-v",
|
|
124
124
|
]);
|
|
125
125
|
/**
 * Case-insensitive, whole-name match for query parameter names treated as
 * sensitive (tokens, keys, secrets, codes, signatures, and similar), including
 * `sentry_key` and `write_key` with an optional `_` or `-` separator.
 */
const SENSITIVE_QUERY_PARAM_PATTERN = /^(?:access(?:_|-)?token|api(?:_|-)?key|auth|authorization|bearer|client(?:_|-)?secret|code|cookie|id(?:_|-)?token|key|pass(?:word)?|refresh(?:_|-)?token|secret|sentry(?:_|-)?key|session(?:_|-)?id|sig(?:nature)?|token|write(?:_|-)?key)$/i;
|
|
127
127
|
/**
 * Case-insensitive, whole-name match for field names treated as sensitive
 * (authorization and cookie fields, tokens, secrets, API keys, and similar),
 * including `sentry_key` and `write_key` with an optional `_` or `-` separator.
 */
const SENSITIVE_FIELD_NAME_PATTERN = /^(?:access(?:_|-)?token|api(?:_|-)?key|auth(?:orization)?|bearer|client(?:_|-)?secret|cookie|id(?:_|-)?token|pass(?:word)?|proxy(?:_|-)?authorization|refresh(?:_|-)?token|secret|sentry(?:_|-)?key|session(?:_|-)?id|set(?:_|-)?cookie|sig(?:nature)?|token|write(?:_|-)?key|x(?:_|-)?api(?:_|-)?key)$/i;
|
|
129
129
|
|
|
130
130
|
const VALUE_FLAGS = new Set([
|
|
131
131
|
"--session",
|
package/package.json
CHANGED