pi-agent-browser-native 0.2.27 → 0.2.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,25 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.2.29 - 2026-05-18
4
+
5
+ ### Changed
6
+ - Updated the local pi package baseline to `@earendil-works/*` `0.75.3`, including the Node.js `>=22.19.0` runtime floor and refreshed npm lockfile.
7
+ - Removed tracked CueLoop runtime state from the repository and ignored local `.cueloop/` artifacts.
8
+
9
+
10
+ ## 0.2.28 - 2026-05-15
11
+
12
+ ### Added
13
+ - Compact runtime guidance now points agents to the installed package's `README.md`, `docs/COMMAND_REFERENCE.md`, and `docs/TOOL_CONTRACT.md` for on-demand detail instead of injecting the full browser playbook into every browser-oriented turn.
14
+ - Successful top-level `scroll` calls can now report `details.scrollNoop`, visible no-op scroll diagnostics, and exact snapshot/screenshot recovery `nextActions` when wrapper-side probes show the viewport and sampled scroll containers did not move.
15
+ - Successful explicit combobox-targeted actions can now report `details.comboboxFocus` and exact `snapshot -i`, `press ArrowDown`, and `press Enter` recovery `nextActions` when a focused combobox has explicit `aria-expanded` state but no visible options, including after active-session semanticAction role/name clicks resolve through current visible `@ref`s.
16
+ - Successful `record start` / `record restart` calls now warn early with `details.recordingDependencyWarning` when executable `ffmpeg` is missing from the Pi process `PATH`, so agents can fix recording prerequisites before `record stop` needs to encode the WebM.
17
+ - `docs/RELEASE.md` now includes a repeatable public Grafana Play stress checklist for dense-dashboard release dogfood without bundling private dogfood/VFR skills or adding a recipe runtime.
18
+
19
+ ### Fixed
20
+ - Network request redaction now treats secret-like query and field names such as `sentry_key` and `writeKey` as sensitive in model-visible summaries and details.
21
+ - README and command-reference setup notes now call out `ffmpeg` as the external dependency required for recording workflows.
22
+
3
23
  ## 0.2.27 - 2026-05-14
4
24
 
5
25
  ### Fixed
package/README.md CHANGED
@@ -65,6 +65,9 @@ The result is optimized for agent work:
65
65
  | Stale `@eN` refs fail mysteriously | Records per-session `details.refSnapshot`, rejects mismatched URLs / unknown refs / unsafe `batch` stdin ordering before spawn, adds recovery guidance to rerun `snapshot -i` or use stable `find` locators | `extensions/agent-browser/index.ts`, `test/agent-browser.results.test.ts`, `test/agent-browser.extension-validation.test.ts` |
66
66
  | Agents need stable success/failure buckets | Exposes bounded `resultCategory`, `successCategory`, and `failureCategory` on tool `details` for branching without parsing prose | [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md#details), `extensions/agent-browser/lib/results/shared.ts`, `test/agent-browser.results.test.ts` |
67
67
  | Models re-snapshot after every click without new URL/title context | Adds optional `details.pageChangeSummary` (and per-batch-step summaries) with `changeType`, compact text, optional `title`/`url`, artifact hints, and `nextActionIds` aligned to `nextActions`; no-navigation clicks can also surface evidence-backed `details.overlayBlockers` candidates | [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md#details), `extensions/agent-browser/lib/results/presentation.ts`, `test/agent-browser.presentation.test.ts` |
68
+ | Dashboard scroll commands can look successful while nothing moves | Samples viewport and prominent scroll-container positions around top-level `scroll` calls; unchanged positions produce `details.scrollNoop`, visible recovery guidance, and exact `nextActions` for snapshot/screenshot verification | [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md#details), [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md#core-page-and-element-commands), `test/agent-browser.extension-validation.test.ts` |
69
+ | Combobox clicks can focus the field without opening options | For explicit combobox-targeted actions, detects focused combobox-like controls with explicit `aria-expanded` state but no visible options and returns `details.comboboxFocus` plus exact `nextActions` for snapshot, ArrowDown, and Enter recovery | [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md#details), [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md#core-page-and-element-commands), `test/agent-browser.extension-validation.test.ts` |
70
+ | Recording workflows fail late when `ffmpeg` is missing | After successful `record start` / `record restart`, warns when `ffmpeg` is not on `PATH` so agents can install or fix PATH before `record stop` | [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md#details), [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md#diagnostics-performance-and-recording), `test/agent-browser.extension-validation.test.ts` |
68
71
  | Direct binary help may be blocked in agent sessions | Publishes a repo-readable command reference and verifies it against the target upstream version | `npm run verify` |
69
72
  | Agents need bundled `skills` text without touching the live session | Treats `skills list`, `skills get …`, and `skills path …` as stateless JSON reads: no implicit managed `--session` under default `sessionMode: "auto"` (same session-ownership goal as plain-text `--help` / `--version`), while provider workflows stay thin passthroughs that require upstream setup and credentials | [`docs/COMMAND_REFERENCE.md`](docs/COMMAND_REFERENCE.md#built-in-skills), `extensions/agent-browser/lib/runtime.ts` |
70
73
 
@@ -75,6 +78,17 @@ Install upstream `agent-browser` first and make sure it is on `PATH`:
75
78
  - https://agent-browser.dev/
76
79
  - https://github.com/vercel-labs/agent-browser
77
80
 
81
+ Optional external tools unlock the full command surface:
82
+
83
+ | Dependency | Required for | macOS install example |
84
+ | --- | --- | --- |
85
+ | `agent-browser` | All browser automation through this extension | See upstream install docs |
86
+ | `ffmpeg` | `record stop` WebM encoding after `record start` / `record restart` | `brew install ffmpeg` or `brew install ffmpeg-full` |
87
+
88
+ Keep both binaries on `PATH`. `record start` can begin without a file on disk, but `record stop` needs `ffmpeg` to encode the WebM.
89
+
90
+ The native tool also gives agents absolute installed-package doc paths in its compact runtime guidance. Agents should read `README.md` for setup/dependencies, `docs/COMMAND_REFERENCE.md` for targeted command workflows, and `docs/TOOL_CONTRACT.md` for result/detail contracts only when deeper guidance is needed.
91
+
78
92
  Then install this Pi package:
79
93
 
80
94
  ```bash
@@ -421,7 +435,7 @@ pi --no-extensions -e .
421
435
 
422
436
  This bypasses Pi settings and configured extensions. After editing extension code, restart that Pi process to test the new checkout.
423
437
 
424
- For a concrete expanded native-tool smoke matrix (version/help/skills through dashboard/chat families), see [Local development validation](docs/RELEASE.md#local-development-validation) in `docs/RELEASE.md`.
438
+ For a concrete expanded native-tool smoke matrix (version/help/skills through dashboard/chat families), see [Local development validation](docs/RELEASE.md#local-development-validation) in `docs/RELEASE.md`. When changes affect dense dashboards, diagnostics, artifacts, recording, scroll, or combobox behavior, use the public [Grafana stress checklist](docs/RELEASE.md#public-grafana-stress-checklist) for repeatable release dogfood without bundling private skills or recipes.
425
439
 
426
440
  Configured-source lifecycle validation:
427
441
 
@@ -246,7 +246,7 @@ Prefer `download <selector> <path>` when the target element itself is the downlo
246
246
  Wrapper result rendering is metadata-first for saved files:
247
247
  - screenshots return a saved-path summary, visible artifact metadata, structured `details.artifacts` metadata, and an inline image attachment when safe; the visible block includes artifact type, requested path, absolute path, existence, size, cwd, session, and repair/copy status when applicable
248
248
  - downloads, PDFs, `wait --download` files, `state save` state files, diff screenshot output images, traces, CPU profiles, completed WebM recordings from `record stop`, and path-bearing HAR captures return concise saved-path summaries plus structured `details.artifacts` metadata without inlining large files
249
- - `record start <path>` reports that recording started and that output will be written on `record stop`; the target file may not exist until recording stops
249
+ - `record start <path>` reports that recording started and that output will be written on `record stop`; the target file may not exist until recording stops, and upstream needs `ffmpeg` on `PATH` at stop time to encode the WebM. If `ffmpeg` is missing after a successful `record start` / `record restart`, the wrapper appends `Recording dependency warning: ffmpeg not found on PATH` and sets `details.recordingDependencyWarning` without blocking the upstream command.
250
250
  - `batch` keeps each step's artifacts in `details.batchSteps[].artifacts` and aggregates them in top-level `details.artifacts` in step order
251
251
 
252
252
  `diff screenshot` follows the file-artifact path above for the **diff** image: model-visible text and `details.artifacts` focus on that output, while baseline paths stay out of the artifact summary block, and Pi does **not** auto-inline the diff the way it inlines trusted `screenshot` captures. `state load` may print the loaded path in prose but does not add a saved-file artifact entry the way `state save` does.
@@ -379,6 +379,10 @@ Session note: `skills list`, `skills get …`, and `skills path …` are **state
379
379
  | `connect <port|url>` | Connect to a browser through CDP. |
380
380
  | `close [--all]` | Close the current browser or all sessions. |
381
381
 
382
+ On dashboards and other apps with nested scroll containers, `scroll <dir> [px]` may report a successful wheel action while the viewport appears unchanged because the page-level scroller was not the one containing the content. For top-level `scroll` calls without startup-scoped launch flags, the wrapper samples viewport and prominent scroll-container positions before and after the command; when nothing changes it appends `Scroll diagnostic: no observed scroll movement`, exposes `details.scrollNoop`, and adds exact `details.nextActions` for a fresh `snapshot -i` and screenshot. Use those before repeating page scrolls; when you need a specific panel, prefer `scrollintoview <@ref>` or a scoped interaction with the actual scrollable region.
383
+
384
+ Comboboxes vary by app. A `click` or `semanticAction` role/name click may focus a searchable combobox without opening its option list. For explicit combobox-targeted actions such as `semanticAction` role `combobox`, the wrapper checks whether a combobox-like element is focused, has explicit `aria-expanded` state, and has no visible listbox/options open; this still applies when the semantic action first resolves to a current visible `@ref` before execution. When that happens it appends `Combobox diagnostic: focused combobox did not expose visible options`, exposes `details.comboboxFocus`, and adds exact `details.nextActions` for a fresh `snapshot -i`, `press ArrowDown`, and `press Enter`. Use those instead of assuming click alone expanded the control; prefer visible option refs or `select` when options are exposed.
385
+
382
386
  ### Navigation
383
387
 
384
388
  | Command | Purpose |
@@ -479,8 +483,8 @@ When a snapshot is too large for inline output, the Pi wrapper renders a compact
479
483
  | `diff url <u1> <u2>` | Compare two pages. |
480
484
  | `trace start|stop [path]` | Record a Chrome DevTools trace. |
481
485
  | `profiler start|stop [path]` | Record a Chrome DevTools profile. |
482
- | `record start <path> [url]` | Start WebM video recording; output is written on `record stop`. |
483
- | `record stop` | Stop and save video. |
486
+ | `record start <path> [url]` | Start WebM video recording; output is written on `record stop`. Requires `ffmpeg` on `PATH` for the final encode. |
487
+ | `record stop` | Stop and save video. If this fails with `ffmpeg not found`, install `ffmpeg` / `ffmpeg-full` and rerun the recording. |
484
488
  | `record restart <path> [url]` | Stop any current recording and start a new WebM recording. |
485
489
  | `console [--clear]` | View or clear console logs. |
486
490
  | `errors [--clear]` | View or clear page errors. |
@@ -499,7 +503,7 @@ When a snapshot is too large for inline output, the Pi wrapper renders a compact
499
503
  | `pushstate <url>` | Perform SPA client-side navigation; detects Next.js router pushes and falls back to history navigation events. |
500
504
  | `removeinitscript <id>` | Remove an init script registered through upstream init-script mechanisms. |
501
505
 
502
- When these diagnostic commands are invoked through the native `agent_browser` tool, structured console, page-error, React, Web Vitals, and SPA outputs render as compact summaries when possible, with large outputs previewed and spilled instead of dumped into context. Large outputs are previewed with a `Full output path:` spill file instead of dumping the entire payload into context. Artifact-producing commands such as `network har stop`, `diff screenshot`, `trace stop`, `profiler stop`, and `record stop` report `details.artifacts[]` plus `details.artifactVerification`; `record start` is reported as pending until `record stop` completes.
506
+ When these diagnostic commands are invoked through the native `agent_browser` tool, structured console, page-error, React, Web Vitals, and SPA outputs render as compact summaries when possible; large outputs are previewed and spilled to a file reported as `Full output path:` instead of dumping the entire payload into context. Artifact-producing commands such as `network har stop`, `diff screenshot`, `trace stop`, `profiler stop`, and `record stop` report `details.artifacts[]` plus `details.artifactVerification`; `record start` is reported as pending until `record stop` completes. For video workflows, make sure `ffmpeg` is on `PATH` before you start recording; on macOS with Homebrew, `brew install ffmpeg` or `brew install ffmpeg-full` is sufficient. Successful `record start` / `record restart` results warn early with `details.recordingDependencyWarning` when the wrapper cannot find `ffmpeg`, so fix `PATH` before `record stop` instead of discovering the missing encoder after the capture. The README install section keeps the concise list of external dependencies needed for the full command surface.
503
507
 
504
508
  Long-running or lifecycle commands should be explicitly paired with cleanup calls: `stream enable` → `stream disable`, `dashboard start` → `dashboard stop`, `trace start` → `trace stop`, `profiler start` → `profiler stop`, and `record start` → `record stop`. The wrapper keeps each subprocess bounded by its normal timeout; it does not keep an interactive `chat` REPL open, so prefer `chat <message>` with `--model` or `AI_GATEWAY_MODEL` for single-shot AI use.
505
509
 
package/docs/RELEASE.md CHANGED
@@ -36,7 +36,7 @@ npm run verify -- release
36
36
 
37
37
  `prepublishOnly` intentionally does **not** run `npm run verify -- lifecycle`, `npm run verify -- real-upstream`, or `npm run verify -- benchmark`; those are separate `npm run verify` modes in [`scripts/project.mjs`](../scripts/project.mjs). Treat the bullets below as the full pre-publish contract even though only the `release` slice is automated at publish time.
38
38
 
39
- Every release also requires interactive `tmux`-driven Pi dogfood with the native `agent_browser` tool against real sites. Use `pi --no-extensions -e .` from the checkout before publish, drive prompts with `tmux send-keys`, exercise at least one simple static site and one real documentation/product site, include the higher-level `qa` or `job`/`batch` surfaces when they changed, close every opened browser session, remove screenshots/temp artifacts, and record the outcome in the release notes or support-matrix evidence. Automated localhost and fake-upstream gates do not replace this human-readable live-site transcript evidence.
39
+ Every release also requires interactive `tmux`-driven Pi dogfood with the native `agent_browser` tool against real sites. Use `pi --no-extensions -e .` from the checkout before publish, drive prompts with `tmux send-keys`, exercise at least one simple static site and one real documentation/product site, include the higher-level `qa` or `job`/`batch` surfaces when they changed, close every opened browser session, remove screenshots/temp artifacts, and record the outcome in the release notes or support-matrix evidence. Automated localhost and fake-upstream gates do not replace this human-readable live-site transcript evidence. For dense-dashboard stress coverage, use the [public Grafana stress checklist](#public-grafana-stress-checklist) below; it is a maintainer workflow, not a bundled product skill or recipe runtime.
40
40
 
41
41
  The configured-source lifecycle regression harness is required before release because it launches an interactive `pi` process under `tmux` and validates `/reload` plus restart/`/resume` behavior:
42
42
 
@@ -46,6 +46,29 @@ npm run verify -- lifecycle
46
46
 
47
47
  Use `npm run verify -- lifecycle --keep-artifacts` when debugging failures, then remove retained artifacts after inspection.
48
48
 
49
+ ## Public Grafana stress checklist
50
+
51
+ Use this optional-but-recommended checklist when a release touches dashboard behavior, snapshots, refs, scroll, comboboxes, artifacts, network diagnostics, recording, or prompt guidance. It keeps the useful public Grafana dogfood target repeatable without bundling private dogfood/VFR skills or adding a reusable browser recipe layer.
52
+
53
+ Target:
54
+
55
+ ```text
56
+ https://play.grafana.org/d/rYdddlPWk/node-exporter-full?orgId=1&from=now-6h&to=now&timezone=browser&var-datasource=default&var-job=node&var-node=All
57
+ ```
58
+
59
+ Minimum pass:
60
+
61
+ 1. Open the URL with the native `agent_browser` tool in a fresh session.
62
+ 2. Run `snapshot -i`; confirm the output is useful on a dense dashboard, including high-value controls and bounded spill behavior when needed.
63
+ 3. Exercise one dashboard scroll path. If page-level `scroll` does not move visible content, confirm `details.scrollNoop` / next actions or equivalent guidance points to snapshot/screenshot verification and nested-scroll recovery.
64
+ 4. Exercise one explicit combobox-targeted action such as a role/name `semanticAction` on a dashboard variable. If it only focuses the field, confirm `details.comboboxFocus` / next actions point to `snapshot -i`, `press ArrowDown`, and `press Enter` when the closed-state evidence qualifies.
65
+ 5. Capture at least one screenshot artifact and verify `details.artifactVerification` before using the file.
66
+ 6. If `ffmpeg` is on `PATH`, run a short `record start` / visible interaction / `record stop` cycle and verify the WebM artifact. If `ffmpeg` is absent, confirm `details.recordingDependencyWarning` appears after `record start`, then end the recording check there rather than relying on recording evidence.
67
+ 7. Inspect `network requests`, `console`, and `errors` summaries. Treat Grafana Play-side noise such as analytics/Sentry requests, public-demo 403s, and console errors as site noise unless the wrapper leaks secrets, hides actionable failed rows, misclassifies artifacts, or suggests unsafe follow-ups.
68
+ 8. Close the browser session and delete temporary screenshots, HARs, recordings, and scratch reports after extracting any release evidence.
69
+
70
+ Record release evidence as a short note with: date, package/checkout source, target URL, browser command families exercised, artifacts collected and cleaned up, known Grafana-side noise observed, and any product findings converted into CueLoop tasks. Do not commit private dogfood scripts, VFR harness files, raw browser profiles, HARs, videos, or `.dogfood/` run output as product docs.
71
+
49
72
  ## Deterministic agent efficiency benchmark
50
73
 
51
74
  [`scripts/agent-browser-efficiency-benchmark.mjs`](../scripts/agent-browser-efficiency-benchmark.mjs) is an accounting-only benchmark: it does not shell out to `agent-browser`, launch a browser, or read or write Pi sessions. It models representative `agent_browser` call shapes (including optional `stdin` for `batch` and top-level `job`, `qa`, or experimental `sourceLookup` / `networkSourceLookup` objects that compile to batch) and aggregates success rate, tool-call counts, UTF-8 size of model-visible strings, stale-ref failure and recovery counts, artifact success, distinct failure-category coverage, and summed elapsed-time estimates. When extending scenarios, keep them aligned with the closed `RQ-0068` “no reusable recipe layer” rationale in [`ARCHITECTURE.md`](ARCHITECTURE.md#no-reusable-recipe-layer-yet) (benchmark ids cited there are the canonical inventory for that evidence bar).
@@ -206,7 +229,7 @@ Before publishing:
206
229
  - run `npm run doctor` and confirm any duplicate-source remediation matches the active package/checkout setup
207
230
  - run `npm run verify -- real-upstream` for upstream runtime, result-presentation, or managed-session changes
208
231
  - confirm both local-checkout modes still work for pre-release validation: isolated `pi --no-extensions -e .` smoke testing and configured-source lifecycle validation
209
- - complete interactive `tmux` live-site dogfood with `pi --no-extensions -e .` and the native `agent_browser` tool (at least one simple static site and one real documentation/product site; include `qa` or `job`/`batch` when those surfaces changed; close sessions and remove screenshots/temp artifacts; record evidence)—see [Pre-release checks](#pre-release-checks); automated gates are not a substitute
232
+ - complete interactive `tmux` live-site dogfood with `pi --no-extensions -e .` and the native `agent_browser` tool (at least one simple static site and one real documentation/product site; include `qa` or `job`/`batch` when those surfaces changed; use the [public Grafana stress checklist](#public-grafana-stress-checklist) when dashboard/diagnostic/artifact behavior changed; close sessions and remove screenshots/temp artifacts; record evidence)—see [Pre-release checks](#pre-release-checks); automated gates are not a substitute
210
233
  - rerun `npm run verify -- release`
211
234
  - run `npm run verify -- lifecycle` for configured-source `/reload` plus restart/`/resume` regression coverage (required before publish; see [Pre-release checks](#pre-release-checks))
212
235
  - confirm [`SUPPORT_MATRIX.md`](SUPPORT_MATRIX.md) still maps every current baseline inventory section to docs, runtime handling, tests, and validation status
@@ -28,7 +28,7 @@ When upstream ships a new `agent-browser` or the inventory changes:
28
28
  - Source of truth: `CAPABILITY_BASELINE.inventorySections` in the same file (stable `id` keys: `skills`, `core-commands`, `state-tabs-frames-dialogs`, `network-storage-artifacts-diagnostics`, `batch-auth-setup-ai`, `options-and-env`).
29
29
  - Status: supported for the current wrapper contract.
30
30
  - High-priority support gaps: none identified in the baseline audit.
31
- - Remaining queued work: none in the current support queue. Constrained `job` (`RQ-0064`), the lightweight `qa` preset (`RQ-0065`), the experimental `sourceLookup` helper (`RQ-0066`), and the experimental `networkSourceLookup` helper (`RQ-0067`) are implemented; see [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#job), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#qa), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#sourcelookup), and [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#networksourcelookup). Reusable browser recipes (`RQ-0068`) are intentionally not adopted as a runtime surface; see [`ARCHITECTURE.md`](ARCHITECTURE.md#no-reusable-recipe-layer-yet).
31
+ - Remaining queued work: only `RQ-0084` remains active, covering the `0.2.28` npm/GitHub release after npm authentication is restored. Dogfood-driven improvements `RQ-0080` through `RQ-0083` and `RQ-0085` are implemented and are beyond the current baseline support promise for thin upstream command coverage. Constrained `job` (`RQ-0064`), the lightweight `qa` preset (`RQ-0065`), the experimental `sourceLookup` helper (`RQ-0066`), and the experimental `networkSourceLookup` helper (`RQ-0067`) are implemented; see [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#job), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#qa), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#sourcelookup), and [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#networksourcelookup). Reusable browser recipes (`RQ-0068`) are intentionally not adopted as a runtime surface; see [`ARCHITECTURE.md`](ARCHITECTURE.md#no-reusable-recipe-layer-yet).
32
32
 
33
33
  ## Verification evidence
34
34
 
@@ -36,10 +36,10 @@ Re-run the gates below before each release; this table records what the closure
36
36
 
37
37
  | Gate | Evidence | Status |
38
38
  | --- | --- | --- |
39
- | Default local gate | `npm run verify` checks generated playbook drift, `tsc --noEmit`, unit/fake tests, generated command-reference blocks, and live command-reference sampling. | Pass on 2026-05-14 (`npm run verify`, `agent-browser 0.27.0` on `PATH`). |
39
+ | Default local gate | `npm run verify` checks generated playbook drift, `tsc --noEmit`, unit/fake tests, generated command-reference blocks, and live command-reference sampling. | Pass on 2026-05-15 (`npm run verify`, `agent-browser 0.27.0` on `PATH`). |
40
40
  | Real upstream contract | `npm run verify -- real-upstream` runs the localhost fixture matrix against the real installed `agent-browser` matching the baseline. | Pass on 2026-05-14 (`npm run verify -- real-upstream`). |
41
- | Packaged Pi smoke | `npm run verify -- package-pi` validates package contents, loads exactly one packaged `agent_browser` tool, and executes fake-upstream `--version`. | Pass on 2026-05-14 (`npm run verify -- package-pi`). |
42
- | `verify -- release` / `prepublishOnly` | `npm run verify -- release` chains the default gate with packaged Pi smoke (`verifySteps` `release` in [`scripts/project.mjs`](../scripts/project.mjs)). `package.json` `prepublishOnly` runs that compose before `npm pack --dry-run` during `npm publish`. It intentionally omits lifecycle, real-upstream, and benchmark modes—see [`RELEASE.md`](RELEASE.md#pre-release-checks). | Aligned on 2026-05-14 with the green **Default local gate** and **Packaged Pi smoke** rows above. |
41
+ | Packaged Pi smoke | `npm run verify -- package-pi` validates package contents, loads exactly one packaged `agent_browser` tool, and executes fake-upstream `--version`. | Pass on 2026-05-15 as part of `npm run verify -- release`. |
42
+ | `verify -- release` / `prepublishOnly` | `npm run verify -- release` chains the default gate with packaged Pi smoke (`verifySteps` `release` in [`scripts/project.mjs`](../scripts/project.mjs)). `package.json` `prepublishOnly` runs that compose before `npm pack --dry-run` during `npm publish`. It intentionally omits lifecycle, real-upstream, and benchmark modes—see [`RELEASE.md`](RELEASE.md#pre-release-checks). | Pass on 2026-05-15; `prepublishOnly` also passed during the blocked `npm publish` attempt before npm returned `ENEEDAUTH`. |
43
43
  | Configured-source lifecycle | `npm run verify -- lifecycle` (`scripts/verify-lifecycle.mjs`) drives `/reload`, restart, `/resume`, session continuity, slash-command sentinel tokens (`v1` then `v2` after rewriting the packaged extension to simulate pickup), and persisted spill reachability with a fake upstream on `PATH`. Passthrough flags are defined in `validatePassthrough` in [`scripts/project.mjs`](../scripts/project.mjs): `--keep-artifacts`, `--verbose`, and `--timeout-ms` plus a separate positive integer value (for example `npm run verify -- lifecycle --keep-artifacts --verbose --timeout-ms 600000`). | Pass on 2026-05-14 (`npm run verify -- lifecycle --keep-artifacts --verbose --timeout-ms 600000`) during release cleanup; retained temp artifacts were removed after inspection. Treat any future unexplained red lifecycle gate as a release blocker. |
44
44
  | Quick isolated Pi smoke | `pi --no-extensions -e .` from repo root; native `agent_browser` only. | Covered version/help/skills, open/snapshot/click, eval stdin, batch stdin, screenshot, explicit session, `sessionMode: "fresh"`, network requests, console/errors, diff snapshot, stream status/disable, dashboard start/stop, and chat credential-failure pass-through during RQ-0055; RQ-0056 cleanup spot-check found no lingering tmux or repo-local smoke artifacts. |
45
45
 
@@ -48,9 +48,9 @@ Re-run the gates below before each release; this table records what the closure
48
48
  | Baseline section | Baseline items | Documentation | Runtime handling | Test coverage | Validation status |
49
49
  | --- | --- | --- | --- | --- | --- |
50
50
  | Built-in skills | `skills list`, `skills get core`, `skills get core --full`, `skills get <name>`, `skills get electron`, `skills get slack`, `skills get dogfood`, `skills get vercel-sandbox`, `skills get agentcore`, `skills path [name]` | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#built-in-skills), generated baseline block, README proof section, release docs. | `isStatelessInspectionCommand` keeps read-only `skills list` / `skills get` / `skills path` JSON inspection stateless while preserving thin upstream passthrough. | `test/agent-browser.runtime.test.ts`; `test/agent-browser.extension-validation.test.ts` skills/provider matrix; real-upstream inspection/skills group. | Supported. Real upstream covers `skills list`, `skills get core --full`, `skills path core`; fake matrix covers specialized skills. |
51
- | Core page, element, navigation, and extraction commands | `open <url>`, `click <sel>`, `dblclick <sel>`, `type <sel> <text>`, `fill <sel> <text>`, `press <key>`, `keyboard type <text>`, `keyboard inserttext <text>`, `keydown Shift`, `keyup Shift`, `hover <sel>`, `focus <sel>`, `check <sel>`, `uncheck <sel>`, `select <sel> <val...>`, `drag <src> <dst>`, `upload <sel> <files...>`, `download <sel> <path>`, `scroll <dir> [px]`, `scrollintoview <sel>`, `wait <sel|ms>`, `screenshot [path]`, `screenshot --full`, `screenshot --annotate`, `pdf <path>`, `snapshot`, `eval <js>`, `connect <port|url>`, `close [--all]`, `back`, `forward`, `reload`, `pushstate <url>`, `get <what> [selector]`, `is <what> <selector>`, `find <locator> <value> <action>`, `mouse <action> [args]`, `set <setting> [value]` | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#core-page-and-element-commands), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md), README quick start. | Thin upstream passthrough with wrapper-owned `--json`, managed-session planning, stale-ref guidance, artifact verification, page-change summaries, and redaction. | Real-upstream core matrix covers representative interactions/navigation/extraction/artifacts; fake core matrix covers additional passthrough and ordering; presentation/results/runtime tests lock wrapper behavior. | Supported. Some upstream semantics remain upstream-owned; wrapper contract and artifact metadata are tested. |
51
+ | Core page, element, navigation, and extraction commands | `open <url>`, `click <sel>`, `dblclick <sel>`, `type <sel> <text>`, `fill <sel> <text>`, `press <key>`, `keyboard type <text>`, `keyboard inserttext <text>`, `keydown Shift`, `keyup Shift`, `hover <sel>`, `focus <sel>`, `check <sel>`, `uncheck <sel>`, `select <sel> <val...>`, `drag <src> <dst>`, `upload <sel> <files...>`, `download <sel> <path>`, `scroll <dir> [px]`, `scrollintoview <sel>`, `wait <sel|ms>`, `screenshot [path]`, `screenshot --full`, `screenshot --annotate`, `pdf <path>`, `snapshot`, `eval <js>`, `connect <port|url>`, `close [--all]`, `back`, `forward`, `reload`, `pushstate <url>`, `get <what> [selector]`, `is <what> <selector>`, `find <locator> <value> <action>`, `mouse <action> [args]`, `set <setting> [value]` | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#core-page-and-element-commands), [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md), README quick start. | Thin upstream passthrough with wrapper-owned `--json`, managed-session planning, stale-ref guidance, artifact verification, page-change summaries, no-op scroll diagnostics, focused-combobox diagnostics, and redaction. | Real-upstream core matrix covers representative interactions/navigation/extraction/artifacts; fake core matrix covers additional passthrough and ordering plus no-op scroll and combobox-focus diagnostics; presentation/results/runtime tests lock wrapper behavior. | Supported. Some upstream semantics remain upstream-owned; wrapper contract and artifact metadata are tested. |
52
52
  | Sessions, state, tabs, frames, dialogs, and windows | `session`, `session list`, `state save <path>`, `state load <path>`, `tab list`, `tab new --label <name> [url]`, `tab <t<N>|label>`, `frame <selector|main>`, `dialog accept [text]`, `dialog dismiss`, `dialog status`, `window new` | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#session-state-frames-dialogs-windows-and-inspection-commands) (session/state/tabs/frames/dialogs/windows), stateful workflow notes, [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details). | Stateful presentation summaries/redaction; state save artifact handling; explicit/implicit session restore; tab target pinning; frame/dialog/window passthrough. | `test/agent-browser.extension-validation.test.ts` stateful matrix; runtime session/resume tests; presentation stateful redaction tests; lifecycle harness for reload/resume. | Supported. External profile/auth state remains operator-owned and documented. |
53
- | Network, storage, artifacts, diagnostics, and performance | `network <action>`, `network route <url> [--abort|--body <json>] [--resource-type <csv>]`, `network request <requestId>`, `cookies [get|set|clear]`, `cookies set --curl <file>`, `storage <local|session>`, `diff snapshot`, `diff screenshot --baseline`, `diff url <u1> <u2>`, `trace start|stop [path]`, `profiler start|stop [path]`, `record start <path> [url]`, `record restart <path> [url]`, `record stop`, `console [--clear]`, `errors [--clear]`, `highlight <sel>`, `inspect`, `clipboard <op> [text]`, `stream enable [--port <n>]`, `stream disable`, `stream status`, `react tree`, `react inspect <id>`, `react renders start`, `react renders stop [--json]`, `react suspense [--only-dynamic] [--json]`, `vitals [url] [--json]`, `removeinitscript <id>` | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#page-state-finding-mouse-settings-network-and-storage) and diagnostic sections; [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details). | Thin passthrough plus command-specific compact diagnostic summaries, artifact metadata for HAR/diff/trace/profile/record, sensitive-data redaction, timeout bounds, and cleanup-pair guidance. | Fake non-core matrix covers network/diff/trace/profiler/record/console/errors/highlight/inspect/clipboard/stream/dashboard/chat JSON shapes and redaction; real-upstream covers safe network requests/HAR, diff, trace/profiler, console/errors/highlight, stream, vitals, and React missing-renderer. | Supported. Browser-opening or environment-sensitive operations (`inspect`, OS clipboard, full React app inspection) are delegated thinly and documented as needing suitable local/browser state. |
53
+ | Network, storage, artifacts, diagnostics, and performance | `network <action>`, `network route <url> [--abort|--body <json>] [--resource-type <csv>]`, `network request <requestId>`, `cookies [get|set|clear]`, `cookies set --curl <file>`, `storage <local|session>`, `diff snapshot`, `diff screenshot --baseline`, `diff url <u1> <u2>`, `trace start|stop [path]`, `profiler start|stop [path]`, `record start <path> [url]`, `record restart <path> [url]`, `record stop`, `console [--clear]`, `errors [--clear]`, `highlight <sel>`, `inspect`, `clipboard <op> [text]`, `stream enable [--port <n>]`, `stream disable`, `stream status`, `react tree`, `react inspect <id>`, `react renders start`, `react renders stop [--json]`, `react suspense [--only-dynamic] [--json]`, `vitals [url] [--json]`, `removeinitscript <id>` | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#page-state-finding-mouse-settings-network-and-storage) and diagnostic sections; [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details). | Thin passthrough plus command-specific compact diagnostic summaries, artifact metadata for HAR/diff/trace/profile/record, early missing-ffmpeg recording warnings, sensitive-data redaction, timeout bounds, and cleanup-pair guidance. | Fake non-core matrix covers network/diff/trace/profiler/record/console/errors/highlight/inspect/clipboard/stream/dashboard/chat JSON shapes and redaction; real-upstream covers safe network requests/HAR, diff, trace/profiler, console/errors/highlight, stream, vitals, and React missing-renderer. | Supported. Browser-opening or environment-sensitive operations (`inspect`, OS clipboard, full React app inspection) are delegated thinly and documented as needing suitable local/browser state. |
54
54
  | Batch, auth, confirmations, setup, dashboard, and AI commands | `batch [--bail]`, `auth save <name>`, `auth save <name> --password-stdin`, `auth login <name>`, `auth list`, `auth show <name>`, `auth delete <name>`, `confirm <id>`, `deny <id>`, `chat <message>`, `dashboard start --port <n>`, `dashboard stop`, `install`, `install --with-deps`, `upgrade`, `doctor [--fix]`, `doctor --offline --quick`, `doctor --json`, `profiles` | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#batch-auth-confirmations-sessions-chat-dashboard-and-setup), README security notes, release docs. | Batch stdin is native-tool-only; top-level `job`, `qa`, and experimental `sourceLookup` / `networkSourceLookup` compile to `batch` with generated stdin (caller `stdin` rejected for those modes); auth/confirmation details are redacted; dashboard/chat/setup/doctor are passed through thinly with timeout/cleanup guidance; package doctor remains separate and read-only. | Unit/fake tests cover batch, auth password stdin, confirmations, dashboard/chat summaries, and doctor diagnostics; extension-validation covers `job`, `qa`, `sourceLookup`, and `networkSourceLookup` compilation plus `details.sourceLookup` / `details.networkSourceLookup` evidence; [`scripts/agent-browser-efficiency-benchmark.mjs`](../scripts/agent-browser-efficiency-benchmark.mjs) includes `source-lookup-visible-element` and `network-source-lookup-failed-request` scenarios; quick isolated Pi smoke covered dashboard start/stop and chat credential-failure pass-through. | Supported. `install`, `upgrade`, `doctor --fix`, and interactive auth/chat/setup flows are upstream-owned and should be run only when the operator intends those side effects. |
55
55
  | Global flags, config, providers, policy, and environment | `--profile <name|path>`, `AGENT_BROWSER_PROFILE`, `--session <name>`, `AGENT_BROWSER_SESSION`, `--session-name <name>`, `AGENT_BROWSER_SESSION_NAME`, `--state <path>`, `AGENT_BROWSER_STATE`, `--auto-connect`, `AGENT_BROWSER_AUTO_CONNECT`, `--headers <json>`, `--init-script <path>`, `AGENT_BROWSER_INIT_SCRIPTS`, `--enable <feature>`, `AGENT_BROWSER_ENABLE`, `--executable-path <path>`, `AGENT_BROWSER_EXECUTABLE_PATH`, `--extension <path>`, `AGENT_BROWSER_EXTENSIONS`, `--args <args>`, `AGENT_BROWSER_ARGS`, `--user-agent <ua>`, `AGENT_BROWSER_USER_AGENT`, `--proxy <server>`, `AGENT_BROWSER_PROXY`, `HTTP_PROXY`, `HTTPS_PROXY`, `ALL_PROXY`, `--proxy-bypass <hosts>`, `AGENT_BROWSER_PROXY_BYPASS`, `NO_PROXY`, `--ignore-https-errors`, `AGENT_BROWSER_IGNORE_HTTPS_ERRORS`, `--allow-file-access`, `AGENT_BROWSER_ALLOW_FILE_ACCESS`, `--headed`, `AGENT_BROWSER_HEADED`, `--cdp <port>`, `--color-scheme <scheme>`, `AGENT_BROWSER_COLOR_SCHEME`, `--download-path <path>`, `AGENT_BROWSER_DOWNLOAD_PATH`, `--engine <name>`, `AGENT_BROWSER_ENGINE`, `--no-auto-dialog`, `AGENT_BROWSER_NO_AUTO_DIALOG`, `--json`, `AGENT_BROWSER_JSON`, `--annotate`, `AGENT_BROWSER_ANNOTATE`, `--screenshot-dir <path>`, `AGENT_BROWSER_SCREENSHOT_DIR`, `--screenshot-quality <n>`, `AGENT_BROWSER_SCREENSHOT_QUALITY`, `--screenshot-format <fmt>`, `AGENT_BROWSER_SCREENSHOT_FORMAT`, `--content-boundaries`, `AGENT_BROWSER_CONTENT_BOUNDARIES`, `--max-output <chars>`, `AGENT_BROWSER_MAX_OUTPUT`, `--allowed-domains <list>`, `AGENT_BROWSER_ALLOWED_DOMAINS`, `--action-policy <path>`, `AGENT_BROWSER_ACTION_POLICY`, `--confirm-actions <list>`, `AGENT_BROWSER_CONFIRM_ACTIONS`, `--confirm-interactive`, `AGENT_BROWSER_CONFIRM_INTERACTIVE`, `-p, --provider <name>`, `AGENT_BROWSER_PROVIDER`, `browserbase`, `kernel`, `browseruse`, `browserless`, `agentcore`, `--device <name>`, `AGENT_BROWSER_IOS_DEVICE`, `agent-browser -p ios device list`, `agent-browser -p ios swipe 
up`, `agent-browser -p ios tap @e1`, `--model <name>`, `AI_GATEWAY_MODEL`, `-v, --verbose`, `-q, --quiet`, `--debug`, `AGENT_BROWSER_DEBUG`, `AGENT_BROWSER_CONFIG`, `AGENT_BROWSER_DEFAULT_TIMEOUT`, `AGENT_BROWSER_STREAM_PORT`, `AGENT_BROWSER_IDLE_TIMEOUT_MS`, `AGENT_BROWSER_ENCRYPTION_KEY`, `AGENT_BROWSER_STATE_EXPIRE_DAYS`, `AGENT_BROWSER_IOS_UDID`, `AI_GATEWAY_URL`, `AI_GATEWAY_API_KEY` | [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md#important-global-flags-config-and-environment), README provider/setup notes, [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#sessionmode), architecture/runtime docs. | Runtime handles value flags, launch-scoped flags, redacted invocation echoes, `sessionMode: "fresh"` recovery hints, explicit sessions, and provider/device launch-scoping. Process env forwards a curated allowlist/prefix set for upstream/provider credentials without cloning the whole parent env. | Runtime tests cover launch-scoped flags, provider/device planning, redaction, stateless inspections, and explicit/fresh sessions. Process tests cover provider env prefixes. Fake provider/specialized-skill matrix covers provider argv/env passthrough. Package doctor checks version/source drift. | Supported. Provider clouds, iOS/Appium, Browserbase/Kernel/BrowserUse/Browserless/AgentCore, proxies, profiles, and credentials require external setup; the wrapper documents and forwards them thinly rather than emulating provider behavior. |
56
56
 
@@ -83,3 +83,11 @@ Native `job`, `qa`, experimental `sourceLookup`, and experimental `networkSource
83
83
  `RQ-0078` improves getter/eval discoverability: `extensions/agent-browser/lib/results/presentation.ts` matches upstream failure text containing `unknown command`, `unknown subcommand`, or `unrecognized command` (case-insensitive) when the failed command token is one of `attr`, `count`, `html`, `text`, `title`, `url`, or `value`, then adds grouped-`get` prose; only `title` / `url` also emit read-only `nextActions` (`use-get-title` / `use-get-url`, with `--session` when the failed call named a session). The getter block is skipped when selector recovery already injected an `Agent-browser hint:` line into the same error string. `extensions/agent-browser/index.ts` adds `details.evalStdinHint` plus visible `Eval stdin hint` when `looksLikeFunctionEvalStdin` matches trimmed stdin and upstream JSON carries an empty-object `data.result`. Contract: [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details) (`nextActions`, `evalStdinHint`); human workflow: [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md) extraction note and README quick start; fake coverage: `buildToolPresentation suggests grouped getter commands for common unknown getter shortcuts` and `agentBrowserExtension warns when eval stdin returns an empty object from a function-shaped snippet`.
84
84
 
85
85
  `RQ-0079` clarifies artifact lifecycle and cleanup ownership: `extensions/agent-browser/index.ts` adds `details.artifactCleanup` and visible `Artifact lifecycle` copy on successful `close` when `artifactManifest.entries` is non-empty (`getArtifactCleanupGuidance`), stating that close does not delete explicit artifacts; `explicitArtifactPaths` carries up to ten distinct existing `explicit-path` manifest paths after a filesystem existence check, skipping stale paths already removed by host tools (possibly empty when the recent window has no existing explicit rows). Contract: [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details) (`artifactCleanup`); human workflow: [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md) artifact retention section and README artifact notes; fake coverage: `agentBrowserExtension reports artifact lifecycle guidance on close`.
86
+
87
+ `RQ-0080` adds no-op scroll recovery for dense dashboards and nested panes: for successful top-level `scroll`, `extensions/agent-browser/index.ts` samples viewport and prominent scroll-container positions before and after execution with read-only session-scoped `eval --stdin` probes. If no sampled position changes, it emits `details.scrollNoop`, appends visible `Scroll diagnostic: no observed scroll movement`, appends exact `inspect-after-noop-scroll` / `verify-noop-scroll-visually` next actions, and updates `pageChangeSummary.nextActionIds` so agents can branch without parsing prose. Contract: [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details) (`scrollNoop`, `nextActions`); human workflow: [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md) scroll note; fake coverage: `agentBrowserExtension reports no-op scroll diagnostics with recovery next actions`.
88
+
89
+ `RQ-0081` adds focused-combobox recovery for dense dashboard controls: after successful explicit combobox-targeted actions (for example a `semanticAction` click with role `combobox`), `extensions/agent-browser/index.ts` runs a read-only focused-element probe and emits `details.comboboxFocus` plus visible `Combobox diagnostic` text when a combobox-like control is focused, has explicit `aria-expanded` state, and shows no visible listbox/options. It appends exact `inspect-focused-combobox`, `try-open-combobox-with-arrow`, and `try-open-combobox-with-enter` next actions, all session-prefixed when applicable. The probe is gated to explicit combobox targets to avoid ordinary-click false positives and preserves the original combobox semantic target even when active-session visible-ref resolution rewrites execution to `click @ref`. Contract: [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details) (`comboboxFocus`, `nextActions`); human workflow: [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md) combobox note; fake coverage: `agentBrowserExtension reports focused combobox diagnostics with option-opening next actions` and `agentBrowserExtension preserves combobox diagnostics after semanticAction visible-ref resolution`.
90
+
91
+ `RQ-0082` adds early recording dependency warnings: after successful `record start` / `record restart`, `extensions/agent-browser/index.ts` checks whether executable `ffmpeg` is visible on the Pi process `PATH`. If not, it emits non-blocking `details.recordingDependencyWarning` plus visible `Recording dependency warning: ffmpeg not found on PATH` text so agents can install `ffmpeg` or fix PATH before `record stop` needs to encode the WebM. Contract: [`TOOL_CONTRACT.md`](TOOL_CONTRACT.md#details) (`recordingDependencyWarning`); human workflow: [`COMMAND_REFERENCE.md`](COMMAND_REFERENCE.md) recording notes and README dependency table; fake coverage: `agentBrowserExtension warns after record start when ffmpeg is missing`.
92
+
93
+ `RQ-0083` documents a repeatable public Grafana stress checklist in [`RELEASE.md`](RELEASE.md#public-grafana-stress-checklist) instead of bundling private dogfood/VFR skills or adding a recipe runtime. The checklist uses Grafana Play Node Exporter Full to manually exercise dense snapshots, no-op scroll diagnostics, combobox recovery, screenshots/artifacts, optional short recording, network/console/error summaries, and cleanup. Treat known Grafana Play noise (analytics/Sentry requests, public-demo 403s, console errors) as site noise unless the wrapper leaks secrets, hides actionable rows, mishandles artifacts, or suggests unsafe follow-ups. Evidence should be a short release note or CueLoop task, not committed `.dogfood/` outputs, raw HARs, videos, or private scripts. Validation on 2026-05-15 used the native tool against Grafana Play: fresh open, dense `snapshot -i`, scroll, combobox semantic click, screenshots with verified artifacts, `network requests`, `console`, `close`, and host cleanup of `/tmp/pi-agent-browser-grafana-rq0083*.png`; observed 11 public-demo 403 request rows and Grafana console noise as expected site noise.
@@ -26,7 +26,7 @@ It also keeps the main UX where it belongs: the agent invokes the tool directly
26
26
 
27
27
  The tool guidance should be written for task discovery first, not wrapper implementation first. That means the description should emphasize browser use cases like web research, reading live docs, clicking, filling, screenshots, extraction, and authenticated/profile-based workflows. Low-level wrapper details like `stdin` and exact CLI args belong in the schema and guidelines, not the lead description.
28
28
 
29
- The tool also needs an operating playbook, not just a capability list. The model should not have to rediscover basics each session. The canonical agent-facing playbook lives in `extensions/agent-browser/lib/playbook.ts`; generated Markdown fragments are updated by `npm run docs -- playbook write`, and `npm run docs -- playbook check` fails when checked-in documentation drifts.
29
+ The tool also needs an operating playbook, not just a capability list. The model should not have to rediscover basics each session, but always-on guidance must stay concise. The canonical agent-facing playbook lives in `extensions/agent-browser/lib/playbook.ts`; it provides compact runtime rules plus absolute installed-package paths to `README.md`, `docs/COMMAND_REFERENCE.md`, and this contract so agents with file tools can read targeted guidance on demand instead of receiving the full docs in prompt context. Generated Markdown fragments are updated by `npm run docs -- playbook write`, and `npm run docs -- playbook check` fails when checked-in documentation drifts.
30
30
 
31
31
  The native command reference in `docs/COMMAND_REFERENCE.md` is driven by the same pattern: canonical metadata lives in `scripts/agent-browser-capability-baseline.mjs`, selected regions are generated into the Markdown by `npm run docs -- command-reference write`, and `npm run docs` plus `npm run verify -- command-reference` catch drift (the latter also samples the installed `agent-browser` on `PATH`). Maintainer workflow details live in `AGENTS.md` under upstream capability baseline.
32
32
 
@@ -53,6 +53,7 @@ Agent-facing efficiency claims are measured with `npm run benchmark:agent-browse
53
53
  - For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.
54
54
  - For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.
55
55
  - For downloads, prefer download <selector> <path> when an element click should save a file. Do not rely on click alone when you need the downloaded file on disk.
56
+ - On dashboards with nested scroll containers, verify scroll with a screenshot or fresh snapshot -i; if the viewport did not move, prefer scrollintoview <@ref> or target the actual scrollable region. For comboboxes, a click/semanticAction may only focus the field; re-snapshot and fall back to type, press Enter/arrow keys, select, or visible option refs.
56
57
  - When using eval --stdin, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics.
57
58
  - When using eval --stdin for extraction, return the value you want instead of relying on console.log as the primary result channel. Prefer plain expressions like ({ title: document.title }) or explicitly invoked functions like (() => ({ title: document.title }))(); if a function-shaped snippet returns {}, details.evalStdinHint may warn that the function was serialized instead of called. If get text on a CSS selector surfaces details.selectorTextVisibility or selectorTextVisibilityAll, prefer a visible @ref, a more specific selector, or the inspect-visible-text-candidates nextAction over hidden tab content.
58
59
  - When details.pageChangeSummary is present, use changeType and summary as a compact signal for navigation, DOM mutation, confirmations, or artifacts; when nextActionIds is set, match those ids to entries in details.nextActions (or per-step nextActions inside batch) for concrete follow-up payloads instead of inferring from prose alone. If a no-navigation click surfaces details.overlayBlockers, inspect the fresh snapshot evidence before using a close/dismiss candidate nextAction; ordinary page chrome without dialog/alertdialog evidence should not trigger this diagnostic.
@@ -368,7 +369,7 @@ Ref preflight details (implementation in `extensions/agent-browser/index.ts`):
368
369
 
369
370
  **Presentation redaction (implementation map):** Successful non-`batch` tool calls and each successful `batchSteps[]` row run upstream `data` through `redactPresentationData` in `extensions/agent-browser/lib/results/presentation.ts`: `cookies` and `storage` walk objects/arrays and replace case-insensitive `value` keys with `"[REDACTED]"` (diagnostic formatters still describe rows without expanding secrets); every other command’s payload is recursively scrubbed with `redactStructuredPresentationValue`, which redacts known sensitive key names and applies string-level sensitivity heuristics so network, diff, trace/profiler, stream, dashboard, chat, and other structured results do not echo bearer tokens, proxy credentials, or similar fields verbatim into `details.data`. Echoed `command` arrays in `details` and in batch roll-ups use `redactInvocationArgs` from `extensions/agent-browser/lib/runtime.ts` to mask trailing values for sensitive global flags (including `--body`, `--headers`, `--password`, and `--proxy`), preserve the special positional rules for `cookies set`, `storage local|session set`, and `set credentials`, and scrub other argv tokens for URLs and inline secrets. Failed batch steps additionally run `redactExactValues` on structured step errors so literals taken from that step’s argv (cookie value, storage set value, `--password` / `--password=` tokens) cannot reappear inside formatted error blobs.
370
371
 
371
- `nextActions` is an optional machine-readable list of exact native `agent_browser` follow-ups. Each entry includes `tool: "agent_browser"`, an `id`, a short `reason`, optional `safety`, and either `params` (`args`, optional `stdin`, optional `sessionMode`) or an `artifactPath` for saved-file workflows. Agents should prefer these payloads over prose when present. Current recommendations include: `open` success → `snapshot -i`; mutating/navigation commands (see `buildAgentBrowserNextActions` in source for the exact command set) → `snapshot -i`; stale refs and selector failures → `snapshot -i` via `refresh-interactive-refs` (prefixed with `--session <name>` when the failed call ran in a named or managed session); unknown getter shortcuts such as `title` / `url` → exact read-only retries like `get title` / `get url` with ids `use-get-title` / `use-get-url`; semantic `selector-not-found` failures that compiled from `semanticAction` may append `try-searchbox-name-candidate`, `try-textbox-name-candidate`, `try-button-name-candidate`, `try-link-name-candidate`, or `try-labeled-textbox-candidate` after presentation `nextActions` only for the bounded fill/click pairs enumerated under `semanticAction` (not for `select`); semantic `stale-ref` failures that compiled from `semanticAction` may also include `retry-semantic-action-after-stale-ref` after that snapshot step; qualifying same-URL top-level clicks (see `overlayBlockers` below) with fresh snapshot evidence of likely overlay/banner/dialog close controls may append `inspect-overlay-state` and bounded `try-overlay-blocker-candidate-*` entries; `get text <selector>` calls with hidden/multiple CSS matches may append `inspect-visible-text-candidates` with a read-only `eval --stdin` probe (each prefixed with `--session <name>` when `details.sessionName` is set, same `sessionPrefixArgs` rule as other session-scoped follow-ups); confirmations → exact `confirm <id>` and `deny <id>` choices; tab drift → `tab list` then `snapshot 
-i`; download verification failures or missing successful download artifacts → `wait --download [path]`; saved artifacts → the artifact path to inspect/consume after checking `artifactVerification`/metadata; missing non-download artifacts → `verify-artifact-path` so agents do not trust an absent file. When nothing applies, the field is omitted.
372
+ `nextActions` is an optional machine-readable list of exact native `agent_browser` follow-ups. Each entry includes `tool: "agent_browser"`, an `id`, a short `reason`, optional `safety`, and either `params` (`args`, optional `stdin`, optional `sessionMode`) or an `artifactPath` for saved-file workflows. Agents should prefer these payloads over prose when present. Current recommendations include: `open` success → `snapshot -i`; mutating/navigation commands (see `buildAgentBrowserNextActions` in source for the exact command set) → `snapshot -i`; stale refs and selector failures → `snapshot -i` via `refresh-interactive-refs` (prefixed with `--session <name>` when the failed call ran in a named or managed session); unknown getter shortcuts such as `title` / `url` → exact read-only retries like `get title` / `get url` with ids `use-get-title` / `use-get-url`; semantic `selector-not-found` failures that compiled from `semanticAction` may append `try-searchbox-name-candidate`, `try-textbox-name-candidate`, `try-button-name-candidate`, `try-link-name-candidate`, or `try-labeled-textbox-candidate` after presentation `nextActions` only for the bounded fill/click pairs enumerated under `semanticAction` (not for `select`); semantic `stale-ref` failures that compiled from `semanticAction` may also include `retry-semantic-action-after-stale-ref` after that snapshot step; qualifying same-URL top-level clicks (see `overlayBlockers` below) with fresh snapshot evidence of likely overlay/banner/dialog close controls may append `inspect-overlay-state` and bounded `try-overlay-blocker-candidate-*` entries; successful top-level `scroll` calls whose pre/post viewport and sampled scroll-container positions do not change may append `inspect-after-noop-scroll` and `verify-noop-scroll-visually`; explicit combobox-targeted actions that focus a combobox without visible options may append `inspect-focused-combobox`, `try-open-combobox-with-arrow`, and `try-open-combobox-with-enter`; `get text 
<selector>` calls with hidden/multiple CSS matches may append `inspect-visible-text-candidates` with a read-only `eval --stdin` probe (each prefixed with `--session <name>` when `details.sessionName` is set, same `sessionPrefixArgs` rule as other session-scoped follow-ups); confirmations → exact `confirm <id>` and `deny <id>` choices; tab drift → `tab list` then `snapshot -i`; download verification failures or missing successful download artifacts → `wait --download [path]`; saved artifacts → the artifact path to inspect/consume after checking `artifactVerification`/metadata; missing non-download artifacts → `verify-artifact-path` so agents do not trust an absent file. When nothing applies, the field is omitted.
372
373
 
373
374
  **Unknown-command getter hints (failure presentation):** `buildToolPresentation` in `extensions/agent-browser/lib/results/presentation.ts` only runs this path when upstream error text (after model-facing redaction) matches `unknown command`, `unknown subcommand`, or `unrecognized command` (case-insensitive) **and** the failed invocation’s primary command token is one of `attr`, `count`, `html`, `text`, `title`, `url`, or `value`. Visible text then includes a grouped-`get` hint line plus per-token guidance (`get text <selector>`, `get html …`, `get attr …`, `get count …`, `get value …`, `get title`, `get url`). Machine `nextActions` with ids `use-get-title` / `use-get-url` are emitted only for `title` / `url`, with `params.args` optionally prefixed by `--session <name>` when the failed call targeted a named session. If the error string already contains `Agent-browser hint:` from selector recovery (stale-ref or unsupported selector dialect appendages), the getter block is skipped so two stacked `Agent-browser hint:` headers are not emitted.
374
375
 
@@ -441,6 +442,9 @@ Additional structured fields can appear when relevant:
441
442
  - `navigationSummary` for navigation-style commands like `click`, `back`, `forward`, and `reload`
442
443
  - `pageChangeSummary` for compact mutation/artifact/navigation summaries on commands that can change browser state
443
444
  - `overlayBlockers` for conservative post-click overlay/banner/dialog blocker candidates when a direct click stays on the same URL and a fresh snapshot provides evidence (`candidates`, `summary`, and `snapshot` per `OverlayBlockerDiagnostic` in `extensions/agent-browser/index.ts`)
445
+ - `scrollNoop` after a successful **top-level** `scroll` when wrapper-side read-only probes before and after the command show no change in `window.scrollX` / `window.scrollY` and no change in the sampled prominent scrollable containers. To avoid pre-launching a session without caller startup state, this probe is skipped when the invocation includes startup-scoped flags such as `--profile`, `--state`, `--session-name`, `--cdp`, providers, init scripts, or similar launch settings. Shape: `{ reason: "no-observed-scroll-position-change", message, before, after, recommendations }`; `before` / `after` include viewport dimensions, document scroll dimensions, and up to ten sampled container descriptors plus scroll offsets. Container descriptors use only sample index, tag name, and ARIA role; DOM ids/classes are intentionally not stored. This diagnostic is conservative evidence that the page-level scroll likely missed a nested pane, not proof that every app-specific region is unchanged. Visible text appends `Scroll diagnostic: no observed scroll movement`, and `details.nextActions` gains `inspect-after-noop-scroll` (`snapshot -i`) plus `verify-noop-scroll-visually` (`screenshot`), session-prefixed when applicable.
446
+ - `comboboxFocus` after a successful explicit combobox-targeted `click` / `fill` / `find … click|fill|select` (for example `semanticAction` with role `combobox`, including when that semantic action resolves through a current visible `@ref` before execution) when a read-only probe sees the active element is combobox-like, `aria-expanded` is explicitly present (`false` or `true`), and no visible `listbox` / `option` / menu option elements are open. Shape: `{ reason: "focused-combobox-without-visible-options", message, activeElement, visibleListboxCount, visibleOptionCount, recommendations }`; `activeElement` includes bounded role/tag/expanded/hasPopup/name metadata with normal text redaction. Visible text appends `Combobox diagnostic: focused combobox did not expose visible options`, and `details.nextActions` gains `inspect-focused-combobox` (`snapshot -i`), `try-open-combobox-with-arrow` (`press ArrowDown`), and `try-open-combobox-with-enter` (`press Enter`), session-prefixed when applicable. The diagnostic is deliberately gated to explicit combobox-targeted calls to avoid extra probes or false positives on ordinary clicks/textboxes.
447
+ - `recordingDependencyWarning` after a successful `record start` or `record restart` when the wrapper cannot find an executable `ffmpeg` on the Pi process `PATH`. Shape: `{ reason: "ffmpeg-missing-for-recording", dependency: "ffmpeg", command, message, recommendations }`. Visible text appends `Recording dependency warning: ffmpeg not found on PATH`. This is a non-blocking preflight warning: upstream may start recording, but `record stop` needs `ffmpeg` to encode the WebM.
444
448
  - `selectorTextVisibility` after a **successful** upstream `get text <selector>` (standalone or inside a successful `batch`) when the wrapper’s follow-up probe finds a hazard: more than one DOM match (upstream reads the first `querySelectorAll` hit, which may be the wrong tab/panel), or the first match is hidden while at least one other match is visible (requires multiple DOM nodes so a visible peer exists; a lone hidden match is not flagged). The probe is a read-only `eval --stdin` script (`buildVisibleTextProbeScript` in `extensions/agent-browser/index.ts`) that counts matches, applies a small visibility heuristic (`display`/`visibility`/`opacity` plus non-zero client rects), and may include a redacted `firstVisibleTextPreview`. It is **not** run for page-scoped `@e…` selectors or when the selector string is withheld because `selectorMayExposeSensitiveLiteral` would risk echoing secrets in probe output. `details.selectorTextVisibility` mirrors the primary diagnostic (first sorted entry); when several selectors in one `batch` qualify, `selectorTextVisibilityAll` lists every diagnostic sorted so hidden-first cases precede generic multi-match ambiguity. Appended `details.nextActions` use ids `inspect-visible-text-candidates` and `inspect-visible-text-candidates-2`, … with the probe replayed via `eval --stdin` for each hazardous selector.
445
449
  - `evalStdinHint` after a successful `eval --stdin` when caller stdin (trimmed) looks function-shaped to the wrapper’s lightweight detector (`looksLikeFunctionEvalStdin` in `extensions/agent-browser/index.ts`: leading `function` / `async function`, parenthesized arrow `(…) =>`, or a concise `name =>` / `async name =>` form) **and** upstream JSON `data` is an object whose `result` field is an empty object (`{}`). It includes `reason` and `suggestion`; visible output appends `Eval stdin hint` with the same guidance. This is a heuristic for the common mistake of returning a function object instead of invoking it or passing a plain expression, not a JavaScript parser or proof that the page returned no useful data.
446
450
  - `timeoutPartialProgress` after `runAgentBrowserProcess` reports `timedOut` (wrapper child-process watchdog) when best-effort recovery finds useful context. `summary` is a short sentence counting how many declared artifact paths exist on disk versus how many were scanned, and whether page context came from live session reads or only from a planned URL (when nothing in the plan declares an artifact path, the fraction may read `0/0` while `currentPage` can still carry session or planned URL context). `steps` lists planned argv from the compiled `job` or `qa` batch plan (`compiledJob` in `extensions/agent-browser/index.ts`, which is only populated for those top-level modes) or, when that object is absent, from the same JSON-array `batch` stdin the tool sends upstream—whether caller-authored or wrapper-generated for `sourceLookup` / `networkSourceLookup` (1-based indices; only JSON-array stdin whose elements are string[] argv arrays is parsed); timeouts on other argv shapes may still emit `currentPage` / summary evidence without `steps`. `currentPage` comes from session-scoped `get url` / `get title` when the session answers, otherwise a fallback URL may be inferred from the last `open` / `navigate` / `pushstate` step in the plan. `artifacts` covers declared output paths on `screenshot`, `pdf`, `download`, and `wait --download` steps (absolute path, existence, optional `sizeBytes`, `stepIndex`). Visible text repeats the same block under `Timeout partial progress`, applying URL and path-segment redaction; the prose `Planned steps` list shows at most six steps, then an omitted-count line when the plan is longer. This is recovery evidence only; missing entries do not prove the upstream step never ran or that no other side effects occurred.
@@ -470,7 +474,7 @@ The TUI renderer is user-facing only. It may compact or colorize what the human
470
474
 
471
475
  Worth doing in v1:
472
476
  - screenshots → saved-path summary, visible artifact metadata, `details.artifacts` metadata, and inline image attachment when safe; screenshot paths that upstream would treat ambiguously, such as `.dogfood/run/foo.png`, are normalized to absolute paths before launch and repaired from upstream temp output when possible
473
- - file artifacts such as PDFs, downloads, `wait --download` files, `state save` state files, diff screenshot output images, traces, CPU profiles, completed WebM recordings, and path-bearing HAR captures → concise saved-path summaries plus metadata in `details.artifacts` and bounded recent metadata in `details.artifactManifest`; `record start` reports recording lifecycle state and the future output path without adding a missing manifest entry; direct saved-file workflows also expose `details.savedFilePath` / `details.savedFile`; large or binary artifacts are not inlined into model context; the recent manifest cap can age out explicit-file metadata but does not remove explicit saved files from disk
477
+ - file artifacts such as PDFs, downloads, `wait --download` files, `state save` state files, diff screenshot output images, traces, CPU profiles, completed WebM recordings, and path-bearing HAR captures → concise saved-path summaries plus metadata in `details.artifacts` and bounded recent metadata in `details.artifactManifest`; `record start` reports recording lifecycle state and the future output path without adding a missing manifest entry; upstream needs `ffmpeg` on `PATH` for `record stop` to encode the WebM, and successful `record start` / `record restart` calls may also expose `details.recordingDependencyWarning` when the wrapper cannot find `ffmpeg`; direct saved-file workflows also expose `details.savedFilePath` / `details.savedFile`; large or binary artifacts are not inlined into model context; the recent manifest cap can age out explicit-file metadata but does not remove explicit saved files from disk
474
478
  - `diff screenshot` → same file-artifact pattern as above for the **diff** image path only (summary text uses “Saved diff image”); baseline paths and other fields stay in the structured payload but are not echoed as separate saved artifacts in the visible artifact block, and there is no Pi inline image attachment for the diff output
475
479
  - `state load` → completion text may mention the loaded path, but the wrapper does **not** treat that path as a new saved artifact (`artifacts` / `artifactManifest` stay unset) the way `state save` does
476
480
  - auth, cookies, storage, dialog, frame, state, network, debug, diff, stream, dashboard, chat, and other structured results → concise summaries that avoid expanding secret-bearing payloads; credential-like keys, values, URLs, body snippets, bearer/basic credentials, and cookie/storage values are redacted before model-facing output and `details.data`
@@ -6,8 +6,10 @@
6
6
  * Invariants/Assumptions: agent-browser is installed separately on PATH, the wrapper targets the current locally installed upstream version only, and no backward-compatibility shims are provided.
7
7
  */
8
8
 
9
- import { copyFile, mkdir, readFile, readdir, rm, stat } from "node:fs/promises";
10
- import { dirname, extname, isAbsolute, join, resolve } from "node:path";
9
+ import { constants as fsConstants } from "node:fs";
10
+ import { access, copyFile, mkdir, readFile, readdir, rm, stat } from "node:fs/promises";
11
+ import { delimiter, dirname, extname, isAbsolute, join, resolve } from "node:path";
12
+ import { fileURLToPath } from "node:url";
11
13
 
12
14
  import { StringEnum } from "@earendil-works/pi-ai";
13
15
  import {
@@ -114,6 +116,48 @@ interface CompiledAgentBrowserSemanticAction {
114
116
  args: string[];
115
117
  }
116
118
 
119
/**
 * Scroll offsets and page dimensions captured by the wrapper's read-only scroll probe.
 * Two snapshots (before/after a `scroll`) are compared to detect a no-op scroll.
 */
interface ScrollPositionSnapshot {
  /** Horizontal page scroll offset reported by the probe. */
  scrollX: number;
  /** Vertical page scroll offset reported by the probe. */
  scrollY: number;
  /** Viewport width reported by the probe. */
  innerWidth: number;
  /** Viewport height reported by the probe. */
  innerHeight: number;
  /** Document scroll width reported by the probe. */
  scrollWidth: number;
  /** Document scroll height reported by the probe. */
  scrollHeight: number;
  /** Container count reported by the probe; falls back to the sampled list length when absent. */
  containerCount: number;
  /** Sampled scrollable containers with their scroll offsets. */
  containers: { id: string; scrollLeft: number; scrollTop: number }[];
}
129
+
130
/**
 * Diagnostic attached when a successful `scroll` left the probed scroll positions unchanged.
 */
interface ScrollNoopDiagnostic {
  /** Fixed machine-readable reason code for this diagnostic. */
  reason: "no-observed-scroll-position-change";
  /** Human-readable explanation of the observation. */
  message: string;
  /** Snapshot captured before the scroll command ran. */
  before: ScrollPositionSnapshot;
  /** Snapshot captured after the scroll command ran. */
  after: ScrollPositionSnapshot;
  /** Suggested follow-up steps, rendered as bullet lines in visible text. */
  recommendations: string[];
}
137
+
138
/**
 * Diagnostic attached when a combobox-like control is focused but no option list is visibly open.
 */
interface ComboboxFocusDiagnostic {
  /** Fixed machine-readable reason code for this diagnostic. */
  reason: "focused-combobox-without-visible-options";
  /** Human-readable explanation of the observation. */
  message: string;
  /** Bounded metadata about the focused element, as reported by the focus probe. */
  activeElement: {
    /** Value of the element's `role` attribute, when present. */
    role?: string;
    /** Lower-cased tag name of the element. */
    tagName?: string;
    /** Value of `aria-expanded`, when present. */
    expanded?: string;
    /** Value of `aria-haspopup`, when present. */
    hasPopup?: string;
    /** Short label for the element, passed through text redaction before being stored. */
    name?: string;
  };
  /** Number of visible listbox/menu elements seen by the probe. */
  visibleListboxCount: number;
  /** Number of visible option/menu item elements seen by the probe. */
  visibleOptionCount: number;
  /** Suggested follow-up steps, rendered as bullet lines in visible text. */
  recommendations: string[];
}
152
+
153
/**
 * Non-blocking warning attached to a successful `record start` / `record restart`
 * when `ffmpeg` could not be found on `PATH`.
 */
interface RecordingDependencyWarning {
  /** Fixed machine-readable reason code for this warning. */
  reason: "ffmpeg-missing-for-recording";
  /** The missing external dependency. */
  dependency: "ffmpeg";
  /** Which recording command triggered the warning. */
  command: "record start" | "record restart";
  /** Human-readable explanation of the problem. */
  message: string;
  /** Suggested remediation steps, rendered as bullet lines in visible text. */
  recommendations: string[];
}
160
+
117
161
  interface CompiledAgentBrowserJobStep {
118
162
  action: AgentBrowserJobStepAction;
119
163
  args: string[];
@@ -2680,6 +2724,140 @@ async function collectNavigationSummary(options: {
2680
2724
  return { title, url };
2681
2725
  }
2682
2726
 
2727
/**
 * Validates raw scroll-probe output and converts it into a `ScrollPositionSnapshot`.
 *
 * @param data Probe output; either the payload itself or an object wrapping it under `result`.
 * @returns The validated snapshot, or `undefined` when any required numeric field is missing.
 */
function extractScrollPositionSnapshot(data: unknown): ScrollPositionSnapshot | undefined {
  // Accept both `{ result: payload }` and a bare payload.
  const payload = isRecord(data) && isRecord(data.result) ? data.result : data;
  if (!isRecord(payload)) return undefined;

  const { scrollX, scrollY, innerHeight, innerWidth, scrollHeight, scrollWidth } = payload;
  if (
    typeof scrollX !== "number" ||
    typeof scrollY !== "number" ||
    typeof innerHeight !== "number" ||
    typeof innerWidth !== "number" ||
    typeof scrollHeight !== "number" ||
    typeof scrollWidth !== "number"
  ) {
    return undefined;
  }

  const containers: ScrollPositionSnapshot["containers"] = [];
  if (Array.isArray(payload.containers)) {
    payload.containers.forEach((entry: unknown, index: number) => {
      if (!isRecord(entry)) return;
      const { scrollLeft, scrollTop } = entry;
      // Entries without both numeric offsets are dropped rather than guessed.
      if (typeof scrollTop !== "number" || typeof scrollLeft !== "number") return;
      // Only ids in the expected `index:tag[role=...]` shape are kept; anything else is replaced.
      const hasExpectedId = typeof entry.id === "string" && /^\d+:[a-z][a-z0-9-]*(?:\[role=[a-z-]+\])?$/i.test(entry.id);
      const id = hasExpectedId ? (entry.id as string) : `sample-${index}`;
      containers.push({ id, scrollLeft, scrollTop });
    });
  }

  const containerCount = typeof payload.containerCount === "number" ? payload.containerCount : containers.length;
  return { containerCount, containers, innerHeight, innerWidth, scrollHeight, scrollWidth, scrollX, scrollY };
}
2758
+
2759
/**
 * Read-only page script that reports the window scroll offsets, viewport and document
 * dimensions, and the ten largest scrollable descendants of `body`.
 *
 * Container ids are built from the sample index, lower-cased tag name, and `role`
 * attribute only. `containerCount` is the number of scrollable elements found BEFORE
 * the top-ten sampling, so it can exceed `containers.length` and the
 * "sampled/total" line in visible output is meaningful.
 */
const SCROLL_POSITION_EVAL = `(() => {
  const viewport = {
    scrollX: window.scrollX,
    scrollY: window.scrollY,
    innerHeight: window.innerHeight,
    innerWidth: window.innerWidth,
    scrollHeight: Math.max(document.documentElement?.scrollHeight || 0, document.body?.scrollHeight || 0),
    scrollWidth: Math.max(document.documentElement?.scrollWidth || 0, document.body?.scrollWidth || 0),
  };
  const describe = (element, index) => {
    const role = element.getAttribute("role") || "";
    const id = element.tagName.toLowerCase();
    return {
      id: String(index) + ":" + id + (role ? "[role=" + role + "]" : ""),
      scrollTop: element.scrollTop,
      scrollLeft: element.scrollLeft,
      area: element.clientWidth * element.clientHeight,
    };
  };
  const scrollable = Array.from(document.querySelectorAll("body *"))
    .filter((element) => element instanceof HTMLElement && (element.scrollHeight > element.clientHeight + 1 || element.scrollWidth > element.clientWidth + 1));
  const containers = scrollable
    .map(describe)
    .sort((left, right) => right.area - left.area)
    .slice(0, 10)
    .map(({ area, ...entry }) => entry);
  return { ...viewport, containerCount: scrollable.length, containers };
})()`;
2786
+
2787
/**
 * Runs the scroll-position probe in the given session via `eval --stdin` and parses its output.
 *
 * @param options Working directory, optional session name, and optional abort signal.
 * @returns The parsed snapshot, or `undefined` when the probe output cannot be validated.
 */
async function collectScrollPositionSnapshot(options: {
  cwd: string;
  sessionName?: string;
  signal?: AbortSignal;
}): Promise<ScrollPositionSnapshot | undefined> {
  const { cwd, sessionName, signal } = options;
  const probeData = await runSessionCommandData({
    args: ["eval", "--stdin"],
    cwd,
    sessionName,
    signal,
    stdin: SCROLL_POSITION_EVAL,
  });
  return extractScrollPositionSnapshot(probeData);
}
2800
+
2801
/**
 * Reports whether two snapshots describe the same scroll state.
 *
 * Compares page scroll offsets, document scroll dimensions, and each sampled container
 * (id and offsets, position by position). Viewport size and `containerCount` are not compared.
 */
function sameScrollPositionSnapshot(left: ScrollPositionSnapshot, right: ScrollPositionSnapshot): boolean {
  const pageMatches =
    left.scrollX === right.scrollX &&
    left.scrollY === right.scrollY &&
    left.scrollHeight === right.scrollHeight &&
    left.scrollWidth === right.scrollWidth;
  if (!pageMatches || left.containers.length !== right.containers.length) return false;

  return !left.containers.some((container, index) => {
    const counterpart = right.containers[index];
    return counterpart?.id !== container.id || counterpart.scrollTop !== container.scrollTop || counterpart.scrollLeft !== container.scrollLeft;
  });
}
2816
+
2817
/**
 * Builds the no-op scroll diagnostic when both snapshots exist and show the same scroll state.
 *
 * @returns The diagnostic, or `undefined` when a snapshot is missing or the positions differ.
 */
function buildScrollNoopDiagnostic(before: ScrollPositionSnapshot | undefined, after: ScrollPositionSnapshot | undefined): ScrollNoopDiagnostic | undefined {
  // Without both probes there is no evidence either way, so stay silent.
  if (!before) return undefined;
  if (!after) return undefined;
  if (!sameScrollPositionSnapshot(before, after)) return undefined;

  const diagnostic: ScrollNoopDiagnostic = {
    after,
    before,
    message: "Scroll reported success, but the viewport and sampled scrollable containers did not change position.",
    reason: "no-observed-scroll-position-change",
    recommendations: [
      "Run snapshot -i or screenshot to confirm what is visible before choosing the next action.",
      "On dashboards and panes with nested scrolling, use scrollintoview <@ref> for a visible target or target the actual scrollable region instead of repeating page scrolls.",
    ],
  };
  return diagnostic;
}
2830
+
2831
/**
 * Builds the follow-up actions suggested after a no-op scroll: an interactive snapshot
 * and a screenshot, each prefixed with `--session <name>` when a session name is given.
 */
function buildScrollNoopNextActions(sessionName: string | undefined): AgentBrowserNextAction[] {
  const sessionPrefix: string[] = sessionName ? ["--session", sessionName] : [];
  const followUp = (id: string, commandArgs: string[], reason: string, safety: string): AgentBrowserNextAction => ({
    id,
    params: { args: [...sessionPrefix, ...commandArgs] },
    reason,
    safety,
    tool: "agent_browser",
  });

  return [
    followUp(
      "inspect-after-noop-scroll",
      ["snapshot", "-i"],
      "Refresh interactive refs and inspect whether the intended target is inside a nested scroll container.",
      "Do not assume repeated page scrolls will move dashboard panels or nested panes.",
    ),
    followUp(
      "verify-noop-scroll-visually",
      ["screenshot"],
      "Capture the current viewport to verify whether the scroll actually changed visible content.",
      "Use screenshot evidence before concluding a dense dashboard did or did not move.",
    ),
  ];
}
2850
+
2851
/**
 * Renders the no-op scroll diagnostic as visible text: a header, the reason, the
 * sampled/total container line, and one bullet per recommendation.
 *
 * @returns The multi-line text, or `undefined` when there is no diagnostic.
 */
function formatScrollNoopDiagnosticText(diagnostic: ScrollNoopDiagnostic | undefined): string | undefined {
  if (!diagnostic) return undefined;
  const { after, message, recommendations } = diagnostic;
  const lines = [
    "Scroll diagnostic: no observed scroll movement.",
    `Reason: ${message}`,
    `Sampled scrollable containers: ${after.containers.length}/${after.containerCount}.`,
  ];
  for (const recommendation of recommendations) {
    lines.push(`- ${recommendation}`);
  }
  return lines.join("\n");
}
2860
+
2683
2861
  function mergeNavigationSummaryIntoData(data: unknown, navigationSummary: NavigationSummary): unknown {
2684
2862
  if (isRecord(data)) {
2685
2863
  return { ...data, navigationSummary };
@@ -2687,6 +2865,182 @@ function mergeNavigationSummaryIntoData(data: unknown, navigationSummary: Naviga
2687
2865
  return { navigationSummary, result: data };
2688
2866
  }
2689
2867
 
2868
/**
 * Read-only page script that describes the focused element and counts visible
 * listbox/menu and option/menu item elements.
 *
 * `comboboxLike` is true only when an HTMLElement is focused and it has role
 * `combobox`, `aria-haspopup` of `listbox`/`menu`, is a `select`, or carries an
 * `aria-autocomplete` attribute. The `active !== null` guard matters: an optional
 * chain on a missing element yields `undefined`, which is `!== null`, and would
 * otherwise flag a page with nothing focused as combobox-like.
 */
const COMBOBOX_FOCUS_EVAL = `(() => {
  const isVisible = (element) => {
    if (!(element instanceof HTMLElement)) return false;
    const style = window.getComputedStyle(element);
    if (style.display === "none" || style.visibility === "hidden" || Number(style.opacity) === 0) return false;
    return element.getClientRects().length > 0;
  };
  const active = document.activeElement instanceof HTMLElement ? document.activeElement : null;
  const role = active?.getAttribute("role") || undefined;
  const hasPopup = active?.getAttribute("aria-haspopup") || undefined;
  const expanded = active?.getAttribute("aria-expanded") || undefined;
  const tagName = active?.tagName.toLowerCase();
  const name = (active?.getAttribute("aria-label") || active?.getAttribute("placeholder") || active?.getAttribute("title") || active?.textContent || "").trim().slice(0, 80) || undefined;
  const visibleListboxCount = Array.from(document.querySelectorAll('[role="listbox"], [role="menu"]')).filter(isVisible).length;
  const visibleOptionCount = Array.from(document.querySelectorAll('[role="option"], option, [role="menuitem"]')).filter(isVisible).length;
  const comboboxLike = active !== null && (role === "combobox" || hasPopup === "listbox" || hasPopup === "menu" || tagName === "select" || active.getAttribute("aria-autocomplete") !== null);
  return { activeElement: active ? { expanded, hasPopup, name, role, tagName } : undefined, comboboxLike, visibleListboxCount, visibleOptionCount };
})()`;
2886
+
2887
/**
 * Converts raw focus-probe output into a combobox diagnostic.
 *
 * A diagnostic is produced only when the probe marked the focused element as
 * combobox-like, `aria-expanded` is exactly `"true"` or `"false"`, and no visible
 * listbox or option elements were counted.
 *
 * @param data Probe output; either the payload itself or an object wrapping it under `result`.
 * @returns The diagnostic, or `undefined` when the conditions above are not met.
 */
function extractComboboxFocusDiagnostic(data: unknown): ComboboxFocusDiagnostic | undefined {
  const payload = isRecord(data) && isRecord(data.result) ? data.result : data;
  if (!isRecord(payload)) return undefined;
  if (payload.comboboxLike !== true) return undefined;
  const focused = payload.activeElement;
  if (!isRecord(focused)) return undefined;

  const stringOrUndefined = (value: unknown): string | undefined => (typeof value === "string" ? value : undefined);
  const countOrZero = (value: unknown): number => (typeof value === "number" ? value : 0);

  const visibleListboxCount = countOrZero(payload.visibleListboxCount);
  const visibleOptionCount = countOrZero(payload.visibleOptionCount);
  const expanded = stringOrUndefined(focused.expanded);
  // An explicit aria-expanded state is required; anything else is treated as not a managed combobox.
  if (expanded !== "true" && expanded !== "false") return undefined;
  if (visibleListboxCount > 0 || visibleOptionCount > 0) return undefined;

  const rawName = stringOrUndefined(focused.name);
  return {
    activeElement: {
      expanded,
      hasPopup: stringOrUndefined(focused.hasPopup),
      // Element labels may contain user data, so they pass through redaction.
      name: rawName === undefined ? undefined : redactSensitiveText(rawName),
      role: stringOrUndefined(focused.role),
      tagName: stringOrUndefined(focused.tagName),
    },
    message: "A combobox-like control is focused, but no listbox or option elements are visibly open.",
    reason: "focused-combobox-without-visible-options",
    recommendations: [
      "Run snapshot -i to inspect whether options appeared under a different role or portal.",
      "Try ArrowDown or Enter to open the option list before selecting, or use select/visible option refs when available.",
    ],
    visibleListboxCount,
    visibleOptionCount,
  };
}
2912
+
2913
/**
 * Decides whether a raw command explicitly targets a combobox and should get the focus probe.
 *
 * Requires a token equal to `combobox` or `listbox` (case-insensitive), plus either a
 * `click`/`fill` command or a `find` command that contains a `click`, `fill`, or `select` token.
 */
function isComboboxFocusDiagnosticCommand(command: string | undefined, commandTokens: string[]): boolean {
  const comboboxRolePattern = /^(?:combobox|listbox)$/i;
  const mentionsComboboxRole = commandTokens.some((token) => comboboxRolePattern.test(token));
  if (!mentionsComboboxRole) return false;

  switch (command) {
    case "click":
    case "fill":
      return true;
    case "find":
      return commandTokens.some((token) => ["click", "fill", "select"].includes(token));
    default:
      return false;
  }
}
2919
+
2920
/**
 * Returns the role value from a role-located semantic action's compiled argv,
 * i.e. the token following the first `find role` pair.
 *
 * @returns The role token, or `undefined` when the locator is not `role` or the
 * argv does not contain `find` immediately followed by `role`.
 */
function getCompiledSemanticActionRoleValue(compiled: CompiledAgentBrowserSemanticAction): string | undefined {
  if (compiled.locator !== "role") return undefined;
  const { args } = compiled;
  const findAt = args.indexOf("find");
  const hasFindRolePair = findAt >= 0 && args[findAt + 1] === "role";
  return hasFindRolePair ? args[findAt + 2] : undefined;
}
2926
+
2927
/**
 * Decides whether a compiled semantic action explicitly targets a combobox:
 * its action is `click`, `fill`, or `select`, and its role value is
 * `combobox` or `listbox` (case-insensitive).
 */
function isComboboxFocusDiagnosticSemanticAction(compiled: CompiledAgentBrowserSemanticAction | undefined): boolean {
  if (!compiled) return false;
  const probedActions = ["click", "fill", "select"];
  if (!probedActions.includes(compiled.action)) return false;
  // A missing role value is matched as the empty string, which never passes the pattern.
  const roleValue = getCompiledSemanticActionRoleValue(compiled) ?? "";
  return /^(?:combobox|listbox)$/i.test(roleValue);
}
2931
+
2932
/**
 * Runs the combobox focus probe, but only for invocations that explicitly target a
 * combobox (by raw command tokens or by compiled semantic action).
 *
 * @returns The diagnostic from the probe output, or `undefined` when the call is not
 * combobox-targeted or the probe output does not qualify.
 */
async function collectComboboxFocusDiagnostic(options: {
  command?: string;
  commandTokens: string[];
  cwd: string;
  semanticAction?: CompiledAgentBrowserSemanticAction;
  sessionName?: string;
  signal?: AbortSignal;
}): Promise<ComboboxFocusDiagnostic | undefined> {
  const targetsCombobox =
    isComboboxFocusDiagnosticCommand(options.command, options.commandTokens) ||
    isComboboxFocusDiagnosticSemanticAction(options.semanticAction);
  // Skip the extra eval round-trip for ordinary clicks and fills.
  if (!targetsCombobox) return undefined;

  const probeData = await runSessionCommandData({
    args: ["eval", "--stdin"],
    cwd: options.cwd,
    sessionName: options.sessionName,
    signal: options.signal,
    stdin: COMBOBOX_FOCUS_EVAL,
  });
  return extractComboboxFocusDiagnostic(probeData);
}
2949
+
2950
/**
 * Builds the follow-up actions suggested when a focused combobox shows no options:
 * an interactive snapshot, `press ArrowDown`, and `press Enter`, each prefixed with
 * `--session <name>` when a session name is given.
 */
function buildComboboxFocusNextActions(sessionName: string | undefined): AgentBrowserNextAction[] {
  const sessionPrefix: string[] = sessionName ? ["--session", sessionName] : [];
  const followUp = (id: string, commandArgs: string[], reason: string, safety: string): AgentBrowserNextAction => ({
    id,
    params: { args: [...sessionPrefix, ...commandArgs] },
    reason,
    safety,
    tool: "agent_browser",
  });

  return [
    followUp(
      "inspect-focused-combobox",
      ["snapshot", "-i"],
      "Inspect the focused combobox and any portal/listbox refs before choosing an option.",
      "Prefer visible option refs or select when a native/selectable option list is exposed.",
    ),
    followUp(
      "try-open-combobox-with-arrow",
      ["press", "ArrowDown"],
      "Many searchable comboboxes open their option list with ArrowDown after focus.",
      "Use only when the focused combobox is still the intended control, then re-snapshot before selecting.",
    ),
    followUp(
      "try-open-combobox-with-enter",
      ["press", "Enter"],
      "Some comboboxes open or confirm their option list with Enter after focus.",
      "Enter may select a highlighted/default option; prefer ArrowDown first unless Enter is the app's expected opener.",
    ),
  ];
}
2976
+
2977
/**
 * Renders the combobox diagnostic as visible text: a header (with the element name in
 * parentheses when one is known), the reason, and one bullet per recommendation.
 *
 * @returns The multi-line text, or `undefined` when there is no diagnostic.
 */
function formatComboboxFocusDiagnosticText(diagnostic: ComboboxFocusDiagnostic | undefined): string | undefined {
  if (!diagnostic) return undefined;
  const elementName = diagnostic.activeElement.name;
  const suffix = elementName ? ` (${elementName})` : "";
  const lines = [
    `Combobox diagnostic: focused combobox did not expose visible options${suffix}.`,
    `Reason: ${diagnostic.message}`,
  ];
  for (const recommendation of diagnostic.recommendations) {
    lines.push(`- ${recommendation}`);
  }
  return lines.join("\n");
}
2986
+
2987
/**
 * Classifies a `record` invocation as `record start` or `record restart` from its
 * second command token (case-insensitive).
 *
 * @returns The normalized command label, or `undefined` for any other command or subcommand.
 */
function getRecordStartLikeCommand(command: string | undefined, commandTokens: string[]): RecordingDependencyWarning["command"] | undefined {
  if (command !== "record") return undefined;
  switch (commandTokens[1]?.toLowerCase()) {
    case "start":
      return "record start";
    case "restart":
      return "record restart";
    default:
      return undefined;
  }
}
2994
+
2995
/**
 * Checks whether an executable regular file named `command` exists in any `PATH` directory.
 *
 * On Windows each `PATHEXT` extension (default `.EXE;.CMD;.BAT;.COM`) is appended in turn;
 * elsewhere the bare name is used. Directories are searched in `PATH` order and the
 * search stops at the first match.
 *
 * @param command Executable base name to look for.
 * @returns `true` when a candidate passes both the execute-permission check and the regular-file check.
 */
async function executableExistsOnPath(command: string): Promise<boolean> {
  const searchDirectories = (process.env.PATH ?? "").split(delimiter).filter(Boolean);
  const suffixes = process.platform === "win32"
    ? (process.env.PATHEXT ?? ".EXE;.CMD;.BAT;.COM").split(";").filter(Boolean)
    : [""];

  const isExecutableFile = async (candidate: string): Promise<boolean> => {
    try {
      await access(candidate, fsConstants.X_OK);
      // Directories can also pass the execute check, so require a regular file.
      return (await stat(candidate)).isFile();
    } catch {
      // Missing or inaccessible candidates simply do not count as a match.
      return false;
    }
  };

  for (const directory of searchDirectories) {
    for (const suffix of suffixes) {
      if (await isExecutableFile(join(directory, `${command}${suffix}`))) return true;
    }
  }
  return false;
}
3013
+
3014
/**
 * Produces an `ffmpeg` preflight warning for a successful `record start` / `record restart`
 * when no `ffmpeg` executable is found on `PATH`.
 *
 * @returns The warning, or `undefined` when the call failed, is not a start-like
 * record command, or `ffmpeg` is available.
 */
async function collectRecordingDependencyWarning(options: {
  command: string | undefined;
  commandTokens: string[];
  succeeded: boolean;
}): Promise<RecordingDependencyWarning | undefined> {
  const { command, commandTokens, succeeded } = options;
  if (!succeeded) return undefined;

  const recordCommand = getRecordStartLikeCommand(command, commandTokens);
  if (!recordCommand) return undefined;

  const ffmpegAvailable = await executableExistsOnPath("ffmpeg");
  if (ffmpegAvailable) return undefined;

  const warning: RecordingDependencyWarning = {
    command: recordCommand,
    dependency: "ffmpeg",
    message: `${recordCommand} can begin recording, but record stop needs ffmpeg on PATH to encode the WebM output.`,
    reason: "ffmpeg-missing-for-recording",
    recommendations: [
      "Install ffmpeg before relying on this recording workflow; on macOS with Homebrew, brew install ffmpeg or brew install ffmpeg-full.",
      "If ffmpeg was just installed, restart pi or ensure the PATH visible to pi includes the ffmpeg binary before running record stop.",
    ],
  };
  return warning;
}
3034
+
3035
/**
 * Renders the recording dependency warning as visible text: a header, the reason,
 * and one bullet per recommendation.
 *
 * @returns The multi-line text, or `undefined` when there is no warning.
 */
function formatRecordingDependencyWarningText(warning: RecordingDependencyWarning | undefined): string | undefined {
  if (!warning) return undefined;
  const lines = ["Recording dependency warning: ffmpeg not found on PATH.", `Reason: ${warning.message}`];
  for (const recommendation of warning.recommendations) {
    lines.push(`- ${recommendation}`);
  }
  return lines.join("\n");
}
3043
+
2690
3044
  function getSnapshotRefRecord(data: unknown): Record<string, unknown> | undefined {
2691
3045
  return isRecord(data) && isRecord(data.refs) ? data.refs : undefined;
2692
3046
  }
@@ -3357,10 +3711,19 @@ async function closeManagedSession(options: { cwd: string; sessionName: string;
3357
3711
  }
3358
3712
  }
3359
3713
 
3714
/**
 * Resolves absolute paths to the documentation files shipped with the installed package.
 *
 * The package root is taken to be two directories above this module's own directory.
 *
 * @returns Paths to `README.md`, `docs/COMMAND_REFERENCE.md`, and `docs/TOOL_CONTRACT.md`.
 */
function getInstalledDocsPaths(): { readmePath: string; commandReferencePath: string; toolContractPath: string } {
  const moduleDirectory = dirname(fileURLToPath(import.meta.url));
  const packageRoot = resolve(moduleDirectory, "..", "..");
  const docsDirectory = join(packageRoot, "docs");
  return {
    readmePath: join(packageRoot, "README.md"),
    commandReferencePath: join(docsDirectory, "COMMAND_REFERENCE.md"),
    toolContractPath: join(docsDirectory, "TOOL_CONTRACT.md"),
  };
}
3722
+
3360
3723
  export default function agentBrowserExtension(pi: ExtensionAPI) {
3361
3724
  const ephemeralSessionSeed = createEphemeralSessionSeed();
3362
3725
  const hasBraveApiKey = hasUsableBraveApiKey();
3363
- const toolPromptGuidelines = buildToolPromptGuidelines({ includeBraveSearch: hasBraveApiKey });
3726
+ const toolPromptGuidelines = buildToolPromptGuidelines({ includeBraveSearch: hasBraveApiKey, docs: getInstalledDocsPaths() });
3364
3727
  const implicitSessionIdleTimeoutMs = String(getImplicitSessionIdleTimeoutMs());
3365
3728
  const implicitSessionCloseTimeoutMs = getImplicitSessionCloseTimeoutMs();
3366
3729
  let managedSessionActive = false;
@@ -3762,6 +4125,14 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
3762
4125
  }
3763
4126
  }
3764
4127
  const redactedProcessArgs = redactInvocationArgs(processArgs);
4128
+ const shouldProbeScrollNoop = executionPlan.commandInfo.command === "scroll" && executionPlan.startupScopedFlags.length === 0;
4129
+ const scrollPositionBefore = shouldProbeScrollNoop
4130
+ ? await collectScrollPositionSnapshot({
4131
+ cwd: ctx.cwd,
4132
+ sessionName: executionPlan.sessionName,
4133
+ signal,
4134
+ })
4135
+ : undefined;
3765
4136
 
3766
4137
  onUpdate?.({
3767
4138
  content: [{ type: "text", text: `Running agent-browser ${buildInvocationPreview(redactedProcessArgs)}` }],
@@ -4015,6 +4386,31 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
4015
4386
  signal,
4016
4387
  });
4017
4388
  }
4389
+ const comboboxFocusDiagnostic = succeeded
4390
+ ? await collectComboboxFocusDiagnostic({
4391
+ command: executionPlan.commandInfo.command,
4392
+ commandTokens,
4393
+ cwd: ctx.cwd,
4394
+ semanticAction: compiledSemanticAction,
4395
+ sessionName: executionPlan.sessionName,
4396
+ signal,
4397
+ })
4398
+ : undefined;
4399
+ const recordingDependencyWarning = await collectRecordingDependencyWarning({
4400
+ command: executionPlan.commandInfo.command,
4401
+ commandTokens,
4402
+ succeeded,
4403
+ });
4404
+ const scrollNoopDiagnostic = succeeded && shouldProbeScrollNoop
4405
+ ? buildScrollNoopDiagnostic(
4406
+ scrollPositionBefore,
4407
+ await collectScrollPositionSnapshot({
4408
+ cwd: ctx.cwd,
4409
+ sessionName: executionPlan.sessionName,
4410
+ signal,
4411
+ }),
4412
+ )
4413
+ : undefined;
4018
4414
  let currentRefSnapshot: SessionRefSnapshot | undefined;
4019
4415
  if (executionPlan.sessionName) {
4020
4416
  const activeSessionTabTargetState = sessionTabTargets.get(executionPlan.sessionName);
@@ -4242,6 +4638,12 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
4242
4638
  if (selectorTextVisibilityDiagnostics.length > 0) {
4243
4639
  (nextActions ??= []).push(...buildSelectorTextVisibilityNextActions({ diagnostics: selectorTextVisibilityDiagnostics, sessionName: executionPlan.sessionName }));
4244
4640
  }
4641
+ if (scrollNoopDiagnostic) {
4642
+ (nextActions ??= []).push(...buildScrollNoopNextActions(executionPlan.sessionName));
4643
+ }
4644
+ if (comboboxFocusDiagnostic) {
4645
+ (nextActions ??= []).push(...buildComboboxFocusNextActions(executionPlan.sessionName));
4646
+ }
4245
4647
  if (categoryDetails.failureCategory === "stale-ref" && redactedCompiledSemanticAction) {
4246
4648
  (nextActions ??= []).push({
4247
4649
  id: "retry-semantic-action-after-stale-ref",
@@ -4251,6 +4653,9 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
4251
4653
  tool: "agent_browser" as const,
4252
4654
  });
4253
4655
  }
4656
+ const pageChangeSummary = (scrollNoopDiagnostic || comboboxFocusDiagnostic) && presentation.pageChangeSummary
4657
+ ? { ...presentation.pageChangeSummary, nextActionIds: nextActions?.map((action) => action.id) }
4658
+ : presentation.pageChangeSummary;
4254
4659
  const details = {
4255
4660
  args: redactedArgs,
4256
4661
  compiledJob: redactedCompiledJob,
@@ -4284,8 +4689,11 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
4284
4689
  imagePath: presentation.imagePath,
4285
4690
  imagePaths: presentation.imagePaths,
4286
4691
  nextActions,
4287
- pageChangeSummary: presentation.pageChangeSummary,
4692
+ pageChangeSummary,
4288
4693
  overlayBlockers: overlayBlockerDiagnostic,
4694
+ comboboxFocus: comboboxFocusDiagnostic,
4695
+ recordingDependencyWarning,
4696
+ scrollNoop: scrollNoopDiagnostic,
4289
4697
  qaPreset,
4290
4698
  selectorTextVisibility: selectorTextVisibilityDiagnostics[0],
4291
4699
  selectorTextVisibilityAll: selectorTextVisibilityDiagnostics.length > 1 ? selectorTextVisibilityDiagnostics : undefined,
@@ -4313,11 +4721,14 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
4313
4721
  const semanticActionCandidateText = nextActions ? formatSemanticActionCandidateText(nextActions) : undefined;
4314
4722
  const overlayBlockerText = overlayBlockerDiagnostic ? formatOverlayBlockerText(overlayBlockerDiagnostic) : undefined;
4315
4723
  const selectorTextVisibilityText = formatSelectorTextVisibilityText(selectorTextVisibilityDiagnostics);
4724
+ const scrollNoopDiagnosticText = formatScrollNoopDiagnosticText(scrollNoopDiagnostic);
4725
+ const comboboxFocusDiagnosticText = formatComboboxFocusDiagnosticText(comboboxFocusDiagnostic);
4726
+ const recordingDependencyWarningText = formatRecordingDependencyWarningText(recordingDependencyWarning);
4316
4727
  const evalStdinHintText = formatEvalStdinHintText(evalStdinHint);
4317
4728
  const artifactCleanupText = formatArtifactCleanupGuidanceText(artifactCleanup);
4318
4729
  const timeoutPartialProgressText = timeoutPartialProgress ? formatTimeoutPartialProgressText(timeoutPartialProgress) : undefined;
4319
4730
  const managedSessionOutcomeText = formatManagedSessionOutcomeText(managedSessionOutcome);
4320
- const rawAppendedDiagnosticText = [semanticActionCandidateText, overlayBlockerText, selectorTextVisibilityText, evalStdinHintText, artifactCleanupText, timeoutPartialProgressText, managedSessionOutcomeText].filter((item): item is string => item !== undefined).join("\n\n");
4731
+ const rawAppendedDiagnosticText = [semanticActionCandidateText, overlayBlockerText, selectorTextVisibilityText, scrollNoopDiagnosticText, comboboxFocusDiagnosticText, recordingDependencyWarningText, evalStdinHintText, artifactCleanupText, timeoutPartialProgressText, managedSessionOutcomeText].filter((item): item is string => item !== undefined).join("\n\n");
4321
4732
  const appendedDiagnosticText = redactSensitiveText(redactExactSensitiveText(rawAppendedDiagnosticText, exactSensitiveValues));
4322
4733
  const shouldAppendDiagnosticText = appendedDiagnosticText.length > 0 && (!userRequestedJson || plainTextInspection);
4323
4734
  const content = shouldAppendDiagnosticText && redactedContent[0]?.type === "text"
@@ -13,6 +13,10 @@ export const TOOL_PROMPT_GUIDELINES_PREFIX = [
13
13
  "Use agent_browser whenever the task requires a real browser or live web content.",
14
14
  ] as const;
15
15
 
16
// Builds the single prompt guideline that tells the agent where the installed
// package docs live. The three given paths are interpolated verbatim into the
// sentence (README for setup/external dependencies, command reference for
// command workflows, tool contract for result/details contracts); nothing is
// validated or normalized here.
+ export function buildInstalledDocsGuideline(paths: { readmePath: string; commandReferencePath: string; toolContractPath: string }): string {
17
+ return `For deeper guidance without bloating context, read installed package docs on demand: ${paths.readmePath} for setup/external dependencies, ${paths.commandReferencePath} for command workflows, and ${paths.toolContractPath} for result/details contracts. Do not load the full command reference unless needed; prefer targeted sections.`;
18
+ }
19
+
16
20
  export const QUICK_START_GUIDELINES = [
17
21
  "Quick start mental model: use exactly one of args (exact agent-browser CLI args after the binary), semanticAction (a thin find-locator shorthand compiled to find argv), job (a constrained short-workflow schema compiled to batch), qa (a lightweight QA preset built on job/batch), or the experimental sourceLookup / networkSourceLookup helpers (each compiled to batch); stdin is only for batch, eval --stdin, auth save --password-stdin, and wrapper-generated batch stdin from job, qa, sourceLookup, or networkSourceLookup, and other command/stdin combinations are rejected before launch; sessionMode=fresh switches the extension-managed pi-scoped session to a fresh upstream launch when you need new --profile, --session-name, --cdp, --state, --auto-connect, --init-script, --enable, -p/--provider, or iOS --device state.",
18
22
  "There is no first-class reusable named browser recipe runtime above top-level job, the qa preset, and raw batch stdin; keep recurring flows in documentation examples or those inputs (closed RQ-0068; see docs/ARCHITECTURE.md#no-reusable-recipe-layer-yet).",
@@ -47,6 +51,7 @@ export const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
47
51
  "For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.",
48
52
  "For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.",
49
53
  "For downloads, prefer download <selector> <path> when an element click should save a file. Do not rely on click alone when you need the downloaded file on disk.",
54
+ "On dashboards with nested scroll containers, verify scroll with a screenshot or fresh snapshot -i; if the viewport did not move, prefer scrollintoview <@ref> or target the actual scrollable region. For comboboxes, a click/semanticAction may only focus the field; re-snapshot and fall back to type, press Enter/arrow keys, select, or visible option refs.",
50
55
  "When using eval --stdin, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics.",
51
56
  "When using eval --stdin for extraction, return the value you want instead of relying on console.log as the primary result channel. Prefer plain expressions like ({ title: document.title }) or explicitly invoked functions like (() => ({ title: document.title }))(); if a function-shaped snippet returns {}, details.evalStdinHint may warn that the function was serialized instead of called. If get text on a CSS selector surfaces details.selectorTextVisibility or selectorTextVisibilityAll, prefer a visible @ref, a more specific selector, or the inspect-visible-text-candidates nextAction over hidden tab content.",
52
57
  "When details.pageChangeSummary is present, use changeType and summary as a compact signal for navigation, DOM mutation, confirmations, or artifacts; when nextActionIds is set, match those ids to entries in details.nextActions (or per-step nextActions inside batch) for concrete follow-up payloads instead of inferring from prose alone. If a no-navigation click surfaces details.overlayBlockers, inspect the fresh snapshot evidence before using a close/dismiss candidate nextAction; ordinary page chrome without dialog/alertdialog evidence should not trigger this diagnostic.",
@@ -83,11 +88,26 @@ export function buildSharedBrowserPlaybookGuidelines(options: { includeBraveSear
83
88
  ];
84
89
  }
85
90
 
86
- export function buildToolPromptGuidelines(options: { includeBraveSearch: boolean }): string[] {
91
// Runtime guideline list, one string per guideline. It is spread into the
// array returned by buildToolPromptGuidelines, in the position where this diff
// removes the QUICK_START_GUIDELINES and
// buildSharedBrowserPlaybookGuidelines(options) spreads.
// Module-private (not exported); `as const` keeps it a readonly tuple of
// string literals.
+ const RUNTIME_PROMPT_GUIDELINES = [
92
+ "Use exactly one input mode: args, semanticAction, job, qa, sourceLookup, or networkSourceLookup. Use stdin only for batch, eval --stdin, auth save --password-stdin, or wrapper-generated batch modes.",
93
+ "Common flow: open, snapshot -i, interact with current @refs or semanticAction, then re-snapshot after navigation, scrolling, rerenders, or DOM changes.",
94
+ "Prefer stable locators for visible text/names: semanticAction or upstream find with role/text/label/placeholder/alt/title/testid. Use current @refs only from the latest same-page snapshot.",
95
+ "Use sessionMode=fresh for launch-scoped state such as --profile, --session-name, --cdp, --state, --auto-connect, --init-script, --enable, providers, or iOS devices; otherwise let the implicit session carry continuity.",
96
+ "For artifacts, read visible metadata and details.artifactVerification before using files. record stop needs ffmpeg on PATH. close does not delete saved files; cleanup is host-owned.",
97
+ "When details.nextActions is present, prefer those exact follow-up payloads over prose or guessed selectors.",
98
+ "For dense snapshots, check Omitted high-value controls and details.data.highValueControlRefIds before opening large spill files.",
99
+ "For dashboards, verify scroll with screenshot/snapshot; if nothing moved, use scrollintoview <@ref> or target the real scroll region. Combobox clicks may only focus; re-snapshot and fall back to type, Enter/arrows, select, or option refs.",
100
+ "For extraction, prefer get title/url/text/html/value/attr/count or eval --stdin that returns a value; do not rely on console.log. If selector visibility warnings appear, prefer visible @refs or nextActions.",
101
+ "For non-core debugging, pass upstream commands through args: network, diff, trace/profiler/record, console/errors, stream, dashboard, chat, react, vitals, pushstate, dialog, frame, tab.",
102
+ ] as const;
103
+
104
// Assembles the tool prompt guidelines as a flat string array, in this order:
//   1. every entry of TOOL_PROMPT_GUIDELINES_PREFIX,
//   2. the installed-docs guideline, only when options.docs is provided,
//   3. every entry of RUNTIME_PROMPT_GUIDELINES,
//   4. BRAVE_SEARCH_PROMPT_GUIDELINE, only when options.includeBraveSearch is true,
//   5. TOOL_PROMPT_GUIDELINES_SUFFIX[0] and TOOL_PROMPT_GUIDELINES_SUFFIX[1].
// `docs` is optional, so callers that pass only includeBraveSearch still type-check.
// NOTE(review): the removed version spread the whole TOOL_PROMPT_GUIDELINES_SUFFIX
// array; the new version takes only indices 0 and 1. If the suffix has more than
// two entries the rest are now dropped, and if it has fewer an undefined entry is
// emitted — confirm this is intended.
+ export function buildToolPromptGuidelines(options: { includeBraveSearch: boolean; docs?: { readmePath: string; commandReferencePath: string; toolContractPath: string } }): string[] {
87
105
  return [
88
106
  ...TOOL_PROMPT_GUIDELINES_PREFIX,
89
- ...QUICK_START_GUIDELINES,
90
- ...buildSharedBrowserPlaybookGuidelines(options),
91
- ...TOOL_PROMPT_GUIDELINES_SUFFIX,
107
+ ...(options.docs ? [buildInstalledDocsGuideline(options.docs)] : []),
108
+ ...RUNTIME_PROMPT_GUIDELINES,
109
+ ...(options.includeBraveSearch ? [BRAVE_SEARCH_PROMPT_GUIDELINE] : []),
110
+ TOOL_PROMPT_GUIDELINES_SUFFIX[0],
111
+ TOOL_PROMPT_GUIDELINES_SUFFIX[1],
92
112
  ];
93
113
  }
@@ -123,9 +123,9 @@ const GLOBAL_BOOLEAN_FLAGS_WITH_OPTIONAL_VALUES = new Set([
123
123
  "-v",
124
124
  ]);
125
125
  // Case-insensitive, fully anchored (^...$) match on a whole parameter name.
  // This hunk adds the sentry_key / sentry-key / sentrykey and
  // write_key / write-key / writekey spellings to the accepted names.
  // NOTE(review): presumably used to decide which URL query parameters get
  // redacted — the consumer is not visible in this hunk; confirm.
  const SENSITIVE_QUERY_PARAM_PATTERN =
126
- /^(?:access(?:_|-)?token|api(?:_|-)?key|auth|authorization|bearer|client(?:_|-)?secret|code|cookie|id(?:_|-)?token|key|pass(?:word)?|refresh(?:_|-)?token|secret|session(?:_|-)?id|sig(?:nature)?|token)$/i;
126
+ /^(?:access(?:_|-)?token|api(?:_|-)?key|auth|authorization|bearer|client(?:_|-)?secret|code|cookie|id(?:_|-)?token|key|pass(?:word)?|refresh(?:_|-)?token|secret|sentry(?:_|-)?key|session(?:_|-)?id|sig(?:nature)?|token|write(?:_|-)?key)$/i;
127
127
  // Case-insensitive, fully anchored (^...$) match on a whole field name.
  // This hunk adds the sentry_key / sentry-key / sentrykey and
  // write_key / write-key / writekey spellings to the accepted names.
  // Unlike the query-parameter pattern, this list has no bare `key` or `code`
  // alternative, so the new *_key names are not already covered here.
  // NOTE(review): presumably used to decide which structured fields/headers get
  // redacted — the consumer is not visible in this hunk; confirm.
  const SENSITIVE_FIELD_NAME_PATTERN =
128
- /^(?:access(?:_|-)?token|api(?:_|-)?key|auth(?:orization)?|bearer|client(?:_|-)?secret|cookie|id(?:_|-)?token|pass(?:word)?|proxy(?:_|-)?authorization|refresh(?:_|-)?token|secret|session(?:_|-)?id|set(?:_|-)?cookie|sig(?:nature)?|token|x(?:_|-)?api(?:_|-)?key)$/i;
128
+ /^(?:access(?:_|-)?token|api(?:_|-)?key|auth(?:orization)?|bearer|client(?:_|-)?secret|cookie|id(?:_|-)?token|pass(?:word)?|proxy(?:_|-)?authorization|refresh(?:_|-)?token|secret|sentry(?:_|-)?key|session(?:_|-)?id|set(?:_|-)?cookie|sig(?:nature)?|token|write(?:_|-)?key|x(?:_|-)?api(?:_|-)?key)$/i;
129
129
 
130
130
  const VALUE_FLAGS = new Set([
131
131
  "--session",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-agent-browser-native",
3
- "version": "0.2.27",
3
+ "version": "0.2.29",
4
4
  "description": "pi extension that exposes agent-browser as a native tool for browser automation",
5
5
  "type": "module",
6
6
  "author": "Mitch Fultz (https://github.com/fitchmultz)",
@@ -24,7 +24,7 @@
24
24
  "url": "https://github.com/fitchmultz/pi-agent-browser-native/issues"
25
25
  },
26
26
  "engines": {
27
- "node": ">=20.6.0"
27
+ "node": ">=22.19.0"
28
28
  },
29
29
  "bin": {
30
30
  "pi-agent-browser-doctor": "scripts/doctor.mjs"
@@ -51,11 +51,13 @@
51
51
  "peerDependencies": {
52
52
  "@earendil-works/pi-ai": "*",
53
53
  "@earendil-works/pi-coding-agent": "*",
54
+ "@earendil-works/pi-tui": "*",
54
55
  "typebox": "*"
55
56
  },
56
57
  "devDependencies": {
57
- "@earendil-works/pi-ai": "^0.74.0",
58
- "@earendil-works/pi-coding-agent": "^0.74.0",
58
+ "@earendil-works/pi-ai": "^0.75.3",
59
+ "@earendil-works/pi-coding-agent": "^0.75.3",
60
+ "@earendil-works/pi-tui": "^0.75.3",
59
61
  "@types/node": "^25.6.1",
60
62
  "tsx": "^4.21.0",
61
63
  "typebox": "^1.1.38",