pi-agent-browser-native 0.2.51 → 0.2.53
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +35 -1
- package/README.md +6 -17
- package/dist/extensions/agent-browser/lib/command-policy.js +11 -0
- package/dist/extensions/agent-browser/lib/input-modes/job.js +31 -15
- package/dist/extensions/agent-browser/lib/input-modes/params.js +19 -40
- package/dist/extensions/agent-browser/lib/playbook.js +3 -2
- package/dist/extensions/agent-browser/lib/results/presentation/batch.js +3 -2
- package/dist/extensions/agent-browser/lib/results/presentation/diagnostics.js +27 -9
- package/dist/extensions/agent-browser/lib/results/presentation/large-output.js +26 -1
- package/dist/extensions/agent-browser/lib/results/presentation.js +9 -7
- package/dist/extensions/agent-browser/lib/web-search.js +1 -1
- package/docs/ARCHITECTURE.md +1 -1
- package/docs/COMMAND_REFERENCE.md +54 -14
- package/docs/RELEASE.md +4 -8
- package/docs/REQUIREMENTS.md +1 -1
- package/docs/SUPPORT_MATRIX.md +14 -13
- package/docs/TOOL_CONTRACT.md +6 -5
- package/package.json +5 -5
- package/scripts/agent-browser-capability-baseline.mjs +25 -3
- package/scripts/platform-smoke/browser-dogfood-windows.ps1 +8 -2
- package/scripts/platform-smoke.mjs +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,40 @@
|
|
|
2
2
|
|
|
3
3
|
## Unreleased
|
|
4
4
|
|
|
5
|
+
## 0.2.53 - 2026-06-18
|
|
6
|
+
|
|
7
|
+
### Changed
|
|
8
|
+
|
|
9
|
+
- Rebaselined upstream capability metadata, command reference, support matrix, platform-smoke image tag, and real-upstream output-shape metadata for `agent-browser` `0.28.0` / vercel-labs/agent-browser@6323df571ffd17d14e60ec19fcb56cc1caf498ab.
|
|
10
|
+
- Documented upstream `mcp`, `plugin add/list/show/run`, plugin-backed `auth login --credential-provider`, and `AGENT_BROWSER_PLUGINS` surfaces while keeping the wrapper thin and compatibility-shim-free.
|
|
11
|
+
- Marked `mcp` and known `plugin` commands as sessionless wrapper calls so local/infra commands do not get an implicit managed browser session.
|
|
12
|
+
- Collapsed duplicated release/platform-smoke prose across README, release docs, and agent guidance in favor of `docs/platform-smoke.md` as the detailed source of truth.
|
|
13
|
+
- Simplified duplicate internal schema/job compiler plumbing without changing the public tool schema or generated argv behavior.
|
|
14
|
+
|
|
15
|
+
### Fixed
|
|
16
|
+
|
|
17
|
+
- Retried the Windows platform dogfood smoke once after transient first browser-open failures, matching the existing Windows browser prewarm tolerance while preserving real dogfood failures.
|
|
18
|
+
|
|
19
|
+
### Validation
|
|
20
|
+
|
|
21
|
+
- Ran `npm run verify -- release` against `agent-browser` `0.28.0`; the gate passed default verification, command-reference checks, build, lifecycle verification, packaged Pi smoke, and macOS/Ubuntu/Windows-native platform smoke after refreshing the Ubuntu image and Windows `crabbox-ready` snapshot.
|
|
22
|
+
- Ran `npm run verify -- real-upstream`, `npm run verify -- dogfood`, `npm run docs`, `npm run verify -- command-reference`, and `git diff --check`.
|
|
23
|
+
|
|
24
|
+
## 0.2.52 - 2026-06-15
|
|
25
|
+
|
|
26
|
+
### Changed
|
|
27
|
+
|
|
28
|
+
- Rebaselined the upstream capability metadata, command reference, support matrix, platform-smoke image tag, and real-upstream output-shape metadata for `agent-browser` `0.27.3` / vercel-labs/agent-browser@2c7991c9eccca1c9db6eee1a26a713414778de5a. This is an install-only upstream update from the prior baseline; no wrapper feature, shim, or inventory-token change was added.
|
|
29
|
+
- Updated the local Pi development baseline to `@earendil-works/*` `0.79.4`, refreshed `.pi-fleet-tested-version`, and refreshed `package-lock.json` with npm 11 while keeping the intentional doctor floor at Pi `0.79.0`.
|
|
30
|
+
|
|
31
|
+
### Fixed
|
|
32
|
+
|
|
33
|
+
- Updated the lifecycle release harness prompt-readiness check to accept Pi 0.79.4 footer units such as `1.0M`, avoiding false readiness timeouts after successful startup.
|
|
34
|
+
|
|
35
|
+
### Validation
|
|
36
|
+
|
|
37
|
+
- Ran `npm run verify -- release` against `agent-browser` `0.27.3` and Pi `0.79.4`; the gate passed default verification, command-reference checks, build, lifecycle verification, packaged Pi smoke, and macOS/Ubuntu/Windows-native platform smoke.
|
|
38
|
+
|
|
5
39
|
## 0.2.51 - 2026-06-11
|
|
6
40
|
|
|
7
41
|
### Fixed
|
|
@@ -328,7 +362,7 @@
|
|
|
328
362
|
### Changed
|
|
329
363
|
- `sourceLookup`, broad `get text`, fill verification, tab/session mismatch, and stale-ref guidance now include Electron-aware context and recovery actions for packaged desktop apps.
|
|
330
364
|
- Verification coverage now includes deterministic Electron lifecycle/probe benchmark scenarios, fake-upstream Electron discovery/lifecycle tests, lifecycle restore/shutdown cleanup checks, and real-app dogfood evidence recorded in the Electron plan.
|
|
331
|
-
- The configured-source lifecycle harness (`npm run verify -- lifecycle`, `scripts/verify-lifecycle.mjs`) now defaults to Pi model `zai/glm-5.
|
|
365
|
+
- The configured-source lifecycle harness (`npm run verify -- lifecycle`, `scripts/verify-lifecycle.mjs`) now defaults to Pi model `zai/glm-5.2` with `--model <id>` override; `npm run verify` lifecycle passthrough rejects `--model` without a value.
|
|
332
366
|
- Updated the local Pi development baseline to `@earendil-works/*` `0.75.4` and refreshed the npm lockfile.
|
|
333
367
|
|
|
334
368
|
### Fixed
|
package/README.md
CHANGED
|
@@ -183,7 +183,7 @@ npm exec --yes --package pi-agent-browser-native@latest -- pi-agent-browser-conf
|
|
|
183
183
|
npm exec --yes --package pi-agent-browser-native@latest -- pi-agent-browser-config show
|
|
184
184
|
```
|
|
185
185
|
|
|
186
|
-
The optional `agent_browser_web_search` companion tool is available when a usable Exa or Brave credential source is configured or resolvable from startup config or trusted session config. It is not an `agent_browser` input mode and does not launch a browser; agents may use it whenever current/live external web information helps, then use `agent_browser` when they need page interaction, screenshots, authenticated/profile content, or DOM inspection. If both keys are available, the default provider is Exa because its `/search` endpoint returns agent-friendly highlights and search modes; set `webSearch.preferredProvider` to `"brave"` when you prefer Brave Search.
|
|
186
|
+
The optional `agent_browser_web_search` companion tool is available when a usable Exa or Brave credential source is configured or resolvable from startup config or trusted session config. It is not an `agent_browser` input mode and does not launch a browser; agents may use it whenever current/live external web information helps, then use `agent_browser` when they need page interaction, screenshots, authenticated/profile content, or DOM inspection. Prefer it over automating public search-engine forms such as Google in headless browser jobs: those flows may be redirected to anti-bot or CAPTCHA pages, and this wrapper does not provide or recommend CAPTCHA bypass. If both keys are available, the default provider is Exa because its `/search` endpoint returns agent-friendly highlights and search modes; set `webSearch.preferredProvider` to `"brave"` when you prefer Brave Search.
|
|
187
187
|
|
|
188
188
|
Get an Exa API key from the [Exa dashboard](https://dashboard.exa.ai/api-keys) or a Brave Search API key from the [Brave Search API dashboard](https://api-dashboard.search.brave.com/). Most users can simply export `EXA_API_KEY` or `BRAVE_API_KEY` in the environment that launches `pi`; config is only needed when you want Pi-scoped secret references, a preferred provider, or to disable this built-in search tool.
|
|
189
189
|
|
|
@@ -412,7 +412,7 @@ After either path, use `qa: { "attached": true, ... }` for a current-session smo
|
|
|
412
412
|
|
|
413
413
|
### Lightweight QA preset
|
|
414
414
|
|
|
415
|
-
For a quick smoke/QA pass, use top-level `qa`. It compiles to the same batch path as `job` and uses `batch --bail` so failed readiness/text/selector assertions stop before slower diagnostics can burn the wrapper watchdog. The URL form clears enabled network/console/page-error buffers before opening the target URL, waits for page readiness, checks optional expected text or selector, inspects fresh network requests, console messages, and page errors when preceding assertions pass, and can capture an evidence screenshot. Expected text is checked with bounded visible-text `wait --fn … --timeout 5000` predicates after the requested load state so dense pages can pass on visible headings/copy and missing text becomes crisp QA evidence. The attached form (`qa: { "attached": true }`) runs checks against the current managed session, such as an attached Electron app, rejects `url`, and deliberately preserves existing diagnostics instead of clearing evidence; its diagnostic reads default off so stale buffers do not fail a current-page smoke unless `checkNetwork`, `checkConsole`, or `checkErrors` is explicitly `true`. `loadState` defaults to `"domcontentloaded"`; set it to `"load"` or `"networkidle"` only when the stricter state is useful and the site is not expected to keep background requests alive. For URL-opening QA, `checkNetwork`, `checkConsole`, and `checkErrors` default to true; set one to `false` to skip that diagnostic read. Network failures are classified by likely impact and failed rows are listed first in network previews: actionable document/script/API-style failures still fail QA, while some low-impact browser icon asset misses (for example certain `favicon` or `apple-touch-icon` paths when upstream marks the row failed and resource metadata looks image-like) surface only as warnings instead of failing an otherwise healthy smoke check (`details.qaPreset.warnings`, with human-readable `details.qaPreset.summary` when the preset still passes). 
Exact predicates live in [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md#qa) and `classifyNetworkRequestFailure` in `extensions/agent-browser/lib/results/network.ts` (re-exported from the compatibility barrel).
|
|
415
|
+
For a quick smoke/QA pass, use top-level `qa`. It compiles to the same batch path as `job` and uses `batch --bail` so failed readiness/text/selector assertions stop before slower diagnostics can burn the wrapper watchdog. The URL form clears enabled network/console/page-error buffers before opening the target URL, waits for page readiness, checks optional expected text or selector, inspects fresh network requests, console messages, and page errors when preceding assertions pass, and can capture an evidence screenshot. Successful reset rows are labeled as reset-scoped output and ignored by QA failure analysis so stale pre-target errors do not fail an otherwise healthy target page; real post-open diagnostic rows still fail or warn according to the normal QA rules. Expected text is checked with bounded visible-text `wait --fn … --timeout 5000` predicates after the requested load state so dense pages can pass on visible headings/copy and missing text becomes crisp QA evidence. The attached form (`qa: { "attached": true }`) runs checks against the current managed session, such as an attached Electron app, rejects `url`, and deliberately preserves existing diagnostics instead of clearing evidence; its diagnostic reads default off so stale buffers do not fail a current-page smoke unless `checkNetwork`, `checkConsole`, or `checkErrors` is explicitly `true`. `loadState` defaults to `"domcontentloaded"`; set it to `"load"` or `"networkidle"` only when the stricter state is useful and the site is not expected to keep background requests alive. For URL-opening QA, `checkNetwork`, `checkConsole`, and `checkErrors` default to true; set one to `false` to skip that diagnostic read. 
Network failures are classified by likely impact and failed rows are listed first in network previews: actionable document/script/API-style failures still fail QA, while some low-impact browser icon asset misses (for example certain `favicon` or `apple-touch-icon` paths when upstream marks the row failed and resource metadata looks image-like) surface only as warnings instead of failing an otherwise healthy smoke check (`details.qaPreset.warnings`, with human-readable `details.qaPreset.summary` when the preset still passes). Exact predicates live in [`docs/TOOL_CONTRACT.md`](docs/TOOL_CONTRACT.md#qa) and `classifyNetworkRequestFailure` in `extensions/agent-browser/lib/results/network.ts` (re-exported from the compatibility barrel).
|
|
416
416
|
|
|
417
417
|
```json
|
|
418
418
|
{
|
|
@@ -449,7 +449,7 @@ For asynchronous exports, click first and then wait for the download:
|
|
|
449
449
|
{ "args": ["wait", "--download", "/tmp/report.csv"] }
|
|
450
450
|
```
|
|
451
451
|
|
|
452
|
-
When a user gives exact artifact paths for screenshots, recordings, downloads, PDFs, traces, or HAR files, use those paths or explicitly report why the artifact was unavailable; do not silently substitute a different path in the final report. The wrapper creates missing parent directories for direct artifact paths such as `state save`, screenshots, PDFs, downloads, and `wait --download`. For simple loopback `download <selector> <path>` anchor links with HTTP(S) `href`, it can save the in-page response directly to the requested path before falling back to upstream click/download behavior; non-loopback/profile downloads stay upstream-owned. With upstream `agent-browser
|
|
452
|
+
When a user gives exact artifact paths for screenshots, recordings, downloads, PDFs, traces, or HAR files, use those paths or explicitly report why the artifact was unavailable; do not silently substitute a different path in the final report. The wrapper creates missing parent directories for direct artifact paths such as `state save`, screenshots, PDFs, downloads, and `wait --download`. For simple loopback `download <selector> <path>` anchor links with HTTP(S) `href`, it can save the in-page response directly to the requested path before falling back to upstream click/download behavior; non-loopback/profile downloads stay upstream-owned. With current upstream `agent-browser`, treat `details.savedFilePath` as upstream-reported metadata and confirm `details.artifacts[].exists` / `details.artifactVerification.verified` before relying on the requested `wait --download <path>` file being present on disk; non-file download payloads such as `data:` URLs are not verified local artifacts.
|
|
453
453
|
|
|
454
454
|
For evidence-only screenshots or QA captures, branch on `details.artifactVerification` and `details.artifacts` before reporting PASS/FAIL; inline image attachments are optional when size limits allow—do not require vision review unless the user asked for visual inspection. If the latest prompt names exact required artifact paths, browser close can be blocked with `details.promptGuard` until those artifacts are saved and verified.
|
|
455
455
|
|
|
@@ -613,18 +613,7 @@ npm run verify -- dogfood
|
|
|
613
613
|
|
|
614
614
|
That mode drives the native wrapper through top-level `qa`, `semanticAction`, constrained `job`, screenshot artifact verification, and session close against a deterministic local fixture. It complements, but does not replace, the interactive Pi/tmux release dogfood in [`docs/RELEASE.md`](docs/RELEASE.md#pre-release-checks).
|
|
615
615
|
|
|
616
|
-
Cross-platform release coverage uses Crabbox to run macOS, Ubuntu Linux, and native Windows target suites:
|
|
617
|
-
|
|
618
|
-
```bash
|
|
619
|
-
npm run check:platform-smoke
|
|
620
|
-
npm run smoke:platform:ubuntu-image
|
|
621
|
-
npm run smoke:platform:doctor
|
|
622
|
-
npm run smoke:platform:all
|
|
623
|
-
```
|
|
624
|
-
|
|
625
|
-
The required matrix is documented in [`docs/platform-smoke.md`](docs/platform-smoke.md). It runs `platform-build` (fast target-local verify, pack, clean packed Pi install with `--approve`, `pi list --approve`) and `browser-dogfood-smoke` (real `agent-browser`/browser wrapper smoke) on every target. Inspect `.artifacts/platform-smoke/` and check `crabbox list --provider local-container` plus `crabbox list --provider parallels` after release runs so cleanup proof is not chat-only.
|
|
626
|
-
|
|
627
|
-
For package release confidence, follow [`docs/RELEASE.md`](docs/RELEASE.md). The release gate is:
|
|
616
|
+
Cross-platform release coverage uses Crabbox to run macOS, Ubuntu Linux, and native Windows target suites; see [`docs/platform-smoke.md`](docs/platform-smoke.md) for the required matrix, standalone coverage (`npm run smoke:platform:all` and per-target `smoke:platform:macos` / `:ubuntu` / `:windows-native`), and artifact/lease inspection. The release gate is:
|
|
628
617
|
|
|
629
618
|
```bash
|
|
630
619
|
npm run doctor
|
|
@@ -634,7 +623,7 @@ npm run smoke:platform:doctor
|
|
|
634
623
|
npm run verify -- release
|
|
635
624
|
```
|
|
636
625
|
|
|
637
|
-
`npm run verify -- release` includes the default verification gate, packaged Pi smoke coverage, and the release-blocking Crabbox platform matrix. The package also has a `prepublishOnly` hook that runs the same release gate and `npm pack --dry-run` during `npm publish`.
|
|
626
|
+
`npm run verify -- release` includes the default verification gate, packaged Pi smoke coverage, and the release-blocking Crabbox platform matrix (the same matrix `npm run smoke:platform:all` runs standalone). For the full maintainer release flow, follow [`docs/RELEASE.md`](docs/RELEASE.md). The package also has a `prepublishOnly` hook that runs the same release gate and `npm pack --dry-run` during `npm publish`.
|
|
638
627
|
|
|
639
628
|
## How it works
|
|
640
629
|
|
|
@@ -687,7 +676,7 @@ Configured-source lifecycle validation:
|
|
|
687
676
|
npm run verify -- lifecycle
|
|
688
677
|
```
|
|
689
678
|
|
|
690
|
-
The harness defaults to Pi model `zai/glm-5.
|
|
679
|
+
The harness defaults to Pi model `zai/glm-5.2` and **180000 ms** per-step tmux waits; pass `--model <id>` and/or `--timeout-ms <ms>` after `lifecycle` when you need different settings (see [Configured-source lifecycle validation](docs/RELEASE.md#configured-source-lifecycle-validation) in `docs/RELEASE.md`). It launches Pi 0.79 with `--approve` and a deterministic `--session-id`, drives `/reload`, closes Pi, relaunches the exact same session, asserts the JSONL header id, and checks managed-session continuity, compiled-entrypoint pickup after process restart, persisted spill reachability, and real Pi `tool_result` failure-patch behavior.
|
|
691
680
|
|
|
692
681
|
Use lifecycle validation when testing `/reload`, exact-session relaunch, `/resume`, managed-session continuity, or persisted artifact behavior. Branch-backed state and `session_tree` cleanup ownership are covered by focused extension harness tests. Maintainers must run the lifecycle harness before every publish; see [Pre-release checks](docs/RELEASE.md#pre-release-checks).
|
|
693
682
|
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
*/
|
|
6
6
|
import { hasOnlyBooleanFlags, hasOnlyOptionFlags, isNonFlagToken, stripSessionlessShapeGlobalFlags } from "./argv-grammar.js";
|
|
7
7
|
const SESSIONLESS_AUTH_SUBCOMMANDS = new Set(["save", "list", "show", "delete", "remove"]);
|
|
8
|
+
const PLUGIN_SESSIONLESS_SUBCOMMANDS = new Set(["list", "show", "add", "run"]);
|
|
8
9
|
const EMPTY_BOOLEAN_FLAGS = new Set();
|
|
9
10
|
const JSON_BOOLEAN_FLAGS = new Set(["--json"]);
|
|
10
11
|
const AUTH_SAVE_BOOLEAN_FLAGS = new Set(["--json", "--password-stdin"]);
|
|
@@ -57,6 +58,12 @@ function isSessionlessStateCommand(commandTokens) {
|
|
|
57
58
|
return false;
|
|
58
59
|
return secondArg === undefined || (secondArg === "--all" && rest.length === 0);
|
|
59
60
|
}
|
|
61
|
+
/**
 * Decides whether a `plugin` invocation counts as a sessionless wrapper call.
 *
 * Only the token at index 1 (the subcommand position) is inspected; the
 * caller is expected to have already matched the leading `plugin` token.
 *
 * @param {string[]} commandTokens - Command tokens, with the command name first.
 * @returns {boolean} `true` when no subcommand is present or when the
 *   subcommand is listed in `PLUGIN_SESSIONLESS_SUBCOMMANDS`; otherwise `false`.
 */
function isSessionlessPluginCommand(commandTokens) {
    const pluginSubcommand = commandTokens[1];
    // A bare `plugin` call (no subcommand token) is treated as sessionless too.
    return pluginSubcommand === undefined || PLUGIN_SESSIONLESS_SUBCOMMANDS.has(pluginSubcommand);
}
|
|
60
67
|
function isSessionlessCommand(commandTokens) {
|
|
61
68
|
const normalizedTokens = stripSessionlessShapeGlobalFlags(commandTokens);
|
|
62
69
|
const [command, subcommand] = normalizedTokens;
|
|
@@ -64,6 +71,10 @@ function isSessionlessCommand(commandTokens) {
|
|
|
64
71
|
return ["list", "get", "path"].includes(subcommand ?? "");
|
|
65
72
|
if (command === "auth")
|
|
66
73
|
return isSessionlessAuthCommand(normalizedTokens);
|
|
74
|
+
if (command === "plugin")
|
|
75
|
+
return isSessionlessPluginCommand(normalizedTokens);
|
|
76
|
+
if (command === "mcp")
|
|
77
|
+
return true;
|
|
67
78
|
if (command === "dashboard")
|
|
68
79
|
return isSessionlessDashboardCommand(normalizedTokens);
|
|
69
80
|
if (command === "device")
|
|
@@ -190,18 +190,21 @@ function compilePathArtifactJobStep(step, action) {
|
|
|
190
190
|
return { error: result.error };
|
|
191
191
|
return { args: action === "waitForDownload" ? ["wait", "--download", result.value] : ["screenshot", result.value] };
|
|
192
192
|
}
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
193
|
+
/**
 * Dispatch table from job step action name to the function that compiles that
 * step into upstream CLI arguments.
 *
 * The fields each action accepts are not repeated here: they live in
 * JOB_STEP_ALLOWED_FIELDS, and the call site looks them up by action. Per the
 * original note, both maps share the same keys, with the alignment enforced by
 * the Record<AgentBrowserJobStepAction, …> typing.
 */
const JOB_STEP_COMPILERS = {
    assertText: compileAssertTextJobStep,
    assertUrl: compileAssertUrlJobStep,
    click: compileClickJobStep,
    fill: compileFillJobStep,
    open: compileOpenJobStep,
    // Screenshot and download steps share the path-artifact compiler.
    screenshot(jobStep) {
        return compilePathArtifactJobStep(jobStep, "screenshot");
    },
    select: compileSelectJobStep,
    // A snapshot step takes no input and always compiles to the same argv.
    snapshot() {
        return { args: ["snapshot", "-i"] };
    },
    type: compileTypeJobStep,
    wait: compileWaitJobStep,
    waitForDownload(jobStep) {
        return compilePathArtifactJobStep(jobStep, "waitForDownload");
    },
};
|
|
206
209
|
export function compileAgentBrowserJob(input) {
|
|
207
210
|
if (!isRecord(input)) {
|
|
@@ -226,11 +229,11 @@ export function compileAgentBrowserJob(input) {
|
|
|
226
229
|
return { error: `job.steps[${index}].action must be one of: ${AGENT_BROWSER_JOB_STEP_ACTIONS.join(", ")}.` };
|
|
227
230
|
}
|
|
228
231
|
const jobAction = action;
|
|
229
|
-
const
|
|
230
|
-
const unsupportedFieldError = getUnsupportedJobStepFieldError(rawStep, jobAction,
|
|
232
|
+
const compile = JOB_STEP_COMPILERS[jobAction];
|
|
233
|
+
const unsupportedFieldError = getUnsupportedJobStepFieldError(rawStep, jobAction, JOB_STEP_ALLOWED_FIELDS[jobAction]);
|
|
231
234
|
if (unsupportedFieldError)
|
|
232
235
|
return { error: `job.steps[${index}]: ${unsupportedFieldError}` };
|
|
233
|
-
const compiledStep =
|
|
236
|
+
const compiledStep = compile(rawStep, index);
|
|
234
237
|
if (compiledStep.error)
|
|
235
238
|
return { error: compiledStep.error.startsWith(`job.steps[${index}]`) ? compiledStep.error : `job.steps[${index}]: ${compiledStep.error}` };
|
|
236
239
|
steps.push({ action: jobAction, args: compiledStep.args, generatedFrom: compiledStep.generatedFrom }, ...(compiledStep.extraSteps ?? []));
|
|
@@ -289,6 +292,9 @@ export function buildQaCompactPassText(options) {
|
|
|
289
292
|
if (pageParts.length > 0)
|
|
290
293
|
lines.push(`Page: ${pageParts.join(" — ")}`);
|
|
291
294
|
lines.push(`Checks run: ${describeQaChecksRun(options.checks)} (${options.batchStepCount} batch step${options.batchStepCount === 1 ? "" : "s"})`);
|
|
295
|
+
if (options.checks.diagnosticsResetAtStart && (options.checks.checkNetwork || options.checks.checkConsole || options.checks.checkErrors)) {
|
|
296
|
+
lines.push("Diagnostic reset: URL QA cleared enabled network/console/page-error buffers before opening the target; reset rows in details.batchSteps are not counted as current-page failures.");
|
|
297
|
+
}
|
|
292
298
|
if (options.checks.attached && !options.checks.diagnosticsResetAtStart && (options.checks.checkNetwork || options.checks.checkConsole || options.checks.checkErrors)) {
|
|
293
299
|
lines.push("Attached diagnostics: existing upstream session console/network/error buffers were preserved; rows may include events from before qa.attached started.");
|
|
294
300
|
}
|
|
@@ -369,6 +375,13 @@ function extractQaTextAssertionResultText(item) {
|
|
|
369
375
|
}
|
|
370
376
|
return undefined;
|
|
371
377
|
}
|
|
378
|
+
/**
 * Reports whether a batch item is a diagnostic buffer reset command, i.e. its
 * `command` token list contains `--clear` and targets `console`, `errors`, or
 * `network requests`.
 *
 * The predicate is total: a null/undefined item, a missing or non-array
 * `command`, or a `command` containing any non-string token all yield `false`
 * rather than throwing.
 *
 * @param {{ command?: unknown } | null | undefined} item - Batch result item.
 * @returns {boolean} `true` only for `console … --clear`, `errors … --clear`,
 *   or `network requests … --clear` token lists.
 */
function isDiagnosticResetCommand(item) {
    // Optional chaining keeps a malformed (null/undefined) batch row from
    // throwing; such a row is simply not a reset command.
    const command = item?.command;
    if (!Array.isArray(command) || !command.every((token) => typeof token === "string"))
        return false;
    // `--clear` may appear at any position in the token list.
    if (!command.includes("--clear"))
        return false;
    const [name, subcommand] = command;
    return name === "console" || name === "errors" || (name === "network" && subcommand === "requests");
}
|
|
372
385
|
export function analyzeQaPresetTimeout(compiled) {
|
|
373
386
|
if (compiled.checks.expectedText.length === 0)
|
|
374
387
|
return undefined;
|
|
@@ -392,6 +405,9 @@ export function analyzeQaPresetResults(data, compiled) {
|
|
|
392
405
|
}
|
|
393
406
|
const result = isRecord(item.result) ? item.result : undefined;
|
|
394
407
|
const commandName = getCommandNameFromBatchItem(item);
|
|
408
|
+
if (compiled?.checks.diagnosticsResetAtStart && isDiagnosticResetCommand(item)) {
|
|
409
|
+
continue;
|
|
410
|
+
}
|
|
395
411
|
if (commandName === "errors" && Array.isArray(result?.errors) && result.errors.length > 0) {
|
|
396
412
|
failedChecks.push(`${result.errors.length} page error(s)`);
|
|
397
413
|
}
|
|
@@ -7,6 +7,21 @@ import { JsonSchema } from "../json-schema.js";
|
|
|
7
7
|
import { StringEnum as localStringEnum } from "../string-enum-schema.js";
|
|
8
8
|
import { ELECTRON_DISCOVERY_DEFAULT_MAX_RESULTS, ELECTRON_DISCOVERY_MAX_RESULTS, } from "../electron/discovery.js";
|
|
9
9
|
import { AGENT_BROWSER_ELECTRON_HANDOFFS, AGENT_BROWSER_ELECTRON_TARGET_TYPES, AGENT_BROWSER_JOB_STEP_ACTIONS, AGENT_BROWSER_JOB_TYPE_DELAYED_TEXT_MAX_CHARACTERS, AGENT_BROWSER_QA_LOAD_STATES, AGENT_BROWSER_SEMANTIC_ACTIONS, AGENT_BROWSER_SEMANTIC_LOCATORS, DEFAULT_SESSION_MODE, SOURCE_LOOKUP_MAX_WORKSPACE_FILES, } from "./types.js";
|
|
10
|
+
/**
 * Builds the object schema for one `electron.launch` variant.
 *
 * Per the original note, the launch variants differ only in their single
 * target field (appPath / appName / bundleId / executablePath). The `launch`
 * action literal and the optional launch fields are identical, so they are
 * defined once here to keep the variants in sync.
 *
 * @param {object} Type - Schema builder providing Object, Optional, Array, String and Integer.
 * @param {Function} StringEnum - Factory for string-enum schemas.
 * @param {object} targetField - Property map holding the variant's target field;
 *   it is spread in directly after `action`.
 * @returns {object} Result of `Type.Object(...)` with `additionalProperties: false`.
 */
function electronLaunchVariant(Type, StringEnum, targetField) {
    // Optional array of non-empty strings; both descriptions are supplied by the caller.
    const optionalStringList = (itemDescription, listDescription) => Type.Optional(Type.Array(Type.String({ description: itemDescription, minLength: 1 }), { description: listDescription }));
    const properties = {
        action: StringEnum(["launch"], { description: "Launch an Electron app with an isolated wrapper-owned profile." }),
        ...targetField,
        appArgs: optionalStringList("Argument passed to the Electron application.", "Optional Electron app argv. Wrapper-owned lifecycle/debug flags are rejected."),
        handoff: Type.Optional(StringEnum(AGENT_BROWSER_ELECTRON_HANDOFFS, { description: "Post-launch handoff depth. Defaults to snapshot." })),
        targetType: Type.Optional(StringEnum(AGENT_BROWSER_ELECTRON_TARGET_TYPES, { description: "Preferred CDP target type. Defaults to page." })),
        timeoutMs: Type.Optional(Type.Integer({ description: "Bounded launch timeout in milliseconds.", minimum: 1 })),
        allow: optionalStringList("App identifier allowed by the caller for electron.launch.", "Optional caller-owned allow list for electron.launch policy checks."),
        deny: optionalStringList("App identifier denied by the caller for electron.launch.", "Optional caller-owned deny list for electron.launch policy checks; deny wins over allow."),
    };
    return Type.Object(properties, { additionalProperties: false });
}
|
|
10
25
|
export function createAgentBrowserParamsSchema(Type = JsonSchema, StringEnum = localStringEnum) {
|
|
11
26
|
return Type.Object({
|
|
12
27
|
args: Type.Optional(Type.Array(Type.String({ description: "Exact agent-browser CLI arguments, excluding the binary name. Do not pass --json; the wrapper injects it. First-call recipe: open → snapshot -i → click/fill @eN → snapshot -i." }), {
|
|
@@ -71,46 +86,10 @@ export function createAgentBrowserParamsSchema(Type = JsonSchema, StringEnum = l
|
|
|
71
86
|
query: Type.Optional(Type.String({ description: "Optional case-insensitive substring filter for electron.list across app name, bundle id, desktop id, and paths.", minLength: 1 })),
|
|
72
87
|
maxResults: Type.Optional(Type.Integer({ description: `Maximum electron.list apps to return. Defaults to ${ELECTRON_DISCOVERY_DEFAULT_MAX_RESULTS}; values above ${ELECTRON_DISCOVERY_MAX_RESULTS} are clamped.`, minimum: 1 })),
|
|
73
88
|
}, { additionalProperties: false }),
|
|
74
|
-
Type.
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
handoff: Type.Optional(StringEnum(AGENT_BROWSER_ELECTRON_HANDOFFS, { description: "Post-launch handoff depth. Defaults to snapshot." })),
|
|
79
|
-
targetType: Type.Optional(StringEnum(AGENT_BROWSER_ELECTRON_TARGET_TYPES, { description: "Preferred CDP target type. Defaults to page." })),
|
|
80
|
-
timeoutMs: Type.Optional(Type.Integer({ description: "Bounded launch timeout in milliseconds.", minimum: 1 })),
|
|
81
|
-
allow: Type.Optional(Type.Array(Type.String({ description: "App identifier allowed by the caller for electron.launch.", minLength: 1 }), { description: "Optional caller-owned allow list for electron.launch policy checks." })),
|
|
82
|
-
deny: Type.Optional(Type.Array(Type.String({ description: "App identifier denied by the caller for electron.launch.", minLength: 1 }), { description: "Optional caller-owned deny list for electron.launch policy checks; deny wins over allow." })),
|
|
83
|
-
}, { additionalProperties: false }),
|
|
84
|
-
Type.Object({
|
|
85
|
-
action: StringEnum(["launch"], { description: "Launch an Electron app with an isolated wrapper-owned profile." }),
|
|
86
|
-
appName: Type.String({ description: "Electron launch target: app display name discovered by electron.list. Exactly one launch target is required for electron.launch.", minLength: 1 }),
|
|
87
|
-
appArgs: Type.Optional(Type.Array(Type.String({ description: "Argument passed to the Electron application.", minLength: 1 }), { description: "Optional Electron app argv. Wrapper-owned lifecycle/debug flags are rejected." })),
|
|
88
|
-
handoff: Type.Optional(StringEnum(AGENT_BROWSER_ELECTRON_HANDOFFS, { description: "Post-launch handoff depth. Defaults to snapshot." })),
|
|
89
|
-
targetType: Type.Optional(StringEnum(AGENT_BROWSER_ELECTRON_TARGET_TYPES, { description: "Preferred CDP target type. Defaults to page." })),
|
|
90
|
-
timeoutMs: Type.Optional(Type.Integer({ description: "Bounded launch timeout in milliseconds.", minimum: 1 })),
|
|
91
|
-
allow: Type.Optional(Type.Array(Type.String({ description: "App identifier allowed by the caller for electron.launch.", minLength: 1 }), { description: "Optional caller-owned allow list for electron.launch policy checks." })),
|
|
92
|
-
deny: Type.Optional(Type.Array(Type.String({ description: "App identifier denied by the caller for electron.launch.", minLength: 1 }), { description: "Optional caller-owned deny list for electron.launch policy checks; deny wins over allow." })),
|
|
93
|
-
}, { additionalProperties: false }),
|
|
94
|
-
Type.Object({
|
|
95
|
-
action: StringEnum(["launch"], { description: "Launch an Electron app with an isolated wrapper-owned profile." }),
|
|
96
|
-
bundleId: Type.String({ description: "Electron launch target: macOS bundle identifier discovered by electron.list. Exactly one launch target is required for electron.launch.", minLength: 1 }),
|
|
97
|
-
appArgs: Type.Optional(Type.Array(Type.String({ description: "Argument passed to the Electron application.", minLength: 1 }), { description: "Optional Electron app argv. Wrapper-owned lifecycle/debug flags are rejected." })),
|
|
98
|
-
handoff: Type.Optional(StringEnum(AGENT_BROWSER_ELECTRON_HANDOFFS, { description: "Post-launch handoff depth. Defaults to snapshot." })),
|
|
99
|
-
targetType: Type.Optional(StringEnum(AGENT_BROWSER_ELECTRON_TARGET_TYPES, { description: "Preferred CDP target type. Defaults to page." })),
|
|
100
|
-
timeoutMs: Type.Optional(Type.Integer({ description: "Bounded launch timeout in milliseconds.", minimum: 1 })),
|
|
101
|
-
allow: Type.Optional(Type.Array(Type.String({ description: "App identifier allowed by the caller for electron.launch.", minLength: 1 }), { description: "Optional caller-owned allow list for electron.launch policy checks." })),
|
|
102
|
-
deny: Type.Optional(Type.Array(Type.String({ description: "App identifier denied by the caller for electron.launch.", minLength: 1 }), { description: "Optional caller-owned deny list for electron.launch policy checks; deny wins over allow." })),
|
|
103
|
-
}, { additionalProperties: false }),
|
|
104
|
-
Type.Object({
|
|
105
|
-
action: StringEnum(["launch"], { description: "Launch an Electron app with an isolated wrapper-owned profile." }),
|
|
106
|
-
executablePath: Type.String({ description: "Electron launch target: executable path. Discovery is not required when this is provided. Exactly one launch target is required for electron.launch.", minLength: 1 }),
|
|
107
|
-
appArgs: Type.Optional(Type.Array(Type.String({ description: "Argument passed to the Electron application.", minLength: 1 }), { description: "Optional Electron app argv. Wrapper-owned lifecycle/debug flags are rejected." })),
|
|
108
|
-
handoff: Type.Optional(StringEnum(AGENT_BROWSER_ELECTRON_HANDOFFS, { description: "Post-launch handoff depth. Defaults to snapshot." })),
|
|
109
|
-
targetType: Type.Optional(StringEnum(AGENT_BROWSER_ELECTRON_TARGET_TYPES, { description: "Preferred CDP target type. Defaults to page." })),
|
|
110
|
-
timeoutMs: Type.Optional(Type.Integer({ description: "Bounded launch timeout in milliseconds.", minimum: 1 })),
|
|
111
|
-
allow: Type.Optional(Type.Array(Type.String({ description: "App identifier allowed by the caller for electron.launch.", minLength: 1 }), { description: "Optional caller-owned allow list for electron.launch policy checks." })),
|
|
112
|
-
deny: Type.Optional(Type.Array(Type.String({ description: "App identifier denied by the caller for electron.launch.", minLength: 1 }), { description: "Optional caller-owned deny list for electron.launch policy checks; deny wins over allow." })),
|
|
113
|
-
}, { additionalProperties: false }),
|
|
89
|
+
electronLaunchVariant(Type, StringEnum, { appPath: Type.String({ description: "Electron launch target: macOS .app bundle path. Exactly one launch target is required for electron.launch.", minLength: 1 }) }),
|
|
90
|
+
electronLaunchVariant(Type, StringEnum, { appName: Type.String({ description: "Electron launch target: app display name discovered by electron.list. Exactly one launch target is required for electron.launch.", minLength: 1 }) }),
|
|
91
|
+
electronLaunchVariant(Type, StringEnum, { bundleId: Type.String({ description: "Electron launch target: macOS bundle identifier discovered by electron.list. Exactly one launch target is required for electron.launch.", minLength: 1 }) }),
|
|
92
|
+
electronLaunchVariant(Type, StringEnum, { executablePath: Type.String({ description: "Electron launch target: executable path. Discovery is not required when this is provided. Exactly one launch target is required for electron.launch.", minLength: 1 }) }),
|
|
114
93
|
Type.Object({
|
|
115
94
|
action: StringEnum(["status", "cleanup"], { description: "Inspect or cleanup one wrapper-tracked Electron launch by launchId." }),
|
|
116
95
|
launchId: Type.String({ description: "Wrapper launch id for electron.status and electron.cleanup.", minLength: 1 }),
|
|
@@ -24,10 +24,11 @@ export const QUICK_START_GUIDELINES = [
|
|
|
24
24
|
"For artifact-producing commands, read the visible artifact block and details.artifactVerification before using files: check requested path, absolute path, existence, size bytes, artifact kind, optional mediaType, status, optional limitation, and verified/missing/pending/unverified counts. details.artifacts contains per-file metadata; record start rows are pending/openRecording until record stop writes the target. The wrapper creates parent directories for direct artifact paths and can save simple loopback HTTP(S) anchor downloads directly to the requested path before upstream download fallback. Browser close does not delete explicit saved files; if close reports details.artifactCleanup, use host file tools to remove paths listed in explicitArtifactPaths (when non-empty) after inspection. If close fails with details.promptGuard.reason=requested-artifacts-missing-before-close, save the exact required artifact path before closing. For annotated screenshots inside batch, put --annotate in top-level args (for example { args: [\"--annotate\", \"batch\"], stdin: \"[[\\\"screenshot\\\",\\\"/tmp/page.png\\\"]]\" }) rather than inside the screenshot step; if annotation labels crowd a dense page, use a scoped or non-annotated screenshot plus snapshot refs instead.",
|
|
25
25
|
"When details.nextActions is present, prefer those exact native agent_browser follow-up payloads over prose guidance; they may include args, stdin, sessionMode, networkSourceLookup, safety notes, or artifactPath for saved files.",
|
|
26
26
|
];
|
|
27
|
-
export const WEB_SEARCH_PROMPT_GUIDELINE = "Use agent_browser_web_search for quick live search/URL discovery; it
|
|
27
|
+
export const WEB_SEARCH_PROMPT_GUIDELINE = "Use agent_browser_web_search for quick live search/URL discovery; prefer it over browser-automating public search-engine forms, which can hit anti-bot/CAPTCHA-gated pages. Use agent_browser for interaction/DOM/screenshots/auth after you have a target URL. One query, inspect, one follow-up max; on HTTP 429 stop/report limits.";
|
|
28
28
|
export const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
|
|
29
29
|
"Standard workflow: open the page, snapshot -i, interact using current @refs from that snapshot, and re-snapshot after navigation, scrolling, rerendering, or other major DOM changes because refs are page-scoped; the wrapper fails mutation-prone stale/recycled refs before upstream can silently target a different current-page element. On dense pages, use wrapper-side snapshot -i --search <text> or snapshot -i --filter role=<role> to render matching refs while preserving the full ref map in details.refSnapshot, add snapshot --viewport when scroll position or above/below-fold context matters, and add snapshot --diff when a quick before/after ref-map delta would prevent reading a full spill file.",
|
|
30
30
|
"For ordinary forms from one snapshot, batch multiple fill @refs before the submit/click step to avoid serial tool calls; if a fill may autosubmit, navigate, or rerender later fields, split the flow and refresh refs first.",
|
|
31
|
+
"Do not use browser automation to drive public search-engine forms such as Google for discovery; headless jobs that type a query and press Enter can be redirected to anti-bot or CAPTCHA pages. Use agent_browser_web_search when configured, ask for/search from a direct target URL, or navigate to known result URLs. Do not attempt CAPTCHA bypass.",
|
|
31
32
|
"Snapshot choice: prefer snapshot -i for routine clicks/fills (interactive @refs, main-content-first). Use snapshot --compact when you need a denser same-page tree without full spill; use full snapshot (no -i) only when you need the complete accessibility tree. Re-snapshot after navigation or major DOM changes. When snapshot -i compacts because the tree is oversized, scan visible output for Omitted high-value controls and optional details.data.highValueControlRefIds before opening the spill file: those list bounded searchboxes, textboxes, comboboxes, buttons, tabs, checkboxes, radios, options, and menuitems that did not fit the key/other ref previews.",
|
|
32
33
|
"When a visible text or accessible-name target should survive ref churn, prefer find locators such as role, text, label, placeholder, alt, title, or testid with the intended action instead of guessing a CSS selector.",
|
|
33
34
|
"For desktop or host-controlled rich inputs, if semanticAction fill misses, refresh refs and prefer a current editable @ref from details.richInputRecovery or the latest snapshot; focus or click that ref, then use keyboard inserttext or keyboard type with the intended text. Do not auto-submit with Enter or a submit button unless the user flow explicitly calls for it.",
|
|
@@ -44,7 +45,7 @@ export const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
|
|
|
44
45
|
"For Electron desktop apps, prefer top-level electron for wrapper-owned discovery, isolated launch, status, compact probe, and cleanup: list first, treat likely-sensitive annotations as hints rather than enforcement, launch with the default snapshot handoff unless handoff: \"tabs\" is the safer diagnostic starting point, use electron.probe or snapshot -i/qa.attached for current-session state, and always cleanup the returned launchId when done. electron.launch uses an isolated temporary profile; it does not reuse the app's normal signed-in profile or attach to an already-running authenticated app. For signed-in local app state, host-launch the normal app with --remote-debugging-port when appropriate, then use raw args connect <port|url>; after connect, inspect tab list, select the stable tab id such as tab t2, then run a condition wait or snapshot -i before using refs. close commands (`close`, `quit`, or `exit`) only close the browser/CDP session; leave manually launched app shutdown, profile cleanup, and explicit artifacts to the host owner.",
|
|
45
46
|
"For provider or specialized app workflows, load version-matched upstream guidance with skills get agentcore|electron|slack|dogfood|vercel-sandbox through the native tool; add --full when you need references/templates, and use skills get --all only for broad skill audits. Provider launches such as -p ios, --provider browserbase/kernel/browseruse/browserless/agentcore, and iOS --device are upstream-owned setup paths; use sessionMode fresh when switching providers and expect external credentials or local Appium/Xcode setup to be required.",
|
|
46
47
|
"For dialogs and frames, use dialog status/accept/dismiss and frame <selector|main> through native args; dialog commands and eval snippets that look like alert/confirm/prompt/dialog triggers are shorter-bounded than normal browser calls, and timed-out dialog-like interactions may add inspect-dialog-after-timeout, dismiss-dialog-after-timeout, or recover-fresh-session-after-dialog-timeout nextActions. When --confirm-actions produces a pending confirmation, use details.nextActions or exact confirm <id> / deny <id> calls instead of inventing ids.",
|
|
47
|
-
"If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. For headed demos, put --headed on the first launch with sessionMode=fresh and verify with screenshot/tab/get-url evidence because tool success cannot prove the OS window is visible to the user. For desktop readiness, prefer real conditions first: wait --text, wait --url, wait --fn, wait --load <state>, wait --download, or qa.attached; for disappearance checks in agent-browser 0.27.
|
|
48
|
+
"If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. For headed demos, put --headed on the first launch with sessionMode=fresh and verify with screenshot/tab/get-url evidence because tool success cannot prove the OS window is visible to the user. For desktop readiness, prefer real conditions first: wait --text, wait --url, wait --fn, wait --load <state>, wait --download, or qa.attached; for disappearance checks in agent-browser 0.27.3, use wait --fn predicates instead of stale upstream-help examples like wait <selector> --state hidden. Use electron.probe/status for wrapper-owned launch health or target mismatch. Fixed waits are a last resort: use explicit --timeout or top-level timeoutMs for legitimately slow waits, and treat a successful payload like \"waited\":\"timeout\" as elapsed time only—verify completion with an observed condition, fresh snapshot, or screenshot.",
|
|
48
49
|
"For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.",
|
|
49
50
|
"For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.",
|
|
50
51
|
"For downloads, prefer download <selector> <path> when an element click should save a file; simple loopback anchor downloads are saved to the requested path when the wrapper can resolve an HTTP(S) href. Do not rely on click alone when you need the downloaded file on disk.",
|
|
@@ -229,13 +229,14 @@ async function buildBatchStepPresentation(options) {
|
|
|
229
229
|
};
|
|
230
230
|
}
|
|
231
231
|
const commandInfo = parseCommandInfo(command ?? []);
|
|
232
|
+
const commandInfoWithTokens = command ? { ...commandInfo, commandTokens: command } : commandInfo;
|
|
232
233
|
const networkRouteDiagnostics = commandInfo.command === "network" && commandInfo.subcommand === "requests"
|
|
233
234
|
? buildNetworkRouteDiagnostics(item.result, networkRoutes)
|
|
234
235
|
: undefined;
|
|
235
236
|
const presentation = await buildNestedToolPresentation({
|
|
236
237
|
artifactManifest,
|
|
237
238
|
artifactRequest,
|
|
238
|
-
commandInfo,
|
|
239
|
+
commandInfo: commandInfoWithTokens,
|
|
239
240
|
cwd,
|
|
240
241
|
args: command,
|
|
241
242
|
envelope: { data: item.result, success: true },
|
|
@@ -264,7 +265,7 @@ async function buildBatchStepPresentation(options) {
|
|
|
264
265
|
});
|
|
265
266
|
const pageChangeSummary = buildPageChangeSummary({
|
|
266
267
|
artifacts: presentation.artifacts,
|
|
267
|
-
commandInfo,
|
|
268
|
+
commandInfo: commandInfoWithTokens,
|
|
268
269
|
data: presentation.data,
|
|
269
270
|
nextActions,
|
|
270
271
|
savedFilePath: presentation.savedFilePath,
|
|
@@ -107,6 +107,9 @@ export function enrichStreamStatusData(commandInfo, data) {
|
|
|
107
107
|
wsUrl: getStreamWebSocketUrl(data.port),
|
|
108
108
|
};
|
|
109
109
|
}
|
|
110
|
+
/**
 * Reports whether a diagnostic command carried the `--clear` flag.
 *
 * The flag is recognised either as the parsed subcommand or as any entry of
 * the optional `commandTokens` list attached to the command info.
 *
 * @param {object} commandInfo - Parsed command info; `commandTokens` may be absent.
 * @returns {boolean} True when `--clear` was found, false otherwise.
 */
function isClearDiagnosticCommand(commandInfo) {
    if (commandInfo.subcommand === "--clear") {
        return true;
    }
    const tokens = commandInfo.commandTokens;
    // A missing token list yields undefined here, which must map to false.
    return tokens?.includes("--clear") === true;
}
|
|
110
113
|
export function formatDiagnosticSummary(commandInfo, data) {
|
|
111
114
|
if (commandInfo.command === "session") {
|
|
112
115
|
const sessions = getArrayField(data, "sessions");
|
|
@@ -181,7 +184,7 @@ export function formatDiagnosticSummary(commandInfo, data) {
|
|
|
181
184
|
if (commandInfo.subcommand === "requests") {
|
|
182
185
|
const requests = getArrayField(data, "requests");
|
|
183
186
|
if (requests)
|
|
184
|
-
return `Network requests: ${requests.length}`;
|
|
187
|
+
return isClearDiagnosticCommand(commandInfo) ? `Network requests reset: ${requests.length} cleared` : `Network requests: ${requests.length}`;
|
|
185
188
|
}
|
|
186
189
|
if (commandInfo.subcommand === "route") {
|
|
187
190
|
const routed = getStringField(data, "routed") ?? getStringField(data, "url") ?? getStringField(data, "pattern");
|
|
@@ -228,12 +231,12 @@ export function formatDiagnosticSummary(commandInfo, data) {
|
|
|
228
231
|
if (commandInfo.command === "console") {
|
|
229
232
|
const messages = getArrayField(data, "messages");
|
|
230
233
|
if (messages)
|
|
231
|
-
return `Console messages: ${messages.length}`;
|
|
234
|
+
return isClearDiagnosticCommand(commandInfo) ? `Console reset: ${messages.length} cleared` : `Console messages: ${messages.length}`;
|
|
232
235
|
}
|
|
233
236
|
if (commandInfo.command === "errors") {
|
|
234
237
|
const errors = getArrayField(data, "errors");
|
|
235
238
|
if (errors)
|
|
236
|
-
return `Page errors: ${errors.length}`;
|
|
239
|
+
return isClearDiagnosticCommand(commandInfo) ? `Page errors reset: ${errors.length} cleared` : `Page errors: ${errors.length}`;
|
|
237
240
|
}
|
|
238
241
|
if (commandInfo.command === "dashboard") {
|
|
239
242
|
if (typeof data.port === "number")
|
|
@@ -344,10 +347,15 @@ function formatNetworkRequestLine(item, index) {
|
|
|
344
347
|
appendNetworkPreview(lines, "Error", getPreviewCandidate(item, NETWORK_PREVIEW_FIELD_CANDIDATES.error), NETWORK_ERROR_PREVIEW_MAX_CHARS);
|
|
345
348
|
return lines;
|
|
346
349
|
}
|
|
347
|
-
function formatNetworkRequestsText(data) {
|
|
350
|
+
function formatNetworkRequestsText(data, commandInfo) {
|
|
348
351
|
const requests = getArrayField(data, "requests");
|
|
349
352
|
if (!requests)
|
|
350
353
|
return undefined;
|
|
354
|
+
if (isClearDiagnosticCommand(commandInfo)) {
|
|
355
|
+
return requests.length === 0
|
|
356
|
+
? "Network request buffer cleared; no prior request rows were returned. This reset output is not evidence of current-page network activity."
|
|
357
|
+
: `Network request buffer cleared; upstream returned ${requests.length} cleared/stale row${requests.length === 1 ? "" : "s"}. Treat these as reset output, not current-page request failures.`;
|
|
358
|
+
}
|
|
351
359
|
if (requests.length === 0)
|
|
352
360
|
return "No network requests captured. Scope: upstream session aggregate unless the upstream command output says it was cleared or filtered for this page.";
|
|
353
361
|
const shown = ["Scope: upstream session aggregate unless the upstream command output says it was cleared or filtered for this page; do not attribute old requests to the current page without URL/time evidence."];
|
|
@@ -584,10 +592,15 @@ export function buildStreamNextActions(commandInfo, data, sessionName) {
|
|
|
584
592
|
},
|
|
585
593
|
];
|
|
586
594
|
}
|
|
587
|
-
function formatConsoleText(data) {
|
|
595
|
+
function formatConsoleText(data, commandInfo) {
|
|
588
596
|
const messages = getArrayField(data, "messages");
|
|
589
597
|
if (!messages)
|
|
590
598
|
return undefined;
|
|
599
|
+
if (isClearDiagnosticCommand(commandInfo)) {
|
|
600
|
+
return messages.length === 0
|
|
601
|
+
? "Console buffer cleared; no prior message rows were returned. This reset output is not evidence of current-page console activity."
|
|
602
|
+
: `Console buffer cleared; upstream returned ${messages.length} cleared/stale message row${messages.length === 1 ? "" : "s"}. Treat these as reset output, not current-page console errors.`;
|
|
603
|
+
}
|
|
591
604
|
if (messages.length === 0)
|
|
592
605
|
return "No console messages. Scope: upstream session aggregate unless the upstream command output says it was cleared or filtered for this page.";
|
|
593
606
|
const shown = ["Scope: upstream session aggregate unless the upstream command output says it was cleared or filtered for this page; do not attribute old messages to the current page without URL/time evidence."];
|
|
@@ -604,10 +617,15 @@ function formatConsoleText(data) {
|
|
|
604
617
|
}
|
|
605
618
|
return shown.join("\n");
|
|
606
619
|
}
|
|
607
|
-
function formatErrorsText(data) {
|
|
620
|
+
function formatErrorsText(data, commandInfo) {
|
|
608
621
|
const errors = getArrayField(data, "errors");
|
|
609
622
|
if (!errors)
|
|
610
623
|
return undefined;
|
|
624
|
+
if (isClearDiagnosticCommand(commandInfo)) {
|
|
625
|
+
return errors.length === 0
|
|
626
|
+
? "Page error buffer cleared; no prior error rows were returned. This reset output is not evidence of current-page errors."
|
|
627
|
+
: `Page error buffer cleared; upstream returned ${errors.length} cleared/stale error row${errors.length === 1 ? "" : "s"}. Treat these as reset output, not current-page errors.`;
|
|
628
|
+
}
|
|
611
629
|
if (errors.length === 0)
|
|
612
630
|
return "No page errors.";
|
|
613
631
|
const shown = errors.slice(0, DIAGNOSTIC_LOG_PREVIEW_LIMIT).map((item, index) => {
|
|
@@ -927,7 +945,7 @@ export function formatDiagnosticText(commandInfo, data) {
|
|
|
927
945
|
if (commandInfo.command === "state")
|
|
928
946
|
return formatStateText(data);
|
|
929
947
|
if (commandInfo.command === "network" && commandInfo.subcommand === "requests")
|
|
930
|
-
return formatNetworkRequestsText(data);
|
|
948
|
+
return formatNetworkRequestsText(data, commandInfo);
|
|
931
949
|
if (commandInfo.command === "network" && commandInfo.subcommand === "request")
|
|
932
950
|
return formatNetworkRequestText(data);
|
|
933
951
|
if (commandInfo.command === "diff")
|
|
@@ -945,9 +963,9 @@ export function formatDiagnosticText(commandInfo, data) {
|
|
|
945
963
|
if (commandInfo.command === "chat")
|
|
946
964
|
return formatChatText(data);
|
|
947
965
|
if (commandInfo.command === "console")
|
|
948
|
-
return formatConsoleText(data);
|
|
966
|
+
return formatConsoleText(data, commandInfo);
|
|
949
967
|
if (commandInfo.command === "errors")
|
|
950
|
-
return formatErrorsText(data);
|
|
968
|
+
return formatErrorsText(data, commandInfo);
|
|
951
969
|
if (commandInfo.command === "dashboard")
|
|
952
970
|
return formatDashboardText(data);
|
|
953
971
|
if (commandInfo.command === "doctor")
|
|
@@ -13,6 +13,8 @@ const LARGE_OUTPUT_INLINE_MAX_CHARS = 8_000;
|
|
|
13
13
|
const LARGE_OUTPUT_INLINE_MAX_LINES = 120;
|
|
14
14
|
const LARGE_OUTPUT_PREVIEW_MAX_CHARS = 2_500;
|
|
15
15
|
const LARGE_OUTPUT_PREVIEW_MAX_LINES = 40;
|
|
16
|
+
const LARGE_OUTPUT_PREVIEW_MAX_LINE_CHARS = 240;
|
|
17
|
+
const LARGE_OUTPUT_FAILURE_COMMAND_MAX_CHARS = 240;
|
|
16
18
|
const LARGE_OUTPUT_FILE_PREFIX = "pi-agent-browser-output";
|
|
17
19
|
function shouldCompactLargeOutput(text) {
|
|
18
20
|
return text.length > LARGE_OUTPUT_INLINE_MAX_CHARS || countLines(text) > LARGE_OUTPUT_INLINE_MAX_LINES;
|
|
@@ -26,7 +28,7 @@ function buildLargeOutputPreview(text) {
|
|
|
26
28
|
break;
|
|
27
29
|
}
|
|
28
30
|
const remainingChars = LARGE_OUTPUT_PREVIEW_MAX_CHARS - previewChars;
|
|
29
|
-
const previewLine = truncateText(line, Math.max(40, remainingChars));
|
|
31
|
+
const previewLine = truncateText(line, Math.min(Math.max(40, remainingChars), LARGE_OUTPUT_PREVIEW_MAX_LINE_CHARS));
|
|
30
32
|
previewLines.push(previewLine);
|
|
31
33
|
previewChars += previewLine.length + 1;
|
|
32
34
|
}
|
|
@@ -35,6 +37,27 @@ function buildLargeOutputPreview(text) {
|
|
|
35
37
|
previewText: previewLines.join("\n"),
|
|
36
38
|
};
|
|
37
39
|
}
|
|
40
|
+
/**
 * Builds the "Failure context" lines shown above the preview when a compacted
 * large output belongs to a failed batch.
 *
 * @param {object} [presentation] - Presentation being compacted; only its
 *   `batchFailure` field is read.
 * @returns {string[]} Context lines, or an empty array when there is no batch
 *   failure (or the failure carries no failed step) to describe.
 */
function buildLargeOutputFailureContext(presentation) {
    const failure = presentation?.batchFailure;
    const failedStep = failure?.failedStep;
    // Without a failed step there is nothing to describe; return no lines
    // instead of throwing while compacting the output.
    if (!failure || !failedStep)
        return [];
    const failureDetailMaxChars = 700;
    const commandText = truncateText(failedStep.commandText, LARGE_OUTPUT_FAILURE_COMMAND_MAX_CHARS);
    const lines = [
        "Failure context:",
        `- First failing step: ${failedStep.index + 1} — ${commandText}`,
        `- Batch result: ${failure.successCount}/${failure.totalCount} succeeded${failure.failureCount > 1 ? `; ${failure.failureCount} failed` : ""}`,
    ];
    if (failedStep.failureCategory)
        lines.push(`- Failure category: ${failedStep.failureCategory}`);
    // Prefer the step text and fall back to its summary; a step with neither
    // (or with a non-string value) contributes no detail line rather than crashing.
    const rawFailureText = failedStep.text || failedStep.summary;
    const failureText = typeof rawFailureText === "string" ? rawFailureText.replace(/\s+/g, " ").trim() : "";
    if (failureText)
        lines.push(`- Failure detail: ${truncateText(failureText, failureDetailMaxChars)}`);
    // A Set keeps first-occurrence order while dropping duplicate paths.
    const extraPaths = Array.isArray(failedStep.fullOutputPaths) ? failedStep.fullOutputPaths : [];
    const stepPaths = [...new Set([failedStep.fullOutputPath, ...extraPaths])]
        .filter((path) => typeof path === "string" && path.length > 0);
    if (stepPaths.length > 0)
        lines.push(`- Failed-step spill path${stepPaths.length === 1 ? "" : "s"}: ${stepPaths.join(", ")}`);
    return lines;
}
|
|
38
61
|
async function writeLargeOutputSpillFile(options) {
|
|
39
62
|
const payload = typeof options.data === "string"
|
|
40
63
|
? redactModelFacingText(options.data)
|
|
@@ -91,8 +114,10 @@ export async function compactLargePresentationOutput(options) {
|
|
|
91
114
|
}
|
|
92
115
|
const { omittedLineCount, previewText } = buildLargeOutputPreview(text);
|
|
93
116
|
const commandLabel = options.commandInfo.command ?? "agent-browser";
|
|
117
|
+
const failureContext = buildLargeOutputFailureContext(options.presentation);
|
|
94
118
|
const lines = [
|
|
95
119
|
`Large ${commandLabel} output compacted.`,
|
|
120
|
+
...(failureContext.length > 0 ? ["", ...failureContext] : []),
|
|
96
121
|
"",
|
|
97
122
|
"Preview:",
|
|
98
123
|
previewText,
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
* Scope: Presentation shaping only; upstream stdout parsing and snapshot compaction internals live in separate modules.
|
|
5
5
|
*/
|
|
6
6
|
import { isRecord } from "../parsing.js";
|
|
7
|
+
import { extractCommandTokens } from "../runtime.js";
|
|
7
8
|
import { buildAgentBrowserNextActions } from "./action-recommendations.js";
|
|
8
9
|
import { buildAgentBrowserResultCategoryDetails } from "./categories.js";
|
|
9
10
|
import { detectConfirmationRequired } from "./confirmation.js";
|
|
@@ -37,16 +38,17 @@ function shouldAddAnnotatedScreenshotGuidance(commandInfo, args) {
|
|
|
37
38
|
}
|
|
38
39
|
export async function buildToolPresentation(options) {
|
|
39
40
|
const { args, artifactManifest, artifactRequest, commandInfo, compiledSemanticAction, cwd, envelope, errorText, networkRouteDiagnostics, networkRoutes, persistentArtifactStore, sessionName, } = options;
|
|
40
|
-
const
|
|
41
|
+
const commandInfoWithTokens = commandInfo.commandTokens || !args ? commandInfo : { ...commandInfo, commandTokens: extractCommandTokens(args) };
|
|
42
|
+
const presentationCommandInfo = resolvePresentationCommandInfo(commandInfoWithTokens, compiledSemanticAction);
|
|
41
43
|
if (errorText) {
|
|
42
44
|
return buildErrorPresentation({ args, commandInfo, errorText, sessionName });
|
|
43
45
|
}
|
|
44
|
-
const data = enrichStreamStatusData(
|
|
45
|
-
const presentationData = redactPresentationData(
|
|
46
|
+
const data = enrichStreamStatusData(commandInfoWithTokens, envelope?.data);
|
|
47
|
+
const presentationData = redactPresentationData(commandInfoWithTokens, data);
|
|
46
48
|
const artifacts = await extractFileArtifacts({ artifactManifest, artifactRequest, commandInfo: presentationCommandInfo, cwd, data, sessionName });
|
|
47
49
|
const artifactVerification = buildArtifactVerificationSummary(artifacts);
|
|
48
50
|
const artifactSummary = formatArtifactSummary(artifacts);
|
|
49
|
-
const summary = artifactSummary ?? formatPresentationSummary(
|
|
51
|
+
const summary = artifactSummary ?? formatPresentationSummary(commandInfoWithTokens, data, compiledSemanticAction);
|
|
50
52
|
const artifactText = artifacts.length > 0 ? formatArtifactMetadataLines(artifacts).join("\n") : undefined;
|
|
51
53
|
let presentation;
|
|
52
54
|
if (commandInfo.command === "batch" && isAgentBrowserBatchResultArray(data)) {
|
|
@@ -69,7 +71,7 @@ export async function buildToolPresentation(options) {
|
|
|
69
71
|
presentation = {
|
|
70
72
|
artifactVerification,
|
|
71
73
|
artifacts: artifacts.length > 0 ? artifacts : undefined,
|
|
72
|
-
content: [{ type: "text", text: artifactText ?? formatPresentationContentText(
|
|
74
|
+
content: [{ type: "text", text: artifactText ?? formatPresentationContentText(commandInfoWithTokens, data, compiledSemanticAction) }],
|
|
73
75
|
data: presentationData,
|
|
74
76
|
summary,
|
|
75
77
|
};
|
|
@@ -160,10 +162,10 @@ export async function buildToolPresentation(options) {
|
|
|
160
162
|
savedFilePath: presentationWithManifest.savedFilePath,
|
|
161
163
|
successCategory: presentationWithManifest.successCategory,
|
|
162
164
|
});
|
|
163
|
-
const networkNextActions =
|
|
165
|
+
const networkNextActions = commandInfoWithTokens.command === "network" && commandInfoWithTokens.subcommand === "requests" && presentationWithManifest.resultCategory === "success"
|
|
164
166
|
? buildNetworkRequestsNextActions(data, sessionName, presentationWithManifest.networkRouteDiagnostics)
|
|
165
167
|
: undefined;
|
|
166
|
-
const streamNextActions = presentationWithManifest.resultCategory === "success" ? buildStreamNextActions(
|
|
168
|
+
const streamNextActions = presentationWithManifest.resultCategory === "success" ? buildStreamNextActions(commandInfoWithTokens, data, sessionName) : undefined;
|
|
167
169
|
presentationWithManifest.nextActions = mergeNextActions(presentationWithManifest.nextActions, genericNextActions, networkNextActions, streamNextActions);
|
|
168
170
|
presentationWithManifest.pageChangeSummary = presentationWithManifest.pageChangeSummary ?? buildPageChangeSummary({
|
|
169
171
|
artifacts: presentationWithManifest.artifacts,
|