pi-agent-browser-native 0.2.43 → 0.2.45
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +37 -0
- package/README.md +26 -16
- package/docs/ARCHITECTURE.md +12 -10
- package/docs/COMMAND_REFERENCE.md +49 -27
- package/docs/ELECTRON.md +1 -1
- package/docs/RELEASE.md +16 -9
- package/docs/REQUIREMENTS.md +6 -3
- package/docs/SUPPORT_MATRIX.md +18 -14
- package/docs/TOOL_CONTRACT.md +87 -46
- package/docs/platform-smoke.md +15 -9
- package/extensions/agent-browser/index.ts +29 -445
- package/extensions/agent-browser/lib/bash-guard.ts +205 -0
- package/extensions/agent-browser/lib/electron/cdp.ts +69 -0
- package/extensions/agent-browser/lib/electron/cleanup.ts +5 -58
- package/extensions/agent-browser/lib/electron/discovery.ts +2 -9
- package/extensions/agent-browser/lib/electron/launch.ts +11 -65
- package/extensions/agent-browser/lib/electron/text.ts +13 -0
- package/extensions/agent-browser/lib/fs-utils.ts +18 -0
- package/extensions/agent-browser/lib/input-modes/job.ts +207 -21
- package/extensions/agent-browser/lib/input-modes/params.ts +17 -7
- package/extensions/agent-browser/lib/input-modes/semantic-action.ts +22 -2
- package/extensions/agent-browser/lib/input-modes/types.ts +5 -1
- package/extensions/agent-browser/lib/input-modes.ts +1 -0
- package/extensions/agent-browser/lib/orchestration/browser-run/click-dispatch.ts +82 -11
- package/extensions/agent-browser/lib/orchestration/browser-run/diagnostics.ts +153 -30
- package/extensions/agent-browser/lib/orchestration/browser-run/final-result.ts +53 -2
- package/extensions/agent-browser/lib/orchestration/browser-run/index.ts +1 -0
- package/extensions/agent-browser/lib/orchestration/browser-run/prepare.ts +751 -32
- package/extensions/agent-browser/lib/orchestration/browser-run/process-output.ts +38 -7
- package/extensions/agent-browser/lib/orchestration/browser-run/prompt-guards.ts +0 -46
- package/extensions/agent-browser/lib/orchestration/browser-run/session-state.ts +10 -1
- package/extensions/agent-browser/lib/orchestration/browser-run/types.ts +28 -1
- package/extensions/agent-browser/lib/orchestration/electron-host/index.ts +1 -6
- package/extensions/agent-browser/lib/orchestration/input-plan.ts +15 -3
- package/extensions/agent-browser/lib/orchestration/output-file.ts +86 -0
- package/extensions/agent-browser/lib/pi-tool-rendering.ts +231 -0
- package/extensions/agent-browser/lib/playbook.ts +26 -26
- package/extensions/agent-browser/lib/process.ts +1 -1
- package/extensions/agent-browser/lib/prompt-policy.ts +1 -18
- package/extensions/agent-browser/lib/results/artifact-manifest.ts +1 -4
- package/extensions/agent-browser/lib/results/artifact-state.ts +7 -3
- package/extensions/agent-browser/lib/results/contracts.ts +6 -2
- package/extensions/agent-browser/lib/results/envelope.ts +11 -2
- package/extensions/agent-browser/lib/results/network-routes.ts +7 -4
- package/extensions/agent-browser/lib/results/network.ts +7 -1
- package/extensions/agent-browser/lib/results/presentation/artifacts.ts +88 -20
- package/extensions/agent-browser/lib/results/presentation/batch.ts +84 -12
- package/extensions/agent-browser/lib/results/presentation/diagnostics.ts +81 -26
- package/extensions/agent-browser/lib/results/presentation/errors.ts +13 -0
- package/extensions/agent-browser/lib/results/presentation/registry.ts +60 -0
- package/extensions/agent-browser/lib/results/presentation.ts +10 -1
- package/extensions/agent-browser/lib/results/snapshot-high-value-controls.ts +16 -5
- package/extensions/agent-browser/lib/results/snapshot.ts +2 -0
- package/extensions/agent-browser/lib/runtime.ts +10 -1
- package/extensions/agent-browser/lib/session-page-state.ts +15 -6
- package/extensions/agent-browser/lib/web-search.ts +1 -1
- package/package.json +5 -5
- package/platform-smoke.config.mjs +15 -3
- package/scripts/doctor.mjs +70 -1
- package/scripts/platform-smoke/build-ubuntu-image.mjs +25 -0
- package/scripts/platform-smoke/crabbox-runner.mjs +62 -30
- package/scripts/platform-smoke/doctor.mjs +28 -11
- package/scripts/platform-smoke/linux-image/Dockerfile +3 -5
- package/scripts/platform-smoke/targets.mjs +60 -22
- package/scripts/platform-smoke.mjs +1 -0
- package/extensions/agent-browser/lib/orchestration/browser-run/browser-action-model.ts +0 -154
package/docs/platform-smoke.md
CHANGED
|
@@ -6,15 +6,18 @@ This is a release-blocking gate. Missing Crabbox setup, Docker, macOS SSH, the n
|
|
|
6
6
|
|
|
7
7
|
## Required release gate
|
|
8
8
|
|
|
9
|
-
Run the cheap harness checks first, then the full matrix:
|
|
9
|
+
Run the cheap harness checks first, build the project-owned Ubuntu image, run doctor explicitly, then run the full matrix and inspect the evidence:
|
|
10
10
|
|
|
11
11
|
```sh
|
|
12
12
|
npm run check:platform-smoke
|
|
13
13
|
npm run smoke:platform:ubuntu-image
|
|
14
|
+
npm run smoke:platform:doctor
|
|
14
15
|
npm run smoke:platform:all
|
|
16
|
+
crabbox list --provider local-container
|
|
17
|
+
crabbox list --provider parallels
|
|
15
18
|
```
|
|
16
19
|
|
|
17
|
-
`smoke:platform:all` runs `smoke:platform:doctor` before any target suite starts. The canonical `npm run verify -- release` gate also runs the same platform doctor and full `macos,ubuntu,windows-native` matrix after default verification and packaged Pi smoke, so `npm publish` cannot pass `prepublishOnly` without the platform gate.
|
|
20
|
+
`smoke:platform:all` also runs `smoke:platform:doctor` before any target suite starts, so the explicit doctor step is a readable release checklist step rather than a hidden precondition. The canonical `npm run verify -- release` gate also runs the configured-source lifecycle harness, then the same platform doctor and full `macos,ubuntu,windows-native` matrix after default verification and packaged Pi smoke, so `npm publish` cannot pass `prepublishOnly` without lifecycle and platform gates. After the matrix, inspect `.artifacts/platform-smoke/<run-id>/...` summaries and manifests; a green Crabbox exit without matching suite assertions is not release proof. Use provider-specific `crabbox list` commands for cleanup review because this host may have unrelated Crabbox providers configured that require credentials.
|
|
18
21
|
|
|
19
22
|
Per-target commands are for diagnosis:
|
|
20
23
|
|
|
@@ -43,7 +46,7 @@ crabbox --version
|
|
|
43
46
|
crabbox providers
|
|
44
47
|
```
|
|
45
48
|
|
|
46
|
-
Use `PLATFORM_SMOKE_CRABBOX=/path/to/crabbox` only when testing a non-default Crabbox binary.
|
|
49
|
+
Use Crabbox `0.26.0` or newer. Use `PLATFORM_SMOKE_CRABBOX=/path/to/crabbox` only when testing a non-default Crabbox binary.
|
|
47
50
|
|
|
48
51
|
Standard configuration knobs:
|
|
49
52
|
|
|
@@ -51,9 +54,12 @@ Standard configuration knobs:
|
|
|
51
54
|
PLATFORM_SMOKE_MAC_HOST=localhost
|
|
52
55
|
PLATFORM_SMOKE_MAC_USER="$USER"
|
|
53
56
|
PLATFORM_SMOKE_MAC_WORK_ROOT="/Users/$USER/crabbox/pi-agent-browser-native"
|
|
57
|
+
# Optional only when localhost SSH does not use port 22.
|
|
58
|
+
PLATFORM_SMOKE_MAC_PORT=22
|
|
54
59
|
|
|
55
60
|
# Default local image built by npm run smoke:platform:ubuntu-image.
|
|
56
|
-
|
|
61
|
+
# The tag suffix is derived from scripts/agent-browser-capability-baseline.mjs.
|
|
62
|
+
PLATFORM_SMOKE_UBUNTU_IMAGE="pi-agent-browser-native-platform:node24-agent-browser<baseline-version>"
|
|
57
63
|
|
|
58
64
|
PLATFORM_SMOKE_WINDOWS_VM="pi-extension-windows-template"
|
|
59
65
|
PLATFORM_SMOKE_WINDOWS_SNAPSHOT="crabbox-ready"
|
|
@@ -64,7 +70,7 @@ PLATFORM_SMOKE_WINDOWS_WORK_ROOT="C:\\crabbox\\pi-agent-browser-native"
|
|
|
64
70
|
PLATFORM_SMOKE_AUTH_ENV=""
|
|
65
71
|
```
|
|
66
72
|
|
|
67
|
-
The Ubuntu target image is derived from `node:24-bookworm`, installs `agent-browser
|
|
73
|
+
The Ubuntu target image is derived from `node:24-bookworm`, installs the `agent-browser` version from [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs), installs Debian Chromium through apt, creates a non-root `circleci` user, and sets `AGENT_BROWSER_EXECUTABLE_PATH=/usr/bin/chromium`. Rebuild it after upstream rebaselining, or override `PLATFORM_SMOKE_UBUNTU_IMAGE` with an equivalent prepared local image. Do not install `agent-browser` ad hoc inside the Ubuntu smoke command; a missing tool is image/template drift.
|
|
68
74
|
|
|
69
75
|
The configured upstream `agent-browser` baseline is imported from [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs). Target-local browser suites verify that exact `agent-browser` version before running. Bake the exact upstream CLI and browser runtime into the Windows template/snapshot for speed and reproducibility; missing or stale Windows `agent-browser` / browser readiness is a blocked setup, not something the smoke command repairs. The Windows browser suite checks the preinstalled browser cache and prewarms one short local file URL before the extension harness runs.
|
|
70
76
|
|
|
@@ -78,7 +84,7 @@ Crabbox does not install project runtime tools. The macOS host, Ubuntu image, an
|
|
|
78
84
|
- Browser/runtime dependencies needed by upstream `agent-browser`.
|
|
79
85
|
- Native PowerShell and OpenSSH Server on Windows.
|
|
80
86
|
|
|
81
|
-
For Windows, reuse `pi-extension-windows-template` with the shared canonical `crabbox-ready` power-off snapshot. Do not create one-off project VMs. If a reusable tool is missing, update the shared template, verify from a fresh SSH session, remove caches/secrets/checkouts, shut down cleanly, and promote a known-good power-off snapshot.
|
|
87
|
+
For Windows, reuse `pi-extension-windows-template` with the shared canonical `crabbox-ready` power-off snapshot configured in [`platform-smoke.config.mjs`](../platform-smoke.config.mjs). Do not create one-off project VMs or run tests directly on the source VM. If a reusable tool is missing, update the shared template, verify from a fresh SSH session, remove caches/secrets/checkouts, shut down cleanly, and promote a known-good power-off snapshot.
|
|
82
88
|
|
|
83
89
|
## What the suites prove
|
|
84
90
|
|
|
@@ -108,7 +114,7 @@ The dogfood suite intentionally uses the checkout harness while `platform-build`
|
|
|
108
114
|
|
|
109
115
|
## Artifact contract
|
|
110
116
|
|
|
111
|
-
Every target suite writes host-side evidence under:
|
|
117
|
+
Every target run writes host-side evidence under one run id shared by that target’s suites:
|
|
112
118
|
|
|
113
119
|
```text
|
|
114
120
|
.artifacts/platform-smoke/<run-id>/<target>/<suite>/
|
|
@@ -117,9 +123,9 @@ Every target suite writes host-side evidence under:
|
|
|
117
123
|
Required files include:
|
|
118
124
|
|
|
119
125
|
```text
|
|
120
|
-
summary.json
|
|
126
|
+
summary.json # includes ok, target, suite, exit code, elapsed time, writtenAt
|
|
121
127
|
artifact-manifest.json
|
|
122
|
-
target.json
|
|
128
|
+
target.json # package, package version, Crabbox binary/version, provider, work root/image/template
|
|
123
129
|
suite.json
|
|
124
130
|
command.txt
|
|
125
131
|
exit-code.txt
|
|
@@ -7,19 +7,14 @@
|
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
9
|
import type { ChildProcess } from "node:child_process";
|
|
10
|
-
import { readFile } from "node:fs/promises";
|
|
11
10
|
import { dirname, join, resolve } from "node:path";
|
|
12
11
|
import { fileURLToPath } from "node:url";
|
|
13
12
|
|
|
14
13
|
import {
|
|
15
|
-
highlightCode,
|
|
16
14
|
isToolCallEventType,
|
|
17
|
-
keyHint,
|
|
18
15
|
type AgentToolResult,
|
|
19
16
|
type ExtensionAPI,
|
|
20
17
|
type ExtensionContext,
|
|
21
|
-
type Theme,
|
|
22
|
-
type ToolResultEvent,
|
|
23
18
|
} from "@earendil-works/pi-coding-agent";
|
|
24
19
|
import { Text } from "@earendil-works/pi-tui";
|
|
25
20
|
import {
|
|
@@ -42,12 +37,12 @@ import {
|
|
|
42
37
|
getImplicitSessionIdleTimeoutMs,
|
|
43
38
|
hasLaunchScopedTabCorrectionFlag,
|
|
44
39
|
extractExplicitSessionName,
|
|
45
|
-
redactInvocationArgs,
|
|
46
40
|
restoreManagedSessionStateFromBranch,
|
|
47
41
|
resolveManagedSessionState,
|
|
48
42
|
validateToolArgs,
|
|
49
43
|
type CompatibilityWorkaround,
|
|
50
44
|
} from "./lib/runtime.js";
|
|
45
|
+
import { isRecord } from "./lib/parsing.js";
|
|
51
46
|
import { buildPromptPolicy, getLatestUserPrompt, shouldAppendBrowserSystemPrompt } from "./lib/prompt-policy.js";
|
|
52
47
|
import { isCloseCommand } from "./lib/command-taxonomy.js";
|
|
53
48
|
import {
|
|
@@ -97,6 +92,7 @@ import {
|
|
|
97
92
|
type ElectronLaunchRecord,
|
|
98
93
|
} from "./lib/orchestration/electron-host/index.js";
|
|
99
94
|
import { buildValidationFailureResult, resolveAgentBrowserInput } from "./lib/orchestration/input-plan.js";
|
|
95
|
+
import { applyAgentBrowserOutputPath } from "./lib/orchestration/output-file.js";
|
|
100
96
|
import type { NetworkRouteRecord } from "./lib/results/contracts.js";
|
|
101
97
|
import type { SessionArtifactManifest } from "./lib/results/contracts.js";
|
|
102
98
|
import {
|
|
@@ -121,153 +117,19 @@ import {
|
|
|
121
117
|
import { withOptionalSessionArgs } from "./lib/results/next-actions.js";
|
|
122
118
|
import { canRegisterWebSearchTool, loadAgentBrowserConfigSync } from "./lib/config.js";
|
|
123
119
|
import { createAgentBrowserWebSearchTool } from "./lib/web-search.js";
|
|
120
|
+
import {
|
|
121
|
+
isDirectAgentBrowserBashAllowed,
|
|
122
|
+
isHarmlessAgentBrowserInspectionCommand,
|
|
123
|
+
looksLikeDirectAgentBrowserBash,
|
|
124
|
+
} from "./lib/bash-guard.js";
|
|
125
|
+
import {
|
|
126
|
+
AgentBrowserResultComponent,
|
|
127
|
+
buildAgentBrowserToolResultPatch,
|
|
128
|
+
formatAgentBrowserRenderCall,
|
|
129
|
+
formatAgentBrowserRenderResult,
|
|
130
|
+
} from "./lib/pi-tool-rendering.js";
|
|
124
131
|
|
|
125
132
|
const DEFAULT_SESSION_MODE = "auto" as const;
|
|
126
|
-
const DIRECT_AGENT_BROWSER_BASH_BYPASS_ENV = "PI_AGENT_BROWSER_ALLOW_DIRECT_BASH";
|
|
127
|
-
const PACKAGE_NAME = "pi-agent-browser-native";
|
|
128
|
-
|
|
129
|
-
const TUI_COLLAPSED_OUTPUT_MAX_LINES = 10;
|
|
130
|
-
const TUI_INVOCATION_PREVIEW_MAX_CHARS = 120;
|
|
131
|
-
const ANSI_CONTROL_SEQUENCE_PATTERN = /\x1B(?:\][^\x07\x1B]*(?:\x07|\x1B\\)|\[[0-?]*[ -/]*[@-~]|P[^\x1B]*(?:\x1B\\)|_[^\x1B]*(?:\x1B\\)|\^[^\x1B]*(?:\x1B\\)|[@-Z\\-_])/g;
|
|
132
|
-
const UNSAFE_DISPLAY_CONTROL_PATTERN = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F\x80-\x9F]/g;
|
|
133
|
-
|
|
134
|
-
function sanitizeDisplayText(text: string): string {
|
|
135
|
-
return text
|
|
136
|
-
.replace(ANSI_CONTROL_SEQUENCE_PATTERN, "")
|
|
137
|
-
.replace(/\r/g, "")
|
|
138
|
-
.replace(UNSAFE_DISPLAY_CONTROL_PATTERN, "�");
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
function replaceTabsForDisplay(text: string): string {
|
|
142
|
-
return text.replaceAll("\t", " ");
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
function trimTrailingBlankLines(lines: string[]): string[] {
|
|
146
|
-
let end = lines.length;
|
|
147
|
-
while (end > 0 && lines[end - 1].trim().length === 0) {
|
|
148
|
-
end -= 1;
|
|
149
|
-
}
|
|
150
|
-
return lines.slice(0, end);
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
function isJsonDocumentText(text: string): boolean {
|
|
154
|
-
const trimmed = text.trim();
|
|
155
|
-
if (!trimmed.startsWith("{") && !trimmed.startsWith("[")) {
|
|
156
|
-
return false;
|
|
157
|
-
}
|
|
158
|
-
try {
|
|
159
|
-
JSON.parse(trimmed);
|
|
160
|
-
return true;
|
|
161
|
-
} catch {
|
|
162
|
-
return false;
|
|
163
|
-
}
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
function getPrimaryTextContent(result: AgentToolResult<unknown>): string {
|
|
167
|
-
const textContent = result.content.find((item) => item.type === "text");
|
|
168
|
-
return textContent?.type === "text" ? textContent.text : "";
|
|
169
|
-
}
|
|
170
|
-
|
|
171
|
-
function colorizeToolOutputLines(text: string, theme: Theme, isError: boolean): string[] {
|
|
172
|
-
const normalizedLines = trimTrailingBlankLines(replaceTabsForDisplay(sanitizeDisplayText(text)).split("\n"));
|
|
173
|
-
const normalizedText = normalizedLines.join("\n");
|
|
174
|
-
if (normalizedText.length === 0) {
|
|
175
|
-
return [];
|
|
176
|
-
}
|
|
177
|
-
if (isJsonDocumentText(normalizedText)) {
|
|
178
|
-
return highlightCode(normalizedText, "json");
|
|
179
|
-
}
|
|
180
|
-
return normalizedLines.map((line) => {
|
|
181
|
-
if (line.length === 0) {
|
|
182
|
-
return "";
|
|
183
|
-
}
|
|
184
|
-
return isError ? theme.fg("error", line) : theme.fg("toolOutput", line);
|
|
185
|
-
});
|
|
186
|
-
}
|
|
187
|
-
|
|
188
|
-
function formatExpandHint(theme: Theme): string {
|
|
189
|
-
try {
|
|
190
|
-
return keyHint("app.tools.expand", "to expand");
|
|
191
|
-
} catch {
|
|
192
|
-
return `${theme.fg("dim", "ctrl+o")} ${theme.fg("muted", "to expand")}`;
|
|
193
|
-
}
|
|
194
|
-
}
|
|
195
|
-
|
|
196
|
-
function formatVisualTruncationNotice(remainingLines: number, totalLines: number, theme: Theme): string {
|
|
197
|
-
return `${theme.fg("muted", `... (${remainingLines} more lines, ${totalLines} total, `)}${formatExpandHint(theme)}${theme.fg("muted", ")")}`;
|
|
198
|
-
}
|
|
199
|
-
|
|
200
|
-
function formatAgentBrowserRenderCall(args: unknown, theme: Theme): string {
|
|
201
|
-
const input = isRecord(args) ? args : {};
|
|
202
|
-
const semanticAction = compileAgentBrowserSemanticAction(input.semanticAction);
|
|
203
|
-
const job = compileAgentBrowserJob(input.job);
|
|
204
|
-
const qa = compileAgentBrowserQaPreset(input.qa);
|
|
205
|
-
const sourceLookup = compileAgentBrowserSourceLookup(input.sourceLookup);
|
|
206
|
-
const networkSourceLookup = compileAgentBrowserNetworkSourceLookup(input.networkSourceLookup);
|
|
207
|
-
const electron = compileAgentBrowserElectron(input.electron);
|
|
208
|
-
const generatedBatch = networkSourceLookup.compiled ?? sourceLookup.compiled ?? job.compiled ?? qa.compiled;
|
|
209
|
-
const rawArgs = Array.isArray(input.args)
|
|
210
|
-
? input.args.filter((value): value is string => typeof value === "string")
|
|
211
|
-
: electron.compiled
|
|
212
|
-
? ["electron", electron.compiled.action]
|
|
213
|
-
: (semanticAction.compiled?.args ?? generatedBatch?.args ?? []);
|
|
214
|
-
const redactedArgs = redactInvocationArgs(rawArgs);
|
|
215
|
-
const invocation = sanitizeDisplayText(redactedArgs.join(" ")).replace(/\s+/g, " ").trim();
|
|
216
|
-
const invocationPreview =
|
|
217
|
-
invocation.length > TUI_INVOCATION_PREVIEW_MAX_CHARS
|
|
218
|
-
? `${invocation.slice(0, TUI_INVOCATION_PREVIEW_MAX_CHARS - 3)}...`
|
|
219
|
-
: invocation;
|
|
220
|
-
let text = theme.fg("toolTitle", theme.bold("agent_browser"));
|
|
221
|
-
if (invocationPreview.length > 0) {
|
|
222
|
-
text += ` ${theme.fg("accent", invocationPreview)}`;
|
|
223
|
-
}
|
|
224
|
-
if (input.sessionMode === "fresh") {
|
|
225
|
-
text += theme.fg("dim", " sessionMode=fresh");
|
|
226
|
-
}
|
|
227
|
-
if (typeof input.stdin === "string") {
|
|
228
|
-
text += theme.fg("dim", " + stdin");
|
|
229
|
-
}
|
|
230
|
-
return text;
|
|
231
|
-
}
|
|
232
|
-
|
|
233
|
-
function formatAgentBrowserRenderResult(
|
|
234
|
-
result: AgentToolResult<unknown>,
|
|
235
|
-
options: { expanded: boolean; isPartial: boolean },
|
|
236
|
-
theme: Theme,
|
|
237
|
-
isError: boolean,
|
|
238
|
-
): string {
|
|
239
|
-
if (options.isPartial) {
|
|
240
|
-
return theme.fg("warning", "Running agent-browser...");
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
const outputText = getPrimaryTextContent(result);
|
|
244
|
-
const outputLines = colorizeToolOutputLines(outputText, theme, isError);
|
|
245
|
-
if (outputLines.length === 0) {
|
|
246
|
-
const details = isRecord(result.details) ? result.details : undefined;
|
|
247
|
-
const rawSummary = typeof details?.summary === "string" ? details.summary : isError ? "agent-browser failed" : "Done";
|
|
248
|
-
const sanitizedSummary = sanitizeDisplayText(rawSummary).trim();
|
|
249
|
-
const summary = sanitizedSummary.length > 0 ? sanitizedSummary : isError ? "agent-browser failed" : "Done";
|
|
250
|
-
return isError ? theme.fg("error", summary) : theme.fg("success", summary);
|
|
251
|
-
}
|
|
252
|
-
|
|
253
|
-
return `\n${outputLines.join("\n")}`;
|
|
254
|
-
}
|
|
255
|
-
|
|
256
|
-
function formatModelVisibleFailureCategoryNotice(details: unknown): string | undefined {
|
|
257
|
-
if (!isRecord(details) || details.resultCategory !== "failure") return undefined;
|
|
258
|
-
const failureCategory = typeof details.failureCategory === "string" && details.failureCategory.length > 0
|
|
259
|
-
? details.failureCategory
|
|
260
|
-
: undefined;
|
|
261
|
-
return `Result category: failure${failureCategory ? `; failureCategory: ${failureCategory}` : ""}; Pi tool isError: true.`;
|
|
262
|
-
}
|
|
263
|
-
|
|
264
|
-
type AgentBrowserToolContent = AgentToolResult<unknown>["content"];
|
|
265
|
-
type AgentBrowserToolContentItem = AgentBrowserToolContent[number];
|
|
266
|
-
|
|
267
|
-
type AgentBrowserToolResultPatch = {
|
|
268
|
-
content?: AgentBrowserToolContent;
|
|
269
|
-
isError?: boolean;
|
|
270
|
-
};
|
|
271
133
|
|
|
272
134
|
type OwnedManagedSession = {
|
|
273
135
|
branchOwned: boolean;
|
|
@@ -283,290 +145,7 @@ interface BranchManagedResourceEvents {
|
|
|
283
145
|
managedSessionCloseRanks: Map<string, number>;
|
|
284
146
|
}
|
|
285
147
|
|
|
286
|
-
function
|
|
287
|
-
const details = isRecord(event.details) ? event.details : undefined;
|
|
288
|
-
const detailArgs = Array.isArray(details?.args) ? details.args : undefined;
|
|
289
|
-
const inputArgs = isRecord(event.input) && Array.isArray(event.input.args) ? event.input.args : undefined;
|
|
290
|
-
return detailArgs?.includes("--json") === true || inputArgs?.includes("--json") === true;
|
|
291
|
-
}
|
|
292
|
-
|
|
293
|
-
function agentBrowserToolResultHasParseableJsonContent(content: AgentBrowserToolContent): boolean {
|
|
294
|
-
return content.some((item) => {
|
|
295
|
-
if (item.type !== "text" || typeof item.text !== "string") return false;
|
|
296
|
-
const text = item.text.trim();
|
|
297
|
-
if (text.length === 0) return false;
|
|
298
|
-
try {
|
|
299
|
-
JSON.parse(text);
|
|
300
|
-
return true;
|
|
301
|
-
} catch {
|
|
302
|
-
return false;
|
|
303
|
-
}
|
|
304
|
-
});
|
|
305
|
-
}
|
|
306
|
-
|
|
307
|
-
function appendModelVisibleFailureCategoryNotice(content: AgentBrowserToolContent, notice: string): AgentBrowserToolContent | undefined {
|
|
308
|
-
const noticeContent: AgentBrowserToolContentItem = { type: "text", text: notice };
|
|
309
|
-
const textIndex = content.findIndex((item) => item.type === "text" && typeof item.text === "string");
|
|
310
|
-
if (textIndex === -1) return [noticeContent, ...content];
|
|
311
|
-
const textItem = content[textIndex];
|
|
312
|
-
if (textItem.type !== "text" || typeof textItem.text !== "string" || textItem.text.includes(notice)) return undefined;
|
|
313
|
-
return content.map((item, index) => index === textIndex
|
|
314
|
-
? { ...item, text: `${textItem.text}\n\n${notice}` }
|
|
315
|
-
: item);
|
|
316
|
-
}
|
|
317
|
-
|
|
318
|
-
function buildAgentBrowserToolResultPatch(event: ToolResultEvent): AgentBrowserToolResultPatch | undefined {
|
|
319
|
-
if (event.toolName !== "agent_browser") return undefined;
|
|
320
|
-
const preservesParseableJson = agentBrowserToolResultRequestedJson(event) && agentBrowserToolResultHasParseableJsonContent(event.content);
|
|
321
|
-
const notice = preservesParseableJson ? undefined : formatModelVisibleFailureCategoryNotice(event.details);
|
|
322
|
-
const content = notice ? appendModelVisibleFailureCategoryNotice(event.content, notice) : undefined;
|
|
323
|
-
const shouldMarkError = isRecord(event.details) && event.details.resultCategory === "failure" && event.isError !== true;
|
|
324
|
-
if (!shouldMarkError && !content) return undefined;
|
|
325
|
-
return {
|
|
326
|
-
...(content ? { content } : {}),
|
|
327
|
-
...(shouldMarkError ? { isError: true } : {}),
|
|
328
|
-
};
|
|
329
|
-
}
|
|
330
|
-
|
|
331
|
-
class AgentBrowserResultComponent {
|
|
332
|
-
private expanded = false;
|
|
333
|
-
private theme: Theme | undefined;
|
|
334
|
-
private readonly text = new Text("", 0, 0);
|
|
335
|
-
|
|
336
|
-
setState(value: string, expanded: boolean, theme: Theme): void {
|
|
337
|
-
this.text.setText(value);
|
|
338
|
-
this.expanded = expanded;
|
|
339
|
-
this.theme = theme;
|
|
340
|
-
}
|
|
341
|
-
|
|
342
|
-
render(width: number): string[] {
|
|
343
|
-
const lines = this.text.render(width);
|
|
344
|
-
if (this.expanded || lines.length <= TUI_COLLAPSED_OUTPUT_MAX_LINES) {
|
|
345
|
-
return lines;
|
|
346
|
-
}
|
|
347
|
-
const theme = this.theme;
|
|
348
|
-
if (!theme) {
|
|
349
|
-
return lines.slice(0, TUI_COLLAPSED_OUTPUT_MAX_LINES);
|
|
350
|
-
}
|
|
351
|
-
const hiddenLineCount = lines.length - TUI_COLLAPSED_OUTPUT_MAX_LINES;
|
|
352
|
-
return [
|
|
353
|
-
...lines.slice(0, TUI_COLLAPSED_OUTPUT_MAX_LINES),
|
|
354
|
-
formatVisualTruncationNotice(hiddenLineCount, lines.length, theme),
|
|
355
|
-
];
|
|
356
|
-
}
|
|
357
|
-
|
|
358
|
-
invalidate(): void {
|
|
359
|
-
this.text.invalidate();
|
|
360
|
-
}
|
|
361
|
-
}
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
const DIRECT_AGENT_BROWSER_EXECUTABLE_PATTERN = /^(?:[.~]|\.\.?|\/)?(?:[^\s;&|]+\/)?agent-browser$/;
|
|
365
|
-
const HARMLESS_AGENT_BROWSER_INSPECTION_PATTERN = /^\s*(?:command\s+-v|which|type\s+-P)\s+agent-browser\s*$/;
|
|
366
|
-
|
|
367
|
-
type ShellQuoteState = 'double' | 'single' | undefined;
|
|
368
|
-
|
|
369
|
-
function isShellAssignmentToken(token: string): boolean {
|
|
370
|
-
return /^[A-Za-z_][A-Za-z0-9_]*=/.test(token);
|
|
371
|
-
}
|
|
372
|
-
|
|
373
|
-
function stripOuterQuotes(token: string): string {
|
|
374
|
-
if (token.length >= 2 && ((token.startsWith('"') && token.endsWith('"')) || (token.startsWith("'") && token.endsWith("'")))) {
|
|
375
|
-
return token.slice(1, -1);
|
|
376
|
-
}
|
|
377
|
-
return token;
|
|
378
|
-
}
|
|
379
|
-
|
|
380
|
-
function segmentLaunchesAgentBrowser(tokens: string[]): boolean {
|
|
381
|
-
let index = 0;
|
|
382
|
-
while (index < tokens.length && isShellAssignmentToken(tokens[index])) {
|
|
383
|
-
index += 1;
|
|
384
|
-
}
|
|
385
|
-
if (index >= tokens.length) {
|
|
386
|
-
return false;
|
|
387
|
-
}
|
|
388
|
-
|
|
389
|
-
let executableToken = tokens[index];
|
|
390
|
-
if (executableToken === 'env') {
|
|
391
|
-
index += 1;
|
|
392
|
-
while (index < tokens.length && isShellAssignmentToken(tokens[index])) {
|
|
393
|
-
index += 1;
|
|
394
|
-
}
|
|
395
|
-
executableToken = tokens[index] ?? '';
|
|
396
|
-
}
|
|
397
|
-
if (executableToken === 'npx' || executableToken === 'bunx') {
|
|
398
|
-
index += 1;
|
|
399
|
-
while (index < tokens.length && tokens[index].startsWith('-')) {
|
|
400
|
-
index += 1;
|
|
401
|
-
}
|
|
402
|
-
executableToken = tokens[index] ?? '';
|
|
403
|
-
}
|
|
404
|
-
if (executableToken === 'pnpm' || executableToken === 'yarn') {
|
|
405
|
-
index += 1;
|
|
406
|
-
if (tokens[index] !== 'dlx') {
|
|
407
|
-
return false;
|
|
408
|
-
}
|
|
409
|
-
index += 1;
|
|
410
|
-
while (index < tokens.length && tokens[index].startsWith('-')) {
|
|
411
|
-
index += 1;
|
|
412
|
-
}
|
|
413
|
-
executableToken = tokens[index] ?? '';
|
|
414
|
-
}
|
|
415
|
-
return DIRECT_AGENT_BROWSER_EXECUTABLE_PATTERN.test(executableToken);
|
|
416
|
-
}
|
|
417
|
-
|
|
418
|
-
// Best-effort detection for common direct launches only. This is an ergonomics guard,
|
|
419
|
-
// not a general-purpose bash parser or security boundary.
|
|
420
|
-
function looksLikeDirectAgentBrowserBash(command: string): boolean {
|
|
421
|
-
let currentToken = '';
|
|
422
|
-
let quoteState: ShellQuoteState;
|
|
423
|
-
let awaitingHeredocDelimiter: { stripTabs: boolean } | undefined;
|
|
424
|
-
let pendingHeredoc: { delimiter: string; stripTabs: boolean } | undefined;
|
|
425
|
-
let pendingHeredocLine = '';
|
|
426
|
-
let segmentTokens: string[] = [];
|
|
427
|
-
|
|
428
|
-
const acceptToken = (token: string) => {
|
|
429
|
-
if (token.length === 0) {
|
|
430
|
-
return;
|
|
431
|
-
}
|
|
432
|
-
if (awaitingHeredocDelimiter) {
|
|
433
|
-
pendingHeredoc = {
|
|
434
|
-
delimiter: stripOuterQuotes(token),
|
|
435
|
-
stripTabs: awaitingHeredocDelimiter.stripTabs,
|
|
436
|
-
};
|
|
437
|
-
awaitingHeredocDelimiter = undefined;
|
|
438
|
-
return;
|
|
439
|
-
}
|
|
440
|
-
segmentTokens.push(token);
|
|
441
|
-
};
|
|
442
|
-
const flushToken = () => {
|
|
443
|
-
acceptToken(currentToken);
|
|
444
|
-
currentToken = '';
|
|
445
|
-
};
|
|
446
|
-
const flushSegment = () => {
|
|
447
|
-
const launchesAgentBrowser = segmentLaunchesAgentBrowser(segmentTokens);
|
|
448
|
-
segmentTokens = [];
|
|
449
|
-
return launchesAgentBrowser;
|
|
450
|
-
};
|
|
451
|
-
|
|
452
|
-
for (let index = 0; index < command.length; index += 1) {
|
|
453
|
-
const char = command[index];
|
|
454
|
-
if (pendingHeredoc) {
|
|
455
|
-
if (char === '\n') {
|
|
456
|
-
const candidate = pendingHeredoc.stripTabs ? pendingHeredocLine.replace(/^\t+/, '') : pendingHeredocLine;
|
|
457
|
-
if (candidate === pendingHeredoc.delimiter) {
|
|
458
|
-
pendingHeredoc = undefined;
|
|
459
|
-
}
|
|
460
|
-
pendingHeredocLine = '';
|
|
461
|
-
continue;
|
|
462
|
-
}
|
|
463
|
-
pendingHeredocLine += char;
|
|
464
|
-
continue;
|
|
465
|
-
}
|
|
466
|
-
|
|
467
|
-
if (quoteState === 'single') {
|
|
468
|
-
currentToken += char;
|
|
469
|
-
if (char === "'") {
|
|
470
|
-
quoteState = undefined;
|
|
471
|
-
}
|
|
472
|
-
continue;
|
|
473
|
-
}
|
|
474
|
-
if (quoteState === 'double') {
|
|
475
|
-
currentToken += char;
|
|
476
|
-
if (char === '\\' && index + 1 < command.length) {
|
|
477
|
-
currentToken += command[index + 1];
|
|
478
|
-
index += 1;
|
|
479
|
-
continue;
|
|
480
|
-
}
|
|
481
|
-
if (char === '"') {
|
|
482
|
-
quoteState = undefined;
|
|
483
|
-
}
|
|
484
|
-
continue;
|
|
485
|
-
}
|
|
486
|
-
if (char === "'" || char === '"') {
|
|
487
|
-
currentToken += char;
|
|
488
|
-
quoteState = char === "'" ? 'single' : 'double';
|
|
489
|
-
continue;
|
|
490
|
-
}
|
|
491
|
-
if (char === '\\' && index + 1 < command.length) {
|
|
492
|
-
currentToken += char;
|
|
493
|
-
currentToken += command[index + 1];
|
|
494
|
-
index += 1;
|
|
495
|
-
continue;
|
|
496
|
-
}
|
|
497
|
-
if (char === '\n') {
|
|
498
|
-
flushToken();
|
|
499
|
-
if (flushSegment()) {
|
|
500
|
-
return true;
|
|
501
|
-
}
|
|
502
|
-
continue;
|
|
503
|
-
}
|
|
504
|
-
if (/\s/.test(char)) {
|
|
505
|
-
flushToken();
|
|
506
|
-
continue;
|
|
507
|
-
}
|
|
508
|
-
const threeCharOperator = command.slice(index, index + 3);
|
|
509
|
-
if (threeCharOperator === '<<-') {
|
|
510
|
-
flushToken();
|
|
511
|
-
awaitingHeredocDelimiter = { stripTabs: true };
|
|
512
|
-
index += 2;
|
|
513
|
-
continue;
|
|
514
|
-
}
|
|
515
|
-
const twoCharOperator = command.slice(index, index + 2);
|
|
516
|
-
if (twoCharOperator === '<<') {
|
|
517
|
-
flushToken();
|
|
518
|
-
awaitingHeredocDelimiter = { stripTabs: false };
|
|
519
|
-
index += 1;
|
|
520
|
-
continue;
|
|
521
|
-
}
|
|
522
|
-
if (twoCharOperator === '&&' || twoCharOperator === '||') {
|
|
523
|
-
flushToken();
|
|
524
|
-
if (flushSegment()) {
|
|
525
|
-
return true;
|
|
526
|
-
}
|
|
527
|
-
index += 1;
|
|
528
|
-
continue;
|
|
529
|
-
}
|
|
530
|
-
if (char === '|' || char === ';' || char === '&') {
|
|
531
|
-
flushToken();
|
|
532
|
-
if (flushSegment()) {
|
|
533
|
-
return true;
|
|
534
|
-
}
|
|
535
|
-
continue;
|
|
536
|
-
}
|
|
537
|
-
currentToken += char;
|
|
538
|
-
}
|
|
539
|
-
|
|
540
|
-
flushToken();
|
|
541
|
-
return flushSegment();
|
|
542
|
-
}
|
|
543
|
-
|
|
544
|
-
function isHarmlessAgentBrowserInspectionCommand(command: string): boolean {
|
|
545
|
-
return HARMLESS_AGENT_BROWSER_INSPECTION_PATTERN.test(command);
|
|
546
|
-
}
|
|
547
|
-
|
|
548
|
-
function isTruthyEnvValue(value: string | undefined): boolean {
|
|
549
|
-
return value === "1" || value?.toLowerCase() === "true" || value?.toLowerCase() === "yes";
|
|
550
|
-
}
|
|
551
|
-
|
|
552
|
-
async function isPackageDevelopmentCwd(cwd: string): Promise<boolean> {
|
|
553
|
-
try {
|
|
554
|
-
const packageJson = JSON.parse(await readFile(join(cwd, "package.json"), "utf8")) as { name?: unknown };
|
|
555
|
-
return packageJson.name === PACKAGE_NAME;
|
|
556
|
-
} catch {
|
|
557
|
-
return false;
|
|
558
|
-
}
|
|
559
|
-
}
|
|
560
|
-
|
|
561
|
-
async function isDirectAgentBrowserBashAllowed(cwd: string): Promise<boolean> {
|
|
562
|
-
return isTruthyEnvValue(process.env[DIRECT_AGENT_BROWSER_BASH_BYPASS_ENV]) || await isPackageDevelopmentCwd(cwd);
|
|
563
|
-
}
|
|
564
|
-
|
|
565
|
-
function isRecord(value: unknown): value is Record<string, unknown> {
|
|
566
|
-
return typeof value === "object" && value !== null;
|
|
567
|
-
}
|
|
568
|
-
|
|
569
|
-
function getBatchAnnotateValidationError(args: string[], stdin: string | undefined): string | undefined {
|
|
148
|
+
function getBatchPreflightValidationError(args: string[], stdin: string | undefined): string | undefined {
|
|
570
149
|
const commandTokens = extractCommandTokens(args);
|
|
571
150
|
if (commandTokens[0] !== "batch" || stdin === undefined) {
|
|
572
151
|
return undefined;
|
|
@@ -575,14 +154,18 @@ function getBatchAnnotateValidationError(args: string[], stdin: string | undefin
|
|
|
575
154
|
if (parsed.error || parsed.steps === undefined) {
|
|
576
155
|
return undefined;
|
|
577
156
|
}
|
|
578
|
-
const
|
|
579
|
-
|
|
580
|
-
|
|
157
|
+
for (const [index, step] of parsed.steps.entries()) {
|
|
158
|
+
if (!Array.isArray(step) || !step.every((token) => typeof token === "string") || step.length === 0) continue;
|
|
159
|
+
const stepValidationError = validateToolArgs(step);
|
|
160
|
+
if (stepValidationError) return `Unsupported batch step ${index + 1}: ${stepValidationError}`;
|
|
161
|
+
if (step[0] === "screenshot" && step.includes("--annotate")) {
|
|
162
|
+
return [
|
|
163
|
+
`Unsupported batch screenshot annotation in step ${index + 1}: put --annotate in top-level args, not inside the batch step.`,
|
|
164
|
+
`Use: { "args": ["--annotate", "batch"], "stdin": "[[\\"screenshot\\",\\"/path/to/image.png\\"]]" }`,
|
|
165
|
+
].join("\n");
|
|
166
|
+
}
|
|
581
167
|
}
|
|
582
|
-
return
|
|
583
|
-
`Unsupported batch screenshot annotation in step ${badStepIndex + 1}: put --annotate in top-level args, not inside the batch step.`,
|
|
584
|
-
`Use: { "args": ["--annotate", "batch"], "stdin": "[[\\"screenshot\\",\\"/path/to/image.png\\"]]" }`,
|
|
585
|
-
].join("\n");
|
|
168
|
+
return undefined;
|
|
586
169
|
}
|
|
587
170
|
|
|
588
171
|
function restoreArtifactManifestFromBranch(branch: unknown[]): SessionArtifactManifest | undefined {
|
|
@@ -1208,8 +791,9 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
|
|
|
1208
791
|
},
|
|
1209
792
|
async execute(_toolCallId, params, signal, onUpdate, ctx) {
|
|
1210
793
|
const promptPolicy = buildPromptPolicy(getLatestUserPrompt(ctx.sessionManager.getBranch()));
|
|
794
|
+
const outputPath = isRecord(params) && typeof params.outputPath === "string" ? params.outputPath : undefined;
|
|
1211
795
|
const resolvedInput = resolveAgentBrowserInput({
|
|
1212
|
-
|
|
796
|
+
getBatchPreflightValidationError,
|
|
1213
797
|
managedSessionActive,
|
|
1214
798
|
params,
|
|
1215
799
|
});
|
|
@@ -1270,7 +854,7 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
|
|
|
1270
854
|
? await managedSessionExecutionQueue.run(runElectronHostInput)
|
|
1271
855
|
: await runElectronHostInput();
|
|
1272
856
|
if (electronHostResult) {
|
|
1273
|
-
return electronHostResult;
|
|
857
|
+
return applyAgentBrowserOutputPath({ cwd: ctx.cwd, outputPath, result: electronHostResult });
|
|
1274
858
|
}
|
|
1275
859
|
|
|
1276
860
|
const explicitSessionName = extractExplicitSessionName(toolArgs);
|
|
@@ -1335,7 +919,7 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
|
|
|
1335
919
|
});
|
|
1336
920
|
if (serializeBrowserCommand) branchStateGeneration += 1;
|
|
1337
921
|
}
|
|
1338
|
-
return result;
|
|
922
|
+
return applyAgentBrowserOutputPath({ cwd: ctx.cwd, outputPath, preserveTextContent: Array.isArray(params.args) && params.args.includes("--json"), result });
|
|
1339
923
|
};
|
|
1340
924
|
|
|
1341
925
|
return serializeBrowserCommand
|