@oh-my-pi/pi-coding-agent 14.5.11 → 14.5.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1655 +1,336 @@
1
- import * as fs from "node:fs";
2
- import * as os from "node:os";
3
- import * as path from "node:path";
4
- import { Readability } from "@mozilla/readability";
5
1
  import type { AgentTool, AgentToolContext, AgentToolResult, AgentToolUpdateCallback } from "@oh-my-pi/pi-agent-core";
6
2
  import { StringEnum } from "@oh-my-pi/pi-ai";
7
- import { $which, getPuppeteerDir, logger, prompt, Snowflake, untilAborted } from "@oh-my-pi/pi-utils";
3
+ import { prompt, untilAborted } from "@oh-my-pi/pi-utils";
8
4
  import { type Static, Type } from "@sinclair/typebox";
9
- import { type HTMLElement, parseHTML } from "linkedom";
10
- import type {
11
- Browser,
12
- CDPSession,
13
- ElementHandle,
14
- KeyInput,
15
- Page,
16
- default as Puppeteer,
17
- SerializedAXNode,
18
- } from "puppeteer-core";
19
5
  import browserDescription from "../prompts/tools/browser.md" with { type: "text" };
20
6
  import type { ToolSession } from "../sdk";
21
- import { resizeImage } from "../utils/image-resize";
22
- import { htmlToBasicMarkdown } from "../web/scrapers/types";
7
+ import {
8
+ acquireBrowser,
9
+ acquireTab,
10
+ type BrowserHandle,
11
+ type BrowserKind,
12
+ type BrowserKindTag,
13
+ dropHeadlessBrowsers,
14
+ getTab,
15
+ releaseAllTabs,
16
+ releaseTab,
17
+ } from "./browser/registry";
18
+ import { collectObservation, formatObservation, type Observation, runInTab, type ScreenshotResult } from "./browser/vm";
23
19
  import type { OutputMeta } from "./output-meta";
24
- import { expandPath, resolveToCwd } from "./path-utils";
25
- import stealthTamperingScript from "./puppeteer/00_stealth_tampering.txt" with { type: "text" };
26
- import stealthActivityScript from "./puppeteer/01_stealth_activity.txt" with { type: "text" };
27
- import stealthHairlineScript from "./puppeteer/02_stealth_hairline.txt" with { type: "text" };
28
- import stealthBotdScript from "./puppeteer/03_stealth_botd.txt" with { type: "text" };
29
- import stealthIframeScript from "./puppeteer/04_stealth_iframe.txt" with { type: "text" };
30
- import stealthWebglScript from "./puppeteer/05_stealth_webgl.txt" with { type: "text" };
31
- import stealthScreenScript from "./puppeteer/06_stealth_screen.txt" with { type: "text" };
32
- import stealthFontsScript from "./puppeteer/07_stealth_fonts.txt" with { type: "text" };
33
- import stealthAudioScript from "./puppeteer/08_stealth_audio.txt" with { type: "text" };
34
- import stealthLocaleScript from "./puppeteer/09_stealth_locale.txt" with { type: "text" };
35
- import stealthPluginsScript from "./puppeteer/10_stealth_plugins.txt" with { type: "text" };
36
- import stealthHardwareScript from "./puppeteer/11_stealth_hardware.txt" with { type: "text" };
37
- import stealthCodecsScript from "./puppeteer/12_stealth_codecs.txt" with { type: "text" };
38
- import stealthWorkerScript from "./puppeteer/13_stealth_worker.txt" with { type: "text" };
39
- import { formatScreenshot } from "./render-utils";
20
+ import { resolveToCwd } from "./path-utils";
40
21
  import { ToolAbortError, ToolError, throwIfAborted } from "./tool-errors";
41
22
  import { toolResult } from "./tool-result";
42
23
  import { clampTimeout } from "./tool-timeouts";
43
24
 
44
- /**
45
- * Lazy-import puppeteer from a safe CWD so cosmiconfig doesn't choke
46
- * on malformed package.json files in the user's project tree.
47
- */
48
- let puppeteerModule: typeof Puppeteer | undefined;
49
- async function loadPuppeteer(): Promise<typeof Puppeteer> {
50
- if (puppeteerModule) return puppeteerModule;
51
- const prev = process.cwd();
52
- const safeDir = getPuppeteerDir();
53
- await Bun.write(path.join(safeDir, "package.json"), "{}");
54
- try {
55
- process.chdir(safeDir);
56
- puppeteerModule = (await import("puppeteer-core")).default;
57
- return puppeteerModule;
58
- } finally {
59
- process.chdir(prev);
60
- }
61
- }
62
-
63
- /**
64
- * Lazily download Chromium on first browser launch via @puppeteer/browsers.
65
- * Skipped when a system Chromium (NixOS) or PUPPETEER_EXECUTABLE_PATH is set.
66
- * The browser is cached under ~/.omp/puppeteer (getPuppeteerDir).
67
- */
68
- let chromiumExecutablePromise: Promise<string | undefined> | undefined;
69
- async function ensureChromiumExecutable(): Promise<string | undefined> {
70
- const sysChrome = resolveSystemChromium();
71
- if (sysChrome) return sysChrome;
72
- const envPath = process.env.PUPPETEER_EXECUTABLE_PATH;
73
- if (envPath) return envPath;
74
- if (chromiumExecutablePromise) return chromiumExecutablePromise;
75
-
76
- chromiumExecutablePromise = (async () => {
77
- const [browsers, revisions] = await Promise.all([
78
- import("@puppeteer/browsers"),
79
- import("puppeteer-core/internal/revisions.js"),
80
- ]);
81
- const platform = browsers.detectBrowserPlatform();
82
- if (!platform) {
83
- logger.warn("Could not detect browser platform; relying on puppeteer default resolution");
84
- return undefined;
85
- }
86
- const cacheDir = getPuppeteerDir();
87
- const buildId = await browsers.resolveBuildId(
88
- browsers.Browser.CHROME,
89
- platform,
90
- revisions.PUPPETEER_REVISIONS.chrome,
91
- );
92
- const executablePath = browsers.computeExecutablePath({
93
- browser: browsers.Browser.CHROME,
94
- buildId,
95
- cacheDir,
96
- platform,
97
- });
98
- if (fs.existsSync(executablePath)) return executablePath;
99
-
100
- logger.warn("Downloading Chromium for puppeteer (first browser use)", {
101
- buildId,
102
- platform,
103
- cacheDir,
104
- });
105
- let lastReportedPercent = -1;
106
- await browsers.install({
107
- browser: browsers.Browser.CHROME,
108
- buildId,
109
- cacheDir,
110
- platform,
111
- downloadProgressCallback: (downloaded, total) => {
112
- if (total <= 0) return;
113
- const pct = Math.floor((downloaded / total) * 100);
114
- if (pct >= lastReportedPercent + 10 || downloaded === total) {
115
- lastReportedPercent = pct;
116
- logger.debug(
117
- `Chromium download: ${pct}% (${Math.round(downloaded / 1_000_000)} / ${Math.round(total / 1_000_000)} MB)`,
118
- );
119
- }
120
- },
121
- });
122
- return executablePath;
123
- })().catch(err => {
124
- chromiumExecutablePromise = undefined;
125
- throw new ToolError(
126
- `Failed to install Chromium for puppeteer: ${(err as Error).message}. ` +
127
- "Set PUPPETEER_EXECUTABLE_PATH to use an existing Chrome/Chromium binary, or install one manually.",
128
- );
129
- });
130
- return chromiumExecutablePromise;
131
- }
132
-
133
- /**
134
- * Resolve a system-installed Chrome/Chromium so `puppeteer.launch()` can reuse
135
- * it instead of forcing a Chromium download. Returns `undefined` when no binary
136
- * is found, which lets the caller fall back to a managed download.
137
- *
138
- * Detection order (per platform):
139
- * - macOS: Google Chrome → Chromium → Microsoft Edge (system + user Applications)
140
- * - Linux: PATH lookups (google-chrome, chromium, etc.) → common /usr/bin paths,
141
- * with NixOS-specific profile paths added when /etc/NIXOS exists
142
- * - Windows: Program Files / LocalAppData install paths for Chrome and Edge
143
- *
144
- * Honored regardless of platform: PUPPETEER_EXECUTABLE_PATH callers should bypass
145
- * this entirely (handled in ensureChromiumExecutable).
146
- */
147
- let _resolvedChromium: string | null | undefined; // undefined = unchecked; null = not found
148
- function isExecutableFile(p: string): boolean {
149
- try {
150
- const st = fs.statSync(p);
151
- return st.isFile();
152
- } catch {
153
- return false;
154
- }
155
- }
156
-
157
- function systemChromiumCandidates(): string[] {
158
- const home = os.homedir();
159
- const candidates: string[] = [];
160
- switch (process.platform) {
161
- case "darwin": {
162
- for (const root of ["/Applications", path.join(home, "Applications")]) {
163
- candidates.push(
164
- path.join(root, "Google Chrome.app/Contents/MacOS/Google Chrome"),
165
- path.join(root, "Google Chrome Beta.app/Contents/MacOS/Google Chrome Beta"),
166
- path.join(root, "Google Chrome Dev.app/Contents/MacOS/Google Chrome Dev"),
167
- path.join(root, "Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary"),
168
- path.join(root, "Chromium.app/Contents/MacOS/Chromium"),
169
- path.join(root, "Microsoft Edge.app/Contents/MacOS/Microsoft Edge"),
170
- );
171
- }
172
- break;
173
- }
174
- case "linux": {
175
- const names = ["google-chrome-stable", "google-chrome", "chromium", "chromium-browser", "chrome"];
176
- for (const name of names) {
177
- const found = $which(name);
178
- if (found) candidates.push(found);
179
- }
180
- candidates.push(
181
- "/usr/bin/google-chrome-stable",
182
- "/usr/bin/google-chrome",
183
- "/usr/bin/chromium",
184
- "/usr/bin/chromium-browser",
185
- "/snap/bin/chromium",
186
- "/var/lib/flatpak/exports/bin/com.google.Chrome",
187
- "/var/lib/flatpak/exports/bin/org.chromium.Chromium",
188
- );
189
- let onNixos = false;
190
- try {
191
- onNixos = fs.existsSync("/etc/NIXOS");
192
- } catch {}
193
- if (onNixos) {
194
- candidates.push(path.join(home, ".nix-profile/bin/chromium"), "/run/current-system/sw/bin/chromium");
195
- }
196
- break;
197
- }
198
- case "win32": {
199
- const programFiles = process.env.ProgramFiles ?? "C:\\Program Files";
200
- const programFilesX86 = process.env["ProgramFiles(x86)"] ?? "C:\\Program Files (x86)";
201
- const localAppData = process.env.LOCALAPPDATA ?? path.join(home, "AppData\\Local");
202
- candidates.push(
203
- path.join(programFiles, "Google\\Chrome\\Application\\chrome.exe"),
204
- path.join(programFilesX86, "Google\\Chrome\\Application\\chrome.exe"),
205
- path.join(localAppData, "Google\\Chrome\\Application\\chrome.exe"),
206
- path.join(programFiles, "Chromium\\Application\\chrome.exe"),
207
- path.join(localAppData, "Chromium\\Application\\chrome.exe"),
208
- path.join(programFiles, "Microsoft\\Edge\\Application\\msedge.exe"),
209
- path.join(programFilesX86, "Microsoft\\Edge\\Application\\msedge.exe"),
210
- );
211
- break;
212
- }
213
- }
214
- return candidates;
215
- }
216
-
217
- function resolveSystemChromium(): string | undefined {
218
- if (_resolvedChromium !== undefined) return _resolvedChromium ?? undefined;
219
- const seen = new Set<string>();
220
- for (const candidate of systemChromiumCandidates()) {
221
- if (!candidate || seen.has(candidate)) continue;
222
- seen.add(candidate);
223
- if (isExecutableFile(candidate)) {
224
- _resolvedChromium = candidate;
225
- logger.debug("Using system Chrome/Chromium", { path: candidate });
226
- return candidate;
227
- }
228
- }
229
- _resolvedChromium = null;
230
- return undefined;
231
- }
232
-
233
- const DEFAULT_VIEWPORT = { width: 1365, height: 768, deviceScaleFactor: 1.25 };
234
- const STEALTH_IGNORE_DEFAULT_ARGS = [
235
- "--disable-extensions",
236
- "--disable-default-apps",
237
- "--disable-component-extensions-with-background-pages",
238
- ];
239
- const STEALTH_ACCEPT_LANGUAGE = "en-US,en";
240
- const PUPPETEER_SOURCE_URL_SUFFIX = "//# sourceURL=__puppeteer_evaluation_script__";
241
- const INTERACTIVE_AX_ROLES = new Set([
242
- "button",
243
- "link",
244
- "textbox",
245
- "combobox",
246
- "listbox",
247
- "option",
248
- "checkbox",
249
- "radio",
250
- "switch",
251
- "tab",
252
- "menuitem",
253
- "menuitemcheckbox",
254
- "menuitemradio",
255
- "slider",
256
- "spinbutton",
257
- "searchbox",
258
- "treeitem",
259
- ]);
260
-
261
- declare global {
262
- interface Element extends HTMLElement {}
263
-
264
- function getComputedStyle(element: Element): Record<string, unknown>;
265
- var innerWidth: number;
266
- var innerHeight: number;
267
- var document: {
268
- elementFromPoint(x: number, y: number): Element | null;
269
- };
270
- }
271
-
272
- const LEGACY_SELECTOR_PREFIXES = ["p-aria/", "p-text/", "p-xpath/", "p-pierce/"] as const;
273
-
274
- function normalizeSelector(selector: string): string {
275
- if (!selector) return selector;
276
- if (selector.startsWith("p-") && !LEGACY_SELECTOR_PREFIXES.some(prefix => selector.startsWith(prefix))) {
277
- throw new ToolError(
278
- `Unsupported selector prefix. Use CSS or puppeteer query handlers (aria/, text/, xpath/, pierce/). Got: ${selector}`,
279
- );
280
- }
281
- if (selector.startsWith("p-text/")) {
282
- return `text/${selector.slice("p-text/".length)}`;
283
- }
284
- if (selector.startsWith("p-xpath/")) {
285
- return `xpath/${selector.slice("p-xpath/".length)}`;
286
- }
287
- if (selector.startsWith("p-pierce/")) {
288
- return `pierce/${selector.slice("p-pierce/".length)}`;
289
- }
290
- if (selector.startsWith("p-aria/")) {
291
- const rest = selector.slice("p-aria/".length);
292
- // Playwright-style: p-aria/[name="Sign in"] → aria/Sign in
293
- const nameMatch = rest.match(/\[\s*name\s*=\s*(?:"([^"]+)"|'([^']+)'|([^\]]+))\s*\]/);
294
- const name = nameMatch?.[1] ?? nameMatch?.[2] ?? nameMatch?.[3];
295
- if (name) return `aria/${name.trim()}`;
296
- return `aria/${rest}`;
297
- }
298
- return selector;
299
- }
300
-
301
- type ActionabilityResult = { ok: true; x: number; y: number } | { ok: false; reason: string };
302
-
303
- async function resolveActionableQueryHandlerClickTarget(handles: ElementHandle[]): Promise<ElementHandle | null> {
304
- const candidates: Array<{
305
- handle: ElementHandle;
306
- rect: { x: number; y: number; w: number; h: number };
307
- ownedProxy?: ElementHandle;
308
- }> = [];
309
-
310
- for (const handle of handles) {
311
- let clickable: ElementHandle = handle;
312
- let clickableProxy: ElementHandle | null = null;
313
- try {
314
- const proxy = await handle.evaluateHandle(el => {
315
- const target =
316
- (el as Element).closest(
317
- 'a,button,[role="button"],[role="link"],input[type="button"],input[type="submit"]',
318
- ) ?? el;
319
- return target;
320
- });
321
- const nodeHandle = proxy.asElement();
322
- clickableProxy = nodeHandle ? (nodeHandle as unknown as ElementHandle) : null;
323
- if (clickableProxy) {
324
- clickable = clickableProxy;
325
- }
326
- } catch {
327
- // ignore
328
- }
329
-
330
- try {
331
- const intersecting = await clickable.isIntersectingViewport();
332
- if (!intersecting) continue;
333
- const rect = (await clickable.evaluate(el => {
334
- const r = (el as Element).getBoundingClientRect();
335
- return { x: r.left, y: r.top, w: r.width, h: r.height };
336
- })) as { x: number; y: number; w: number; h: number };
337
- if (rect.w < 1 || rect.h < 1) continue;
338
- candidates.push({ handle: clickable, rect, ownedProxy: clickableProxy ?? undefined });
339
- } catch {
340
- // ignore
341
- } finally {
342
- if (clickableProxy && clickableProxy !== handle && clickable !== clickableProxy) {
343
- try {
344
- await clickableProxy.dispose();
345
- } catch {}
346
- }
347
- }
348
- }
349
-
350
- if (!candidates.length) return null;
351
-
352
- // Prefer top-most visible element (nav/header usually wins), tie-break by left-most.
353
- candidates.sort((a, b) => a.rect.y - b.rect.y || a.rect.x - b.rect.x);
354
- const winner = candidates[0]?.handle ?? null;
355
- // Dispose owned proxies for non-winning candidates
356
- for (let i = 1; i < candidates.length; i++) {
357
- const c = candidates[i]!;
358
- if (c.ownedProxy) {
359
- try {
360
- await c.ownedProxy.dispose();
361
- } catch {}
362
- }
363
- }
364
- return winner;
365
- }
25
+ export { extractReadableFromHtml, type ReadableFormat, type ReadableResult } from "./browser/readable";
26
+ export type { Observation, ObservationEntry } from "./browser/vm";
366
27
 
367
- async function isClickActionable(handle: ElementHandle): Promise<ActionabilityResult> {
368
- return (await handle.evaluate(el => {
369
- const element = el as HTMLElement;
370
- const style = globalThis.getComputedStyle(element);
371
- if (style.display === "none") return { ok: false as const, reason: "display:none" };
372
- if (style.visibility === "hidden") return { ok: false as const, reason: "visibility:hidden" };
373
- if (style.pointerEvents === "none") return { ok: false as const, reason: "pointer-events:none" };
374
- if (Number(style.opacity) === 0) return { ok: false as const, reason: "opacity:0" };
28
+ const DEFAULT_TAB_NAME = "main";
375
29
 
376
- const r = element.getBoundingClientRect();
377
- if (r.width < 1 || r.height < 1) return { ok: false as const, reason: "zero-size" };
378
-
379
- const vw = globalThis.innerWidth;
380
- const vh = globalThis.innerHeight;
381
- const left = Math.max(0, Math.min(vw, r.left));
382
- const right = Math.max(0, Math.min(vw, r.right));
383
- const top = Math.max(0, Math.min(vh, r.top));
384
- const bottom = Math.max(0, Math.min(vh, r.bottom));
385
- if (right - left < 1 || bottom - top < 1) return { ok: false as const, reason: "off-viewport" };
386
-
387
- const x = Math.floor((left + right) / 2);
388
- const y = Math.floor((top + bottom) / 2);
389
- const topEl = globalThis.document.elementFromPoint(x, y);
390
- if (!topEl) return { ok: false as const, reason: "elementFromPoint-null" };
391
- if (topEl === element || element.contains(topEl) || (topEl as Element).contains(element)) {
392
- return { ok: true as const, x, y };
393
- }
394
- return { ok: false as const, reason: "obscured" };
395
- })) as ActionabilityResult;
396
- }
397
-
398
- async function clickQueryHandlerText(
399
- page: Page,
400
- selector: string,
401
- timeoutMs: number,
402
- signal?: AbortSignal,
403
- ): Promise<void> {
404
- const timeoutSignal = AbortSignal.timeout(timeoutMs);
405
- const clickSignal = signal ? AbortSignal.any([signal, timeoutSignal]) : timeoutSignal;
406
- const start = Date.now();
407
- let lastSeen = 0;
408
- let lastReason: string | null = null;
409
-
410
- while (Date.now() - start < timeoutMs) {
411
- throwIfAborted(clickSignal);
412
- const handles = (await untilAborted(clickSignal, () => page.$$(selector))) as ElementHandle[];
413
- try {
414
- lastSeen = handles.length;
415
- const target = await resolveActionableQueryHandlerClickTarget(handles);
416
- if (!target) {
417
- lastReason = handles.length ? "no-visible-candidate" : "no-matches";
418
- await Bun.sleep(100);
419
- continue;
420
- }
421
- const actionability = await isClickActionable(target);
422
- if (!actionability.ok) {
423
- lastReason = actionability.reason;
424
- await Bun.sleep(100);
425
- continue;
426
- }
427
-
428
- try {
429
- await untilAborted(clickSignal, () => target.click());
430
- return;
431
- } catch (err) {
432
- lastReason = err instanceof Error ? err.message : String(err);
433
- await Bun.sleep(100);
434
- }
435
- } finally {
436
- await Promise.all(
437
- handles.map(async h => {
438
- try {
439
- await h.dispose();
440
- } catch {}
441
- }),
442
- );
443
- }
444
- }
445
-
446
- throw new ToolError(
447
- `Timed out clicking ${selector} (seen ${lastSeen} matches; last reason: ${lastReason ?? "unknown"}). ` +
448
- "If there are multiple matching elements, use observe+click_id or a more specific selector.",
449
- );
450
- }
451
-
452
- /**
453
- * Stealth init scripts for Puppeteer.
454
- */
455
-
456
- type PuppeteerCdpClient = {
457
- send: (method: string, params?: Record<string, unknown>) => Promise<unknown>;
458
- };
459
-
460
- type UserAgentOverride = {
461
- userAgent: string;
462
- platform: string;
463
- acceptLanguage: string;
464
- userAgentMetadata: {
465
- brands: Array<{ brand: string; version: string }>;
466
- fullVersion: string;
467
- platform: string;
468
- platformVersion: string;
469
- architecture: string;
470
- model: string;
471
- mobile: boolean;
472
- };
473
- };
474
-
475
- function resolvePageClient(page: Page): PuppeteerCdpClient | null {
476
- const pageWithClient = page as Page & {
477
- _client?: (() => PuppeteerCdpClient) | PuppeteerCdpClient;
478
- };
479
- if (!pageWithClient._client) return null;
480
- return typeof pageWithClient._client === "function" ? pageWithClient._client() : pageWithClient._client;
481
- }
482
-
483
- const puppeteerGetArgsSchema = Type.Array(
484
- Type.Object({
485
- selector: Type.String({
486
- description: "target element selector",
487
- examples: ["aria/Sign in", "text/Continue", "xpath/...", "pierce/..."],
488
- }),
489
- attribute: Type.Optional(Type.String({ description: "attribute name", examples: ["href", "data-id"] })),
490
- }),
491
- { description: "batch get_* args", minItems: 1 },
492
- );
493
-
494
- const browserSchema = Type.Object({
495
- action: StringEnum(
496
- [
497
- "open",
498
- "goto",
499
- "observe",
500
- "click",
501
- "click_id",
502
- "type",
503
- "type_id",
504
- "fill",
505
- "fill_id",
506
- "press",
507
- "scroll",
508
- "drag",
509
- "wait_for_selector",
510
- "evaluate",
511
- "get_text",
512
- "get_html",
513
- "get_attribute",
514
- "extract_readable",
515
- "screenshot",
516
- "close",
517
- ],
518
- { description: "action to perform" },
519
- ),
520
- url: Type.Optional(Type.String({ description: "url to navigate to", examples: ["https://example.com"] })),
521
- selector: Type.Optional(
30
+ const appSchema = Type.Object({
31
+ path: Type.Optional(
522
32
  Type.String({
523
- description: "target element selector",
524
- examples: ["aria/Sign in", "text/Continue", "xpath/...", "pierce/..."],
33
+ description: "absolute path to a binary to spawn (single-instance reuse)",
34
+ examples: ["/Applications/Cursor.app/Contents/MacOS/Cursor"],
525
35
  }),
526
36
  ),
527
- element_id: Type.Optional(Type.Number({ description: "observed element id" })),
528
- include_all: Type.Optional(Type.Boolean({ description: "include non-interactive nodes" })),
529
- viewport_only: Type.Optional(Type.Boolean({ description: "limit to viewport" })),
530
- args: Type.Optional(puppeteerGetArgsSchema),
531
- script: Type.Optional(
532
- Type.String({ description: "javascript expression", examples: ["document.title", "window.location.href"] }),
533
- ),
534
- text: Type.Optional(Type.String({ description: "text to type", examples: ["hello world"] })),
535
- value: Type.Optional(Type.String({ description: "value to set", examples: ["hello"] })),
536
- attribute: Type.Optional(Type.String({ description: "attribute to read", examples: ["href", "data-id"] })),
537
- key: Type.Optional(Type.String({ description: "keyboard key", examples: ["Enter", "Tab", "Escape"] })),
538
- timeout: Type.Optional(Type.Number({ description: "timeout in seconds", default: 30 })),
539
- wait_until: Type.Optional(
540
- StringEnum(["load", "domcontentloaded", "networkidle0", "networkidle2"], {
541
- description: "navigation wait condition",
37
+ cdp_url: Type.Optional(
38
+ Type.String({
39
+ description: "existing CDP endpoint to connect to (e.g. http://127.0.0.1:9222)",
542
40
  }),
543
41
  ),
544
- full_page: Type.Optional(Type.Boolean({ description: "full page screenshot" })),
545
- format: Type.Optional(
546
- StringEnum(["text", "markdown"], {
547
- description: "output format",
42
+ args: Type.Optional(Type.Array(Type.String(), { description: "extra CLI args when spawning" })),
43
+ target: Type.Optional(Type.String({ description: "substring matched against url+title to pick a BrowserWindow" })),
44
+ });
45
+
46
+ const browserSchema = Type.Object({
47
+ action: StringEnum(["open", "close", "run"], { description: "tab/browser operation" }),
48
+ name: Type.Optional(
49
+ Type.String({
50
+ description: "tab id; default 'main'. Multiple tabs can coexist; reusable across run() calls and subagents.",
51
+ examples: ["main", "docs", "gh"],
548
52
  }),
549
53
  ),
550
- path: Type.Optional(Type.String({ description: "screenshot save path", examples: ["out.png"] })),
54
+ url: Type.Optional(Type.String({ description: "open: navigate after acquiring tab" })),
55
+ app: Type.Optional(appSchema),
551
56
  viewport: Type.Optional(
552
57
  Type.Object({
553
- width: Type.Number({ description: "viewport width" }),
554
- height: Type.Number({ description: "viewport height" }),
555
- device_scale_factor: Type.Optional(Type.Number({ description: "device scale factor" })),
58
+ width: Type.Number(),
59
+ height: Type.Number(),
60
+ scale: Type.Optional(Type.Number()),
556
61
  }),
557
62
  ),
558
- delta_x: Type.Optional(Type.Number({ description: "scroll delta x" })),
559
- delta_y: Type.Optional(Type.Number({ description: "scroll delta y" })),
560
- from_selector: Type.Optional(
561
- Type.String({
562
- description: "drag start selector",
563
- examples: ["aria/Drag handle"],
63
+ wait_until: Type.Optional(
64
+ StringEnum(["load", "domcontentloaded", "networkidle0", "networkidle2"], {
65
+ description: "navigation wait condition for url",
66
+ }),
67
+ ),
68
+ dialogs: Type.Optional(
69
+ StringEnum(["accept", "dismiss"], {
70
+ description: "open: auto-handle alert/confirm/beforeunload dialogs (default: leave for caller to handle)",
564
71
  }),
565
72
  ),
566
- to_selector: Type.Optional(
73
+ code: Type.Optional(
567
74
  Type.String({
568
- description: "drag end selector",
569
- examples: ["text/Drop zone"],
75
+ description:
76
+ "run: JS body executed with `page`, `browser`, `tab`, `display`, `assert`, `wait` in scope. Treated as the body of an async function. Use `display(value)` to attach text/JSON/images; the function's return value is JSON-serialized as a final block.",
570
77
  }),
571
78
  ),
79
+ timeout: Type.Optional(Type.Number({ description: "timeout in seconds", default: 30 })),
80
+ all: Type.Optional(Type.Boolean({ description: "close: close every tab" })),
81
+ kill: Type.Optional(Type.Boolean({ description: "close: also kill spawned-app browsers (default: leave running)" })),
572
82
  });
573
83
 
574
- /** Input schema for the Puppeteer tool. */
84
+ /** Input schema for the browser tool. */
575
85
  export type BrowserParams = Static<typeof browserSchema>;
576
86
 
577
- /** Details describing a Puppeteer tool execution result. */
87
+ /** Details describing a browser tool execution result (for renderers + transcript). */
578
88
  export interface BrowserToolDetails {
579
89
  action: BrowserParams["action"];
90
+ name?: string;
580
91
  url?: string;
581
- selector?: string;
582
- elementId?: number;
583
- result?: string | string[];
584
- screenshotPath?: string;
585
- mimeType?: string;
586
- bytes?: number;
92
+ browser?: BrowserKindTag;
587
93
  viewport?: { width: number; height: number; deviceScaleFactor?: number };
588
94
  observation?: Observation;
589
- readable?: ReadableResult;
95
+ screenshots?: ScreenshotResult[];
96
+ result?: string;
590
97
  meta?: OutputMeta;
591
98
  }
592
99
 
593
- export interface ObservationEntry {
594
- id: number;
595
- role: string;
596
- name?: string;
597
- value?: string | number;
598
- description?: string;
599
- keyshortcuts?: string;
600
- states: string[];
601
- }
602
-
603
- export interface Observation {
604
- url: string;
605
- title?: string;
606
- viewport: { width: number; height: number; deviceScaleFactor?: number };
607
- scroll: {
608
- x: number;
609
- y: number;
610
- width: number;
611
- height: number;
612
- scrollWidth: number;
613
- scrollHeight: number;
614
- };
615
- elements: ObservationEntry[];
616
- }
617
-
618
- export interface ReadableResult {
619
- url: string;
620
- title?: string;
621
- byline?: string;
622
- excerpt?: string;
623
- contentLength: number;
624
- text?: string;
625
- markdown?: string;
626
- }
627
-
628
- type ReadableFormat = "text" | "markdown";
629
-
630
- /** Trim to non-empty string or undefined. */
631
- function normalize(text: string | null | undefined): string | undefined {
632
- const trimmed = text?.trim();
633
- return trimmed || undefined;
634
- }
635
-
636
- /**
637
- * Extract readable content from raw HTML.
638
- * Tries Readability (article-isolation scoring) first, then falls back to a
639
- * CSS selector chain over the same pre-parsed DOM. Returns null if neither
640
- * path yields usable content.
641
- */
642
- export function extractReadableFromHtml(html: string, url: string, format: ReadableFormat): ReadableResult | null {
643
- const { document } = parseHTML(html);
644
-
645
- // --- Primary: Readability article extraction ---
646
- const article = new Readability(document).parse();
647
- if (article) {
648
- const result = toReadableResult(url, format, article.textContent, article.content, {
649
- title: article.title,
650
- byline: article.byline,
651
- excerpt: article.excerpt,
652
- length: article.length,
653
- });
654
- if (result) return result;
655
- }
656
-
657
- // --- Fallback: CSS selector chain ---
658
- const candidates = [
659
- document.querySelector("[data-pagefind-body]"),
660
- document.querySelector("main article"),
661
- document.querySelector("article"),
662
- document.querySelector("main"),
663
- document.querySelector("[role='main']"),
664
- document.body,
665
- ];
666
- for (const el of candidates) {
667
- if (!el) continue;
668
- const innerHTML = el.innerHTML?.trim();
669
- const textContent = el.textContent?.trim();
670
- if (!innerHTML || !textContent) continue;
671
- const result = toReadableResult(url, format, textContent, innerHTML, {
672
- title: document.title,
673
- excerpt: textContent.slice(0, 240),
674
- length: textContent.length,
675
- });
676
- if (result) return result;
677
- }
678
-
679
- return null;
680
- }
681
-
682
- /** Shared builder for both extraction paths. */
683
- function toReadableResult(
684
- url: string,
685
- format: ReadableFormat,
686
- textContent: string | null | undefined,
687
- htmlContent: string | null | undefined,
688
- meta: { title?: string | null; byline?: string | null; excerpt?: string | null; length?: number | null },
689
- ): ReadableResult | null {
690
- const text = normalize(textContent);
691
- const markdown = format === "markdown" ? (normalize(htmlToBasicMarkdown(htmlContent ?? "")) ?? text) : undefined;
692
- const normalizedText = format === "text" ? text : undefined;
693
- if (!normalizedText && !markdown) return null;
694
- return {
695
- url,
696
- title: normalize(meta.title),
697
- byline: normalize(meta.byline),
698
- excerpt: normalize(meta.excerpt),
699
- contentLength: meta.length ?? text?.length ?? markdown?.length ?? 0,
700
- text: normalizedText,
701
- markdown,
702
- };
703
- }
704
-
705
- function ensureParam<T>(value: T | undefined, name: string, action: string): T {
706
- if (value === undefined || value === null || value === "") {
707
- throw new ToolError(`Missing required parameter '${name}' for action '${action}'.`);
100
+ function resolveBrowserKind(params: BrowserParams, session: ToolSession): BrowserKind {
101
+ const app = params.app;
102
+ if (app?.cdp_url) {
103
+ return { kind: "connected", cdpUrl: app.cdp_url.replace(/\/+$/, "") };
708
104
  }
709
- return value;
710
- }
711
-
712
- function formatEvaluateResult(value: unknown): string {
713
- if (typeof value === "string") return value;
714
- if (value === undefined) return "undefined";
715
- try {
716
- const serialized = JSON.stringify(value, null, 2);
717
- return serialized ?? "undefined";
718
- } catch {
719
- return String(value);
105
+ if (app?.path) {
106
+ const exe = resolveToCwd(app.path, session.cwd);
107
+ return { kind: "spawned", path: exe };
720
108
  }
109
+ const headless = session.settings.get("browser.headless") as boolean;
110
+ return { kind: "headless", headless };
721
111
  }
722
112
 
723
113
  /**
724
- * Puppeteer tool for headless browser automation.
114
+ * Browser tool: stateful, multi-tab. Three actions:
115
+ * - `open` → acquire/create a named tab on a browser kind (headless | spawned | connected) and optionally goto a url.
116
+ * - `close` → release a named tab (or all tabs); dispose browser when refcount hits 0.
117
+ * - `run` → execute JS code against an existing tab with `page`/`browser`/`tab` helpers in scope.
725
118
  */
726
119
  export class BrowserTool implements AgentTool<typeof browserSchema, BrowserToolDetails> {
727
- readonly name = "puppeteer";
728
- readonly label = "Puppeteer";
120
+ readonly name = "browser";
121
+ readonly label = "Browser";
729
122
  readonly description: string;
730
123
  readonly parameters = browserSchema;
731
124
  readonly strict = true;
732
- #browser: Browser | null = null;
733
- #page: Page | null = null;
734
- #currentHeadless: boolean | null = null;
735
- #browserSession: CDPSession | null = null;
736
- #userAgentOverride: UserAgentOverride | null = null;
737
- #elementIdCounter = 0;
738
- readonly #elementCache = new Map<number, ElementHandle>();
739
- readonly #patchedClients = new WeakSet<object>();
740
125
 
741
126
  constructor(private readonly session: ToolSession) {
742
127
  this.description = prompt.render(browserDescription, {});
743
128
  }
744
129
 
745
- async #closeBrowser(): Promise<void> {
746
- await this.#clearElementCache();
747
- if (this.#page && !this.#page.isClosed()) {
748
- await this.#page.close();
749
- }
750
- this.#page = null;
751
- if (this.#browser?.connected) {
752
- await this.#browser.close();
753
- }
754
- this.#browser = null;
755
- this.#browserSession = null;
756
- this.#userAgentOverride = null;
130
+ /** Restart browser to apply mode changes (e.g. headless toggle). Drops only headless browsers. */
131
+ async restartForModeChange(): Promise<void> {
132
+ await dropHeadlessBrowsers();
757
133
  }
758
134
 
759
- async #resetBrowser(params?: BrowserParams): Promise<Page> {
760
- await this.#closeBrowser();
761
- this.#currentHeadless = this.session.settings.get("browser.headless");
762
- const vp = params?.viewport;
763
- const initialViewport = vp
764
- ? {
765
- width: vp.width,
766
- height: vp.height,
767
- deviceScaleFactor: vp.device_scale_factor ?? DEFAULT_VIEWPORT.deviceScaleFactor,
768
- }
769
- : DEFAULT_VIEWPORT;
770
- const puppeteer = await loadPuppeteer();
771
- const launchArgs = [
772
- "--no-sandbox",
773
- "--disable-setuid-sandbox",
774
- "--disable-blink-features=AutomationControlled",
775
- `--window-size=${initialViewport.width},${initialViewport.height}`,
776
- ];
777
- const proxy = process.env.PUPPETEER_PROXY;
778
- if (proxy) {
779
- launchArgs.push(`--proxy-server=${proxy}`);
780
- // Chrome (since v72) bypasses proxies for localhost by default. When PUPPETEER_PROXY_BYPASS_LOOPBACK
781
- // is true, add <-loopback> so traffic to localhost reaches the proxy (e.g. for mitmdump/auth capture).
782
- const bypassLoopback = process.env.PUPPETEER_PROXY_BYPASS_LOOPBACK?.toLowerCase();
783
- if (
784
- bypassLoopback === "true" ||
785
- bypassLoopback === "1" ||
786
- bypassLoopback === "yes" ||
787
- bypassLoopback === "on"
788
- ) {
789
- launchArgs.push("--proxy-bypass-list=<-loopback>");
790
- }
791
- }
792
- const ignoreCert = process.env.PUPPETEER_PROXY_IGNORE_CERT_ERRORS?.toLowerCase();
793
- if (ignoreCert === "true" || ignoreCert === "1" || ignoreCert === "yes" || ignoreCert === "on") {
794
- launchArgs.push("--ignore-certificate-errors");
795
- }
796
- this.#browser = await puppeteer.launch({
797
- headless: this.#currentHeadless,
798
- defaultViewport: this.#currentHeadless ? initialViewport : null,
799
- executablePath: await ensureChromiumExecutable(),
800
- args: launchArgs,
801
- ignoreDefaultArgs: [...STEALTH_IGNORE_DEFAULT_ARGS],
802
- });
803
- this.#page = await this.#browser.newPage();
804
- await this.#applyStealthPatches(this.#page);
805
- if (this.#currentHeadless || params?.viewport) {
806
- await this.#applyViewport(this.#page, params?.viewport);
807
- }
808
- return this.#page;
809
- }
135
+ async execute(
136
+ _toolCallId: string,
137
+ params: BrowserParams,
138
+ signal?: AbortSignal,
139
+ _onUpdate?: AgentToolUpdateCallback<BrowserToolDetails>,
140
+ _ctx?: AgentToolContext,
141
+ ): Promise<AgentToolResult<BrowserToolDetails>> {
142
+ try {
143
+ throwIfAborted(signal);
144
+ const timeoutSeconds = clampTimeout("browser", params.timeout);
145
+ const timeoutMs = timeoutSeconds * 1000;
146
+ const name = params.name ?? DEFAULT_TAB_NAME;
147
+ const details: BrowserToolDetails = { action: params.action, name };
810
148
 
811
- async #ensurePage(params?: BrowserParams): Promise<Page> {
812
- const desiredHeadless = this.session.settings.get("browser.headless");
813
- if (this.#currentHeadless !== null && this.#currentHeadless !== desiredHeadless) {
814
- return this.#resetBrowser(params);
815
- }
816
- if (this.#page && !this.#page.isClosed()) {
817
- return this.#page;
818
- }
819
- if (!this.#browser?.isConnected()) {
820
- return this.#resetBrowser(params);
821
- }
822
- this.#page = await this.#browser.newPage();
823
- await this.#applyStealthPatches(this.#page);
824
- if (this.#currentHeadless || params?.viewport) {
825
- await this.#applyViewport(this.#page, params?.viewport);
149
+ switch (params.action) {
150
+ case "open":
151
+ return await this.#open(name, params, details, timeoutMs, signal);
152
+ case "close":
153
+ return await this.#close(name, params, details, signal);
154
+ case "run":
155
+ return await this.#run(name, params, details, timeoutMs, signal);
156
+ default:
157
+ throw new ToolError(`Unsupported action: ${(params as BrowserParams).action}`);
158
+ }
159
+ } catch (error) {
160
+ if (error instanceof ToolAbortError) throw error;
161
+ if (error instanceof Error && error.name === "AbortError") {
162
+ throw new ToolAbortError();
163
+ }
164
+ throw error;
826
165
  }
827
- return this.#page;
828
166
  }
829
167
 
830
- async #applyViewport(page: Page, viewport?: BrowserParams["viewport"]): Promise<void> {
831
- if (!viewport) {
832
- await page.setViewport(DEFAULT_VIEWPORT);
833
- return;
168
+ async #open(
169
+ name: string,
170
+ params: BrowserParams,
171
+ details: BrowserToolDetails,
172
+ timeoutMs: number,
173
+ signal?: AbortSignal,
174
+ ): Promise<AgentToolResult<BrowserToolDetails>> {
175
+ const kind = resolveBrowserKind(params, this.session);
176
+ details.browser = kind.kind;
177
+
178
+ // If a tab with this name already exists on a different browser kind, fail fast — caller must close first.
179
+ const existing = getTab(name);
180
+ if (existing && !sameBrowserKind(existing.browser.kind, kind)) {
181
+ throw new ToolError(
182
+ `Tab ${JSON.stringify(name)} is bound to a different browser (${describeKind(existing.browser.kind)}). Close it first.`,
183
+ );
834
184
  }
835
- await page.setViewport({
836
- width: viewport.width,
837
- height: viewport.height,
838
- deviceScaleFactor: viewport.device_scale_factor ?? DEFAULT_VIEWPORT.deviceScaleFactor,
839
- });
840
- }
841
185
 
842
- async #clearElementCache(): Promise<void> {
843
- if (this.#elementCache.size === 0) {
844
- this.#elementIdCounter = 0;
845
- return;
846
- }
847
- const handles = Array.from(this.#elementCache.values());
848
- this.#elementCache.clear();
849
- this.#elementIdCounter = 0;
850
- await Promise.all(
851
- handles.map(async handle => {
852
- try {
853
- await handle.dispose();
854
- } catch {
855
- return;
856
- }
186
+ const browser = await untilAborted(signal, () =>
187
+ acquireBrowser(kind, {
188
+ cwd: this.session.cwd,
189
+ viewport: params.viewport
190
+ ? {
191
+ width: params.viewport.width,
192
+ height: params.viewport.height,
193
+ deviceScaleFactor: params.viewport.scale,
194
+ }
195
+ : undefined,
196
+ appArgs: params.app?.args,
197
+ signal,
857
198
  }),
858
199
  );
859
- }
860
-
861
- async #resolveCachedHandle(id: number): Promise<ElementHandle> {
862
- const handle = this.#elementCache.get(id);
863
- if (!handle) {
864
- throw new ToolError(`Unknown element_id ${id}. Run observe to refresh the element list.`);
865
- }
866
- try {
867
- const isConnected = (await handle.evaluate(el => el.isConnected)) as boolean;
868
- if (!isConnected) {
869
- await this.#clearElementCache();
870
- throw new ToolError(`Element_id ${id} is stale. Run observe again.`);
871
- }
872
- } catch {
873
- await this.#clearElementCache();
874
- throw new ToolError(`Element_id ${id} is stale. Run observe again.`);
875
- }
876
- return handle;
877
- }
878
200
 
879
- #isInteractiveNode(node: SerializedAXNode): boolean {
880
- if (INTERACTIVE_AX_ROLES.has(node.role)) return true;
881
- return (
882
- node.checked !== undefined ||
883
- node.pressed !== undefined ||
884
- node.selected !== undefined ||
885
- node.expanded !== undefined ||
886
- node.focused === true
201
+ const result = await untilAborted(signal, () =>
202
+ acquireTab(name, browser, {
203
+ url: params.url,
204
+ waitUntil: params.wait_until,
205
+ viewport: params.viewport
206
+ ? {
207
+ width: params.viewport.width,
208
+ height: params.viewport.height,
209
+ deviceScaleFactor: params.viewport.scale,
210
+ }
211
+ : undefined,
212
+ target: params.app?.target,
213
+ timeoutMs,
214
+ dialogs: params.dialogs,
215
+ signal,
216
+ }),
887
217
  );
888
- }
889
-
890
- async #collectObservationEntries(
891
- node: SerializedAXNode,
892
- entries: ObservationEntry[],
893
- options: { viewportOnly: boolean; includeAll: boolean },
894
- ): Promise<void> {
895
- if (options.includeAll || this.#isInteractiveNode(node)) {
896
- const handle = await node.elementHandle();
897
- if (handle) {
898
- let inViewport = true;
899
- if (options.viewportOnly) {
900
- try {
901
- inViewport = await handle.isIntersectingViewport();
902
- } catch {
903
- inViewport = false;
904
- }
905
- }
906
- if (inViewport) {
907
- const id = ++this.#elementIdCounter;
908
- const states: string[] = [];
909
- if (node.disabled) states.push("disabled");
910
- if (node.checked !== undefined) states.push(`checked=${String(node.checked)}`);
911
- if (node.pressed !== undefined) states.push(`pressed=${String(node.pressed)}`);
912
- if (node.selected !== undefined) states.push(`selected=${String(node.selected)}`);
913
- if (node.expanded !== undefined) states.push(`expanded=${String(node.expanded)}`);
914
- if (node.required) states.push("required");
915
- if (node.readonly) states.push("readonly");
916
- if (node.multiselectable) states.push("multiselectable");
917
- if (node.multiline) states.push("multiline");
918
- if (node.modal) states.push("modal");
919
- if (node.focused) states.push("focused");
920
- this.#elementCache.set(id, handle);
921
- entries.push({
922
- id,
923
- role: node.role,
924
- name: node.name,
925
- value: node.value,
926
- description: node.description,
927
- keyshortcuts: node.keyshortcuts,
928
- states,
929
- });
930
- } else {
931
- await handle.dispose();
932
- }
933
- }
934
- }
935
- for (const child of node.children ?? []) {
936
- await this.#collectObservationEntries(child, entries, options);
937
- }
938
- }
939
-
940
- #formatObservation(observation: Observation): string {
941
- const viewport = `${observation.viewport.width}x${observation.viewport.height}`;
942
- const scroll = `x=${observation.scroll.x} y=${observation.scroll.y} viewport=${observation.scroll.width}x${observation.scroll.height} doc=${observation.scroll.scrollWidth}x${observation.scroll.scrollHeight}`;
218
+ const tab = result.tab;
219
+ const url = tab.page.url();
220
+ const title = (await untilAborted(signal, () => tab.page.title())) as string;
221
+ details.url = url;
222
+ details.viewport = tab.page.viewport() ?? undefined;
223
+ const verb = result.created ? "Opened" : "Reused";
943
224
  const lines = [
944
- `URL: ${observation.url}`,
945
- observation.title ? `Title: ${observation.title}` : "Title:",
946
- `Viewport: ${viewport}`,
947
- `Scroll: ${scroll}`,
948
- "Elements:",
949
- ];
950
- for (const entry of observation.elements) {
951
- const name = entry.name ? ` "${entry.name}"` : "";
952
- const value = entry.value !== undefined ? ` value=${JSON.stringify(entry.value)}` : "";
953
- const description = entry.description ? ` desc=${JSON.stringify(entry.description)}` : "";
954
- const shortcuts = entry.keyshortcuts ? ` shortcuts=${JSON.stringify(entry.keyshortcuts)}` : "";
955
- const state = entry.states.length ? ` (${entry.states.join(", ")})` : "";
956
- lines.push(`${entry.id}. ${entry.role}${name}${value}${description}${shortcuts}${state}`);
957
- }
958
- return lines.join("\n");
959
- }
960
-
961
- /**
962
- * Restart the browser to apply changes like headless mode.
963
- */
964
- async restartForModeChange(): Promise<void> {
965
- await this.#resetBrowser();
966
- }
967
-
968
- async #applyStealthPatches(page: Page): Promise<void> {
969
- this.#patchSourceUrl(page);
970
- await this.#applyUserAgentOverride(page);
971
- await this.#injectStealthScripts(page);
225
+ `${verb} tab ${JSON.stringify(name)} on ${describeBrowser(browser)}`,
226
+ `URL: ${url}`,
227
+ title ? `Title: ${title}` : null,
228
+ ].filter((l): l is string => typeof l === "string");
229
+ details.result = lines.join("\n");
230
+ return toolResult(details).text(lines.join("\n")).done();
972
231
  }
973
232
 
974
- async #applyUserAgentOverride(page: Page): Promise<void> {
975
- const client = resolvePageClient(page);
976
- if (!client) return;
977
- const override = await this.#resolveUserAgentOverride(page);
978
- await this.#sendUserAgentOverride(client, override);
979
- await this.#configureUserAgentTargets(override);
980
- }
981
-
982
- async #resolveUserAgentOverride(page: Page): Promise<UserAgentOverride> {
983
- if (this.#userAgentOverride) return this.#userAgentOverride;
984
- const rawUserAgent = await page.browser().userAgent();
985
- let userAgent = rawUserAgent.replace("HeadlessChrome/", "Chrome/");
986
- if (userAgent.includes("Linux") && !userAgent.includes("Android")) {
987
- userAgent = userAgent.replace(/\(([^)]+)\)/, "(Windows NT 10.0; Win64; x64)");
233
+ async #close(
234
+ name: string,
235
+ params: BrowserParams,
236
+ details: BrowserToolDetails,
237
+ signal?: AbortSignal,
238
+ ): Promise<AgentToolResult<BrowserToolDetails>> {
239
+ const kill = !!params.kill;
240
+ if (params.all) {
241
+ const count = await untilAborted(signal, () => releaseAllTabs({ kill }));
242
+ details.result = `Closed ${count} tab(s)`;
243
+ return toolResult(details).text(details.result).done();
988
244
  }
989
-
990
- const uaVersionMatch = userAgent.match(/Chrome\/([\d|.]+)/);
991
- const fallbackVersionMatch = uaVersionMatch ?? (await page.browser().version()).match(/\/([\d|.]+)/);
992
- const uaVersion = fallbackVersionMatch?.[1] ?? "0";
993
- const majorVersion = Number.parseInt(uaVersion.split(".")[0] ?? "0", 10) || 0;
994
- const isAndroid = userAgent.includes("Android");
995
- const platform = userAgent.includes("Mac OS X")
996
- ? "MacIntel"
997
- : isAndroid
998
- ? "Android"
999
- : userAgent.includes("Linux")
1000
- ? "Linux"
1001
- : "Win32";
1002
- const platformFull = userAgent.includes("Mac OS X")
1003
- ? "Mac OS X"
1004
- : isAndroid
1005
- ? "Android"
1006
- : userAgent.includes("Linux")
1007
- ? "Linux"
1008
- : "Windows";
1009
- const platformVersion = userAgent.includes("Mac OS X ")
1010
- ? (userAgent.match(/Mac OS X ([^)]+)/)?.[1] ?? "")
1011
- : userAgent.includes("Android ")
1012
- ? (userAgent.match(/Android ([^;]+)/)?.[1] ?? "")
1013
- : userAgent.includes("Windows ")
1014
- ? (userAgent.match(/Windows .*?([\d|.]+);?/)?.[1] ?? "")
1015
- : "";
1016
- const architecture = isAndroid ? "" : "x86";
1017
- const model = isAndroid ? (userAgent.match(/Android.*?;\s([^)]+)/)?.[1] ?? "") : "";
1018
-
1019
- const brandOrders = [
1020
- [0, 1, 2],
1021
- [0, 2, 1],
1022
- [1, 0, 2],
1023
- [1, 2, 0],
1024
- [2, 0, 1],
1025
- [2, 1, 0],
1026
- ];
1027
- const order = brandOrders[majorVersion % brandOrders.length] ?? brandOrders[0];
1028
- const escapedChars = [" ", " ", ";"];
1029
- const greaseyBrand = `${escapedChars[order[0]]}Not${escapedChars[order[1]]}A${escapedChars[order[2]]}Brand`;
1030
- const brands: { brand: string; version: string }[] = [];
1031
- brands[order[0]] = { brand: greaseyBrand, version: "99" };
1032
- brands[order[1]] = { brand: "Chromium", version: String(majorVersion) };
1033
- brands[order[2]] = { brand: "Google Chrome", version: String(majorVersion) };
1034
-
1035
- this.#userAgentOverride = {
1036
- userAgent,
1037
- platform,
1038
- acceptLanguage: STEALTH_ACCEPT_LANGUAGE,
1039
- userAgentMetadata: {
1040
- brands,
1041
- fullVersion: uaVersion,
1042
- platform: platformFull,
1043
- platformVersion,
1044
- architecture,
1045
- model,
1046
- mobile: isAndroid,
1047
- },
1048
- };
1049
- return this.#userAgentOverride;
245
+ const closed = await untilAborted(signal, () => releaseTab(name, { kill }));
246
+ details.result = closed ? `Closed tab ${JSON.stringify(name)}` : `No tab named ${JSON.stringify(name)}`;
247
+ return toolResult(details).text(details.result).done();
1050
248
  }
1051
249
 
1052
- async #configureUserAgentTargets(override: UserAgentOverride): Promise<void> {
1053
- if (!this.#browser) return;
1054
- if (!this.#browserSession) {
1055
- this.#browserSession = await this.#browser.target().createCDPSession();
1056
- await this.#browserSession.send("Target.setAutoAttach", {
1057
- autoAttach: true,
1058
- waitForDebuggerOnStart: false,
1059
- flatten: true,
1060
- });
1061
- this.#browserSession.on("Target.attachedToTarget", async (event: { sessionId: string }) => {
1062
- const connection = this.#browserSession?.connection();
1063
- const session = connection?.session(event.sessionId);
1064
- if (!session || !this.#userAgentOverride) return;
1065
- await this.#sendUserAgentOverride(this.#wrapSession(session), this.#userAgentOverride);
1066
- });
250
+ async #run(
251
+ name: string,
252
+ params: BrowserParams,
253
+ details: BrowserToolDetails,
254
+ timeoutMs: number,
255
+ signal?: AbortSignal,
256
+ ): Promise<AgentToolResult<BrowserToolDetails>> {
257
+ if (!params.code?.trim()) {
258
+ throw new ToolError("Missing required parameter 'code' for action 'run'.");
1067
259
  }
260
+ const tab = getTab(name);
261
+ if (!tab) {
262
+ throw new ToolError(
263
+ `No tab named ${JSON.stringify(name)}. Call open first (e.g. action: 'open', name: '${name}').`,
264
+ );
265
+ }
266
+ details.browser = tab.browser.kind.kind;
267
+ details.url = tab.page.url();
268
+
269
+ const { displays, returnValue, screenshots } = await runInTab({
270
+ tab,
271
+ code: params.code,
272
+ timeoutMs,
273
+ signal,
274
+ session: this.session,
275
+ });
1068
276
 
1069
- const targets = this.#browser.targets();
1070
- await Promise.all(
1071
- targets.map(async target => {
1072
- const session = await target.createCDPSession();
1073
- await this.#sendUserAgentOverride(this.#wrapSession(session), override);
1074
- }),
1075
- );
1076
- }
1077
-
1078
- #wrapSession(session: CDPSession): PuppeteerCdpClient {
1079
- return {
1080
- send: async (method, params) => session.send(method as never, params as never),
1081
- };
1082
- }
277
+ if (screenshots.length) details.screenshots = screenshots;
1083
278
 
1084
- async #sendUserAgentOverride(client: PuppeteerCdpClient, override: UserAgentOverride): Promise<void> {
1085
- try {
1086
- await client.send("Network.enable");
1087
- } catch {}
1088
- try {
1089
- await client.send("Network.setUserAgentOverride", override);
1090
- } catch (error) {
1091
- logger.debug("Failed to apply Network user agent override", {
1092
- error: error instanceof Error ? error.message : String(error),
1093
- });
279
+ const content = [...displays];
280
+ if (returnValue !== undefined) {
281
+ content.push({ type: "text", text: stringifyReturnValue(returnValue) });
1094
282
  }
1095
- try {
1096
- await client.send("Emulation.setUserAgentOverride", override);
1097
- } catch (error) {
1098
- logger.debug("Failed to apply Emulation user agent override", {
1099
- error: error instanceof Error ? error.message : String(error),
1100
- });
283
+ if (!content.length) {
284
+ content.push({ type: "text", text: `Ran code on tab ${JSON.stringify(name)}` });
1101
285
  }
286
+ const textOnly = content
287
+ .filter((c): c is { type: "text"; text: string } => c.type === "text")
288
+ .map(c => c.text)
289
+ .join("\n");
290
+ details.result = textOnly;
291
+ return toolResult(details).content(content).done();
1102
292
  }
293
+ }
1103
294
 
1104
- #patchSourceUrl(page: Page): void {
1105
- const client = resolvePageClient(page);
1106
- if (!client) return;
1107
- const clientKey = client as object;
1108
- if (this.#patchedClients.has(clientKey)) return;
1109
- this.#patchedClients.add(clientKey);
1110
- const originalSend = client.send.bind(client);
1111
- client.send = async (method: string, params?: Record<string, unknown>) => {
1112
- const next = async (payload?: Record<string, unknown>) => {
1113
- try {
1114
- return await originalSend(method, payload);
1115
- } catch (error) {
1116
- if (
1117
- error instanceof Error &&
1118
- error.message.includes(
1119
- "Protocol error (Network.getResponseBody): No resource with given identifier found",
1120
- )
1121
- ) {
1122
- return undefined;
1123
- }
1124
- throw error;
1125
- }
1126
- };
1127
- if (!method || !params) {
1128
- return next(params);
1129
- }
1130
- const key =
1131
- method === "Runtime.evaluate"
1132
- ? "expression"
1133
- : method === "Runtime.callFunctionOn"
1134
- ? "functionDeclaration"
1135
- : null;
1136
- if (!key) {
1137
- return next(params);
1138
- }
1139
- const value = params[key];
1140
- if (typeof value !== "string" || !value.includes(PUPPETEER_SOURCE_URL_SUFFIX)) {
1141
- return next(params);
1142
- }
1143
- const patchedParams = { ...params, [key]: value.replace(PUPPETEER_SOURCE_URL_SUFFIX, "") };
1144
- return next(patchedParams);
1145
- };
295
+ function describeBrowser(handle: BrowserHandle): string {
296
+ switch (handle.kind.kind) {
297
+ case "headless":
298
+ return `headless browser (${handle.kind.headless ? "hidden" : "visible"})`;
299
+ case "spawned":
300
+ return `spawned ${handle.kind.path} (pid ${handle.pid ?? "?"})`;
301
+ case "connected":
302
+ return `connected ${handle.cdpUrl ?? handle.kind.cdpUrl}`;
1146
303
  }
304
+ }
1147
305
 
1148
- /** Injects stealth scripts that cover common puppeteer detection surfaces. */
1149
- async #injectStealthScripts(page: Page): Promise<void> {
1150
- const scripts = [
1151
- stealthTamperingScript,
1152
- stealthActivityScript,
1153
- stealthHairlineScript,
1154
- stealthBotdScript,
1155
- stealthIframeScript,
1156
- stealthWebglScript,
1157
- stealthScreenScript,
1158
- stealthFontsScript,
1159
- stealthAudioScript,
1160
- stealthLocaleScript,
1161
- stealthPluginsScript,
1162
- stealthHardwareScript,
1163
- stealthCodecsScript,
1164
- stealthWorkerScript,
1165
- ];
1166
-
1167
- const joint = scripts
1168
- .map(
1169
- script => `
1170
- try {
1171
- ${script};
1172
- } catch (e) {}
1173
- `,
1174
- )
1175
- .join(";\n");
1176
-
1177
- await page.evaluateOnNewDocument(`(() => {
1178
- // Native function cache - captured before any tampering
1179
- const iframe = document.createElement("iframe");
1180
- iframe.style.display = "none";
1181
- document.head.appendChild(iframe);
1182
- const nativeWindow = iframe.contentWindow;
1183
- if (!nativeWindow) return;
1184
-
1185
- // Cache pristine native functions
1186
- const Function_toString = nativeWindow.Function.prototype.toString;
1187
- const Object_getOwnPropertyDescriptor = nativeWindow.Object.getOwnPropertyDescriptor;
1188
- const Object_getOwnPropertyDescriptors = nativeWindow.Object.getOwnPropertyDescriptors;
1189
- const Object_getPrototypeOf = nativeWindow.Object.getPrototypeOf;
1190
- const Object_defineProperty = nativeWindow.Object.defineProperty;
1191
- const Object_getOwnPropertyDescriptorOriginal = nativeWindow.Object.getOwnPropertyDescriptor;
1192
- const Object_create = nativeWindow.Object.create;
1193
- const Object_keys = nativeWindow.Object.keys;
1194
- const Object_getOwnPropertyNames = nativeWindow.Object.getOwnPropertyNames;
1195
- const Object_entries = nativeWindow.Object.entries;
1196
- const Object_setPrototypeOf = nativeWindow.Object.setPrototypeOf;
1197
- const Object_assign = nativeWindow.Object.assign;
1198
- const Window_setTimeout = nativeWindow.setTimeout;
1199
- const Math_random = nativeWindow.Math.random;
1200
- const Math_floor = nativeWindow.Math.floor;
1201
- const Math_max = nativeWindow.Math.max;
1202
- const Math_min = nativeWindow.Math.min;
1203
- const Window_Event = nativeWindow.Event;
1204
- const Promise_resolve = nativeWindow.Promise.resolve.bind(nativeWindow.Promise);
1205
- const Window_Blob = nativeWindow.Blob;
1206
- const Window_Proxy = nativeWindow.Proxy;
1207
- const Intl_DateTimeFormat = nativeWindow.Intl.DateTimeFormat;
1208
- const Date_constructor = nativeWindow.Date;
1209
-
1210
-
1211
- ${joint}
1212
-
1213
- document.head.removeChild(iframe);})();`);
306
+ function describeKind(kind: BrowserKind): string {
307
+ switch (kind.kind) {
308
+ case "headless":
309
+ return `headless ${kind.headless ? "hidden" : "visible"}`;
310
+ case "spawned":
311
+ return `spawned:${kind.path}`;
312
+ case "connected":
313
+ return `connected:${kind.cdpUrl}`;
1214
314
  }
315
+ }
1215
316
 
1216
- async execute(
1217
- _toolCallId: string,
1218
- params: BrowserParams,
1219
- signal?: AbortSignal,
1220
- _onUpdate?: AgentToolUpdateCallback<BrowserToolDetails>,
1221
- _ctx?: AgentToolContext,
1222
- ): Promise<AgentToolResult<BrowserToolDetails>> {
1223
- try {
1224
- throwIfAborted(signal);
1225
- const timeoutSeconds = clampTimeout("browser", params.timeout);
1226
- const timeoutMs = timeoutSeconds * 1000;
1227
- const details: BrowserToolDetails = { action: params.action };
1228
-
1229
- switch (params.action) {
1230
- case "open": {
1231
- const page = await untilAborted(signal, () => this.#resetBrowser(params));
1232
- const viewport = page.viewport();
1233
- details.viewport = viewport ?? DEFAULT_VIEWPORT;
1234
- return toolResult(details).text("Opened headless browser session").done();
1235
- }
1236
- case "close": {
1237
- await untilAborted(signal, () => this.#closeBrowser());
1238
- return toolResult(details).text("Closed headless browser session").done();
1239
- }
1240
- case "goto": {
1241
- const url = ensureParam(params.url, "url", params.action);
1242
- details.url = url;
1243
- const page = await this.#ensurePage(params);
1244
- const waitUntil = params.wait_until ?? "networkidle2";
1245
- await this.#clearElementCache();
1246
- await untilAborted(signal, () => page.goto(url, { waitUntil, timeout: timeoutMs }));
1247
- const finalUrl = page.url();
1248
- const title = (await untilAborted(signal, () => page.title())) as string;
1249
- details.url = finalUrl;
1250
- details.result = title;
1251
- return toolResult(details)
1252
- .text(`Navigated to ${finalUrl}${title ? `\nTitle: ${title}` : ""}`)
1253
- .done();
1254
- }
1255
- case "observe": {
1256
- const page = await this.#ensurePage(params);
1257
- const timeoutSignal = AbortSignal.timeout(timeoutMs);
1258
- const observeSignal = signal ? AbortSignal.any([signal, timeoutSignal]) : timeoutSignal;
1259
- await this.#clearElementCache();
1260
- const snapshot = (await untilAborted(observeSignal, () =>
1261
- page.accessibility.snapshot({ interestingOnly: !(params.include_all ?? false) }),
1262
- )) as SerializedAXNode | null;
1263
- if (!snapshot) {
1264
- throw new ToolError("Accessibility snapshot unavailable");
1265
- }
1266
- const entries: ObservationEntry[] = [];
1267
- await this.#collectObservationEntries(snapshot, entries, {
1268
- viewportOnly: params.viewport_only ?? false,
1269
- includeAll: params.include_all ?? false,
1270
- });
1271
- const scroll = (await untilAborted(observeSignal, () =>
1272
- page.evaluate(() => {
1273
- const win = globalThis as unknown as {
1274
- scrollX: number;
1275
- scrollY: number;
1276
- innerWidth: number;
1277
- innerHeight: number;
1278
- document: { documentElement: { scrollWidth: number; scrollHeight: number } };
1279
- };
1280
- const doc = win.document.documentElement;
1281
- return {
1282
- x: win.scrollX,
1283
- y: win.scrollY,
1284
- width: win.innerWidth,
1285
- height: win.innerHeight,
1286
- scrollWidth: doc.scrollWidth,
1287
- scrollHeight: doc.scrollHeight,
1288
- };
1289
- }),
1290
- )) as Observation["scroll"];
1291
- const url = page.url();
1292
- const title = (await untilAborted(observeSignal, () => page.title())) as string;
1293
- const viewport = page.viewport() ?? DEFAULT_VIEWPORT;
1294
- const observation: Observation = {
1295
- url,
1296
- title,
1297
- viewport,
1298
- scroll,
1299
- elements: entries,
1300
- };
1301
- details.url = url;
1302
- details.viewport = viewport;
1303
- details.observation = observation;
1304
- details.result = `${entries.length} elements`;
1305
- return toolResult(details).text(this.#formatObservation(observation)).done();
1306
- }
1307
- case "click": {
1308
- const selector = ensureParam(params.selector, "selector", params.action);
1309
- details.selector = selector;
1310
- const page = await this.#ensurePage(params);
1311
- const resolvedSelector = normalizeSelector(selector);
1312
- if (resolvedSelector.startsWith("text/")) {
1313
- await clickQueryHandlerText(page, resolvedSelector, timeoutMs, signal);
1314
- } else {
1315
- const locator = page.locator(resolvedSelector).setTimeout(timeoutMs);
1316
- await untilAborted(signal, () => locator.click());
1317
- }
1318
- return toolResult(details).text(`Clicked ${selector}`).done();
1319
- }
1320
- case "click_id": {
1321
- const elementId = ensureParam(params.element_id, "element_id", params.action);
1322
- details.elementId = elementId;
1323
- const handle = await this.#resolveCachedHandle(elementId);
1324
- try {
1325
- await untilAborted(signal, () => handle.click());
1326
- } catch {
1327
- await this.#clearElementCache();
1328
- throw new ToolError(`Element_id ${elementId} is stale. Run observe again.`);
1329
- }
1330
- return toolResult(details).text(`Clicked element ${elementId}`).done();
1331
- }
1332
- case "type": {
1333
- const selector = ensureParam(params.selector, "selector", params.action);
1334
- const text = ensureParam(params.text, "text", params.action);
1335
- details.selector = selector;
1336
- const page = await this.#ensurePage(params);
1337
- const resolvedSelector = normalizeSelector(selector);
1338
- const locator = page.locator(resolvedSelector).setTimeout(timeoutMs);
1339
- const handle = (await untilAborted(signal, () => locator.waitHandle())) as ElementHandle;
1340
- await untilAborted(signal, () => handle.type(text, { delay: 0 }));
1341
- await handle.dispose();
1342
- return toolResult(details).text(`Typed into ${selector}`).done();
1343
- }
1344
- case "type_id": {
1345
- const elementId = ensureParam(params.element_id, "element_id", params.action);
1346
- const text = ensureParam(params.text, "text", params.action);
1347
- details.elementId = elementId;
1348
- const page = await this.#ensurePage(params);
1349
- const handle = await this.#resolveCachedHandle(elementId);
1350
- try {
1351
- await untilAborted(signal, () => handle.focus());
1352
- await untilAborted(signal, () => page.keyboard.type(text, { delay: 0 }));
1353
- } catch {
1354
- await this.#clearElementCache();
1355
- throw new ToolError(`Element_id ${elementId} is stale. Run observe again.`);
1356
- }
1357
- return toolResult(details).text(`Typed into element ${elementId}`).done();
1358
- }
1359
- case "fill": {
1360
- const selector = ensureParam(params.selector, "selector", params.action);
1361
- const value = ensureParam(params.value, "value", params.action);
1362
- details.selector = selector;
1363
- const page = await this.#ensurePage(params);
1364
- const resolvedSelector = normalizeSelector(selector);
1365
- const locator = page.locator(resolvedSelector).setTimeout(timeoutMs);
1366
- await untilAborted(signal, () => locator.fill(value));
1367
- return toolResult(details).text(`Filled ${selector}`).done();
1368
- }
1369
- case "fill_id": {
1370
- const elementId = ensureParam(params.element_id, "element_id", params.action);
1371
- const value = ensureParam(params.value, "value", params.action);
1372
- details.elementId = elementId;
1373
- const handle = await this.#resolveCachedHandle(elementId);
1374
- try {
1375
- await untilAborted(signal, () =>
1376
- handle.evaluate((el, inputValue) => {
1377
- const element = el as { value?: string; dispatchEvent: (event: Event) => boolean };
1378
- if (!("value" in element)) {
1379
- throw new Error("Target element is not a form input");
1380
- }
1381
- element.value = String(inputValue);
1382
- element.dispatchEvent(new Event("input", { bubbles: true }));
1383
- element.dispatchEvent(new Event("change", { bubbles: true }));
1384
- }, value),
1385
- );
1386
- } catch {
1387
- await this.#clearElementCache();
1388
- throw new ToolError(`Element_id ${elementId} is stale. Run observe again.`);
1389
- }
1390
- return toolResult(details).text(`Filled element ${elementId}`).done();
1391
- }
1392
- case "press": {
1393
- const key = ensureParam(params.key, "key", params.action) as KeyInput;
1394
- const page = await this.#ensurePage(params);
1395
- if (params.selector) {
1396
- const resolvedSelector = normalizeSelector(params.selector as string);
1397
- await untilAborted(signal, () => page.focus(resolvedSelector));
1398
- }
1399
- await untilAborted(signal, () => page.keyboard.press(key));
1400
- return toolResult(details).text(`Pressed ${key}`).done();
1401
- }
1402
- case "scroll": {
1403
- const deltaY = ensureParam(params.delta_y, "delta_y", params.action);
1404
- const deltaX = params.delta_x ?? 0;
1405
- const page = await this.#ensurePage(params);
1406
- await untilAborted(signal, () => page.mouse.wheel({ deltaX, deltaY }));
1407
- return toolResult(details).text(`Scrolled by ${deltaX}, ${deltaY}`).done();
1408
- }
1409
- case "drag": {
1410
- const fromSelector = ensureParam(params.from_selector, "from_selector", params.action);
1411
- const toSelector = ensureParam(params.to_selector, "to_selector", params.action);
1412
- const page = await this.#ensurePage(params);
1413
- const resolvedFromSelector = normalizeSelector(fromSelector);
1414
- const resolvedToSelector = normalizeSelector(toSelector);
1415
- const fromHandle = (await untilAborted(signal, () =>
1416
- page.$(resolvedFromSelector),
1417
- )) as ElementHandle | null;
1418
- const toHandle = (await untilAborted(signal, () => page.$(resolvedToSelector))) as ElementHandle | null;
1419
- if (!fromHandle || !toHandle) {
1420
- throw new ToolError("Drag selectors did not resolve to elements");
1421
- }
1422
- const fromBox = (await untilAborted(signal, () => fromHandle.boundingBox())) as {
1423
- x: number;
1424
- y: number;
1425
- width: number;
1426
- height: number;
1427
- } | null;
1428
- const toBox = (await untilAborted(signal, () => toHandle.boundingBox())) as {
1429
- x: number;
1430
- y: number;
1431
- width: number;
1432
- height: number;
1433
- } | null;
1434
- await fromHandle.dispose();
1435
- await toHandle.dispose();
1436
- if (!fromBox || !toBox) {
1437
- throw new ToolError("Drag elements are not visible");
1438
- }
1439
- const startX = fromBox.x + fromBox.width / 2;
1440
- const startY = fromBox.y + fromBox.height / 2;
1441
- const endX = toBox.x + toBox.width / 2;
1442
- const endY = toBox.y + toBox.height / 2;
1443
- await untilAborted(signal, () => page.mouse.move(startX, startY));
1444
- await untilAborted(signal, () => page.mouse.down());
1445
- await untilAborted(signal, () => page.mouse.move(endX, endY, { steps: 12 }));
1446
- await untilAborted(signal, () => page.mouse.up());
1447
- return toolResult(details).text(`Dragged from ${fromSelector} to ${toSelector}`).done();
1448
- }
1449
- case "wait_for_selector": {
1450
- const selector = ensureParam(params.selector, "selector", params.action);
1451
- details.selector = selector;
1452
- const page = await this.#ensurePage(params);
1453
- const resolvedSelector = normalizeSelector(selector);
1454
- const locator = page.locator(resolvedSelector).setTimeout(timeoutMs);
1455
- await untilAborted(signal, () => locator.wait());
1456
- return toolResult(details).text(`Selector ready: ${selector}`).done();
1457
- }
1458
- case "evaluate": {
1459
- const script = ensureParam(params.script, "script", params.action);
1460
- const page = await this.#ensurePage(params);
1461
- const value = (await untilAborted(signal, () =>
1462
- page.evaluate(async (source: string) => {
1463
- try {
1464
- return await new Function(`return (async () => (${source}))();`)();
1465
- } catch {
1466
- return await new Function(`return (async () => { ${source} })();`)();
1467
- }
1468
- }, script),
1469
- )) as unknown;
1470
- const output = formatEvaluateResult(value);
1471
- details.result = output;
1472
- return toolResult(details).text(output).done();
1473
- }
1474
- case "get_text": {
1475
- const page = await this.#ensurePage(params);
1476
- if (params.args?.length) {
1477
- const values = (await Promise.all(
1478
- params.args.map((arg, index) => {
1479
- const selector = ensureParam(arg.selector, `args[${index}].selector`, params.action);
1480
- const resolvedSelector = normalizeSelector(selector);
1481
- return untilAborted(signal, () =>
1482
- page.$eval(resolvedSelector, (el: Element) => (el as HTMLElement).innerText),
1483
- );
1484
- }),
1485
- )) as string[];
1486
- details.result = values;
1487
- return toolResult(details)
1488
- .text(JSON.stringify(values, null, 2))
1489
- .done();
1490
- }
1491
- const selector = ensureParam(params.selector, "selector", params.action);
1492
- details.selector = selector;
1493
- const resolvedSelector = normalizeSelector(selector);
1494
- const value = (await untilAborted(signal, () =>
1495
- page.$eval(resolvedSelector, (el: Element) => (el as HTMLElement).innerText),
1496
- )) as string;
1497
- details.result = value;
1498
- return toolResult(details).text(value).done();
1499
- }
1500
- case "get_html": {
1501
- const page = await this.#ensurePage(params);
1502
- if (params.args?.length) {
1503
- const values = (await Promise.all(
1504
- params.args.map((arg, index) => {
1505
- const selector = ensureParam(arg.selector, `args[${index}].selector`, params.action);
1506
- const resolvedSelector = normalizeSelector(selector);
1507
- return untilAborted(signal, () =>
1508
- page.$eval(resolvedSelector, (el: Element) => (el as HTMLElement).innerHTML),
1509
- );
1510
- }),
1511
- )) as string[];
1512
- details.result = values;
1513
- return toolResult(details)
1514
- .text(JSON.stringify(values, null, 2))
1515
- .done();
1516
- }
1517
- const selector = ensureParam(params.selector, "selector", params.action);
1518
- details.selector = selector;
1519
- const resolvedSelector = normalizeSelector(selector);
1520
- const value = (await untilAborted(signal, () =>
1521
- page.$eval(resolvedSelector, (el: Element) => (el as HTMLElement).innerHTML),
1522
- )) as string;
1523
- details.result = value;
1524
- return toolResult(details).text(value).done();
1525
- }
1526
- case "get_attribute": {
1527
- const page = await this.#ensurePage(params);
1528
- if (params.args?.length) {
1529
- const values = (await Promise.all(
1530
- params.args.map((arg, index) => {
1531
- const selector = ensureParam(arg.selector, `args[${index}].selector`, params.action);
1532
- const attribute = ensureParam(arg.attribute, `args[${index}].attribute`, params.action);
1533
- const resolvedSelector = normalizeSelector(selector);
1534
- return untilAborted(signal, () =>
1535
- page.$eval(
1536
- resolvedSelector,
1537
- (el: Element, attr: string) => (el as HTMLElement).getAttribute(String(attr)),
1538
- attribute,
1539
- ),
1540
- );
1541
- }),
1542
- )) as string[];
1543
- details.result = values;
1544
- return toolResult(details)
1545
- .text(JSON.stringify(values, null, 2))
1546
- .done();
1547
- }
1548
- const selector = ensureParam(params.selector, "selector", params.action);
1549
- const attribute = ensureParam(params.attribute, "attribute", params.action);
1550
- details.selector = selector;
1551
- const resolvedSelector = normalizeSelector(selector);
1552
- const value = (await untilAborted(signal, () =>
1553
- page.$eval(
1554
- resolvedSelector,
1555
- (el: { getAttribute: (name: string) => string | null }, attr: string) =>
1556
- el.getAttribute(String(attr)),
1557
- attribute,
1558
- ),
1559
- )) as string | null;
1560
- const output = value ?? "";
1561
- details.result = output;
1562
- return toolResult(details).text(output).done();
1563
- }
1564
- case "extract_readable": {
1565
- const page = await this.#ensurePage(params);
1566
- const format = params.format ?? "markdown";
1567
- const html = (await untilAborted(signal, () => page.content())) as string;
1568
- const url = page.url();
1569
- const readable = extractReadableFromHtml(html, url, format);
1570
- if (!readable) {
1571
- throw new ToolError("Readable content not found");
1572
- }
1573
- details.url = url;
1574
- details.readable = readable;
1575
- details.result = format === "markdown" ? (readable.markdown ?? "") : (readable.text ?? "");
1576
- return toolResult(details)
1577
- .text(JSON.stringify(readable, null, 2))
1578
- .done();
1579
- }
1580
- case "screenshot": {
1581
- const page = await this.#ensurePage(params);
1582
- const fullPage = params.selector ? false : (params.full_page ?? false);
1583
- let buffer: Buffer;
1584
-
1585
- if (params.selector) {
1586
- const resolvedSelector = normalizeSelector(params.selector as string);
1587
- const handle = (await untilAborted(signal, () => page.$(resolvedSelector))) as ElementHandle | null;
1588
- if (!handle) {
1589
- throw new ToolError("Screenshot selector did not resolve to an element");
1590
- }
1591
- buffer = (await untilAborted(signal, () => handle.screenshot({ type: "png" }))) as Buffer;
1592
- await handle.dispose();
1593
- details.selector = params.selector;
1594
- } else {
1595
- buffer = (await untilAborted(signal, () => page.screenshot({ type: "png", fullPage }))) as Buffer;
1596
- }
1597
-
1598
- // Compress aggressively for API content — screenshots are the most
1599
- // frequent image source and land directly in the next LLM request.
1600
- // 1024px is plenty for OCR/UI inspection; 150KB keeps payloads lean.
1601
- const resized = await resizeImage(
1602
- { type: "image", data: buffer.toBase64(), mimeType: "image/png" },
1603
- { maxWidth: 1024, maxHeight: 1024, maxBytes: 150 * 1024, jpegQuality: 70 },
1604
- );
1605
- // Resolve destination: user-defined path > screenshotDir (auto-named) > temp file.
1606
- const screenshotDir = (() => {
1607
- const v = this.session.settings.get("browser.screenshotDir") as string | undefined;
1608
- return v ? expandPath(v) : undefined;
1609
- })();
1610
- const paramPath = params.path ? resolveToCwd(params.path as string, this.session.cwd) : undefined;
1611
- let dest: string;
1612
- if (paramPath) {
1613
- dest = paramPath;
1614
- } else if (screenshotDir) {
1615
- const ts = new Date().toISOString().replace(/[:.]/g, "-").slice(0, -1);
1616
- dest = path.join(screenshotDir, `screenshot-${ts}.png`);
1617
- } else {
1618
- dest = path.join(os.tmpdir(), `omp-sshots-${Snowflake.next()}.png`);
1619
- }
1620
- await fs.promises.mkdir(path.dirname(dest), { recursive: true });
1621
- // Full-res buffer when saving to a user-defined location; resized (API copy) for temp-only.
1622
- const saveFullRes = !!(paramPath || screenshotDir);
1623
- const savedBuffer = saveFullRes ? buffer : resized.buffer;
1624
- const savedMimeType = saveFullRes ? "image/png" : resized.mimeType;
1625
- await Bun.write(dest, savedBuffer);
1626
- details.screenshotPath = dest;
1627
- details.mimeType = savedMimeType;
1628
- details.bytes = savedBuffer.length;
317
+ function sameBrowserKind(a: BrowserKind, b: BrowserKind): boolean { // True when a and b carry the same `kind` tag and agree on that tag's identifying field.
318
+ if (a.kind !== b.kind) return false; // Different tags never match.
319
+ if (a.kind === "headless" && b.kind === "headless") return a.headless === b.headless; // headless: compare the `headless` flag (b.kind is re-tested, presumably so TypeScript narrows b as well).
320
+ if (a.kind === "spawned" && b.kind === "spawned") return a.path === b.path; // spawned: compare `path`.
321
+ if (a.kind === "connected" && b.kind === "connected") return a.cdpUrl === b.cdpUrl; // connected: compare `cdpUrl`.
322
+ return false; // Tags are equal but are none of the three handled above.
323
+ }
1629
324
 
1630
- const lines = formatScreenshot({
1631
- saveFullRes,
1632
- savedMimeType,
1633
- savedByteLength: savedBuffer.length,
1634
- dest,
1635
- resized,
1636
- });
1637
- return toolResult(details)
1638
- .content([
1639
- { type: "text", text: lines.join("\n") },
1640
- { type: "image", data: resized.data, mimeType: resized.mimeType },
1641
- ])
1642
- .done();
1643
- }
1644
- default:
1645
- throw new ToolError(`Unsupported action: ${params.action}`);
1646
- }
1647
- } catch (error) {
1648
- if (error instanceof ToolAbortError) throw error;
1649
- if (error instanceof Error && error.name === "AbortError") {
1650
- throw new ToolAbortError();
1651
- }
1652
- throw error;
1653
- }
325
+ function stringifyReturnValue(value: unknown): string { // Converts a value of unknown type to a string; never throws on JSON serialization failures.
326
+ if (typeof value === "string") return value; // Strings pass through unchanged, without JSON quoting.
327
+ try {
328
+ return JSON.stringify(value, null, 2) ?? String(value); // 2-space pretty JSON; `??` covers JSON.stringify returning undefined (e.g. for undefined or a function).
329
+ } catch { // JSON.stringify threw (e.g. circular structure or BigInt).
330
+ return String(value); // Fall back to plain string coercion.
1654
331
  }
1655
332
  }
333
+
334
+ // Re-export collectObservation and formatObservation so external callers (e.g. tests)
335
+ // can use them without reaching into the browser/ subdirectory.
336
+ export { collectObservation, formatObservation };