@oh-my-pi/pi-coding-agent 14.5.11 → 14.5.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/CHANGELOG.md +58 -0
  2. package/package.json +18 -10
  3. package/src/cli/jupyter-cli.ts +1 -1
  4. package/src/config/model-equivalence.ts +49 -16
  5. package/src/config/model-registry.ts +100 -25
  6. package/src/config/model-resolver.ts +29 -15
  7. package/src/config/settings-schema.ts +20 -6
  8. package/src/config/settings.ts +9 -8
  9. package/src/config.ts +9 -0
  10. package/src/eval/backend.ts +43 -0
  11. package/src/eval/eval.lark +43 -0
  12. package/src/eval/index.ts +5 -0
  13. package/src/eval/js/context-manager.ts +717 -0
  14. package/src/eval/js/executor.ts +131 -0
  15. package/src/eval/js/index.ts +46 -0
  16. package/src/eval/js/prelude.ts +2 -0
  17. package/src/eval/js/prelude.txt +84 -0
  18. package/src/eval/js/tool-bridge.ts +124 -0
  19. package/src/eval/parse.ts +337 -0
  20. package/src/{ipy → eval/py}/executor.ts +2 -180
  21. package/src/{ipy → eval/py}/gateway-coordinator.ts +4 -3
  22. package/src/eval/py/index.ts +58 -0
  23. package/src/{ipy → eval/py}/kernel.ts +5 -41
  24. package/src/{ipy → eval/py}/prelude.py +39 -227
  25. package/src/eval/types.ts +48 -0
  26. package/src/export/html/template.generated.ts +1 -1
  27. package/src/export/html/template.js +23 -17
  28. package/src/extensibility/extensions/types.ts +2 -3
  29. package/src/internal-urls/docs-index.generated.ts +5 -5
  30. package/src/lsp/client.ts +9 -0
  31. package/src/lsp/index.ts +395 -0
  32. package/src/lsp/types.ts +15 -4
  33. package/src/main.ts +25 -14
  34. package/src/mcp/oauth-flow.ts +1 -1
  35. package/src/memories/index.ts +1 -1
  36. package/src/modes/acp/acp-event-mapper.ts +1 -1
  37. package/src/modes/components/{python-execution.ts → eval-execution.ts} +11 -4
  38. package/src/modes/components/login-dialog.ts +1 -1
  39. package/src/modes/components/oauth-selector.ts +2 -1
  40. package/src/modes/components/tool-execution.ts +3 -4
  41. package/src/modes/controllers/command-controller.ts +28 -8
  42. package/src/modes/controllers/input-controller.ts +4 -4
  43. package/src/modes/controllers/selector-controller.ts +2 -1
  44. package/src/modes/interactive-mode.ts +4 -5
  45. package/src/modes/types.ts +3 -3
  46. package/src/modes/utils/ui-helpers.ts +2 -2
  47. package/src/prompts/system/system-prompt.md +3 -3
  48. package/src/prompts/tools/atom.md +3 -2
  49. package/src/prompts/tools/browser.md +61 -16
  50. package/src/prompts/tools/eval.md +92 -0
  51. package/src/prompts/tools/lsp.md +7 -3
  52. package/src/sdk.ts +45 -31
  53. package/src/session/agent-session.ts +44 -54
  54. package/src/session/messages.ts +1 -1
  55. package/src/slash-commands/builtin-registry.ts +1 -1
  56. package/src/system-prompt.ts +34 -66
  57. package/src/task/executor.ts +5 -9
  58. package/src/tools/browser/attach.ts +175 -0
  59. package/src/tools/browser/launch.ts +576 -0
  60. package/src/tools/browser/readable.ts +90 -0
  61. package/src/tools/browser/registry.ts +198 -0
  62. package/src/tools/browser/render.ts +212 -0
  63. package/src/tools/browser/tab-protocol.ts +101 -0
  64. package/src/tools/browser/tab-supervisor.ts +429 -0
  65. package/src/tools/browser/tab-worker-entry.ts +21 -0
  66. package/src/tools/browser/tab-worker.ts +1006 -0
  67. package/src/tools/browser.ts +231 -1567
  68. package/src/tools/checkpoint.ts +2 -2
  69. package/src/tools/{python.ts → eval.ts} +324 -315
  70. package/src/tools/exit-plan-mode.ts +1 -1
  71. package/src/tools/index.ts +62 -100
  72. package/src/tools/plan-mode-guard.ts +27 -1
  73. package/src/tools/read.ts +0 -6
  74. package/src/tools/recipe/runners/pkg.ts +34 -32
  75. package/src/tools/renderers.ts +4 -2
  76. package/src/tools/resolve.ts +7 -2
  77. package/src/tools/todo-write.ts +0 -1
  78. package/src/tools/tool-timeouts.ts +2 -2
  79. package/src/utils/markit.ts +15 -7
  80. package/src/utils/tools-manager.ts +5 -5
  81. package/src/web/search/index.ts +5 -5
  82. package/src/web/search/provider.ts +121 -39
  83. package/src/web/search/providers/gemini.ts +2 -2
  84. package/src/web/search/render.ts +2 -2
  85. package/src/ipy/modules.ts +0 -144
  86. package/src/prompts/tools/python.md +0 -57
  87. /package/src/{ipy → eval/py}/cancellation.ts +0 -0
  88. /package/src/{ipy → eval/py}/prelude.ts +0 -0
  89. /package/src/{ipy → eval/py}/runtime.ts +0 -0
@@ -1,1655 +1,319 @@
1
- import * as fs from "node:fs";
2
- import * as os from "node:os";
3
- import * as path from "node:path";
4
- import { Readability } from "@mozilla/readability";
5
1
  import type { AgentTool, AgentToolContext, AgentToolResult, AgentToolUpdateCallback } from "@oh-my-pi/pi-agent-core";
6
2
  import { StringEnum } from "@oh-my-pi/pi-ai";
7
- import { $which, getPuppeteerDir, logger, prompt, Snowflake, untilAborted } from "@oh-my-pi/pi-utils";
3
+ import { prompt, untilAborted } from "@oh-my-pi/pi-utils";
8
4
  import { type Static, Type } from "@sinclair/typebox";
9
- import { type HTMLElement, parseHTML } from "linkedom";
10
- import type {
11
- Browser,
12
- CDPSession,
13
- ElementHandle,
14
- KeyInput,
15
- Page,
16
- default as Puppeteer,
17
- SerializedAXNode,
18
- } from "puppeteer-core";
19
5
  import browserDescription from "../prompts/tools/browser.md" with { type: "text" };
20
6
  import type { ToolSession } from "../sdk";
21
- import { resizeImage } from "../utils/image-resize";
22
- import { htmlToBasicMarkdown } from "../web/scrapers/types";
7
+ import { acquireBrowser, type BrowserHandle, type BrowserKind, type BrowserKindTag } from "./browser/registry";
8
+ import type { Observation, ScreenshotResult } from "./browser/tab-protocol";
9
+ import { acquireTab, dropHeadlessTabs, getTab, releaseAllTabs, releaseTab, runInTab } from "./browser/tab-supervisor";
23
10
  import type { OutputMeta } from "./output-meta";
24
- import { expandPath, resolveToCwd } from "./path-utils";
25
- import stealthTamperingScript from "./puppeteer/00_stealth_tampering.txt" with { type: "text" };
26
- import stealthActivityScript from "./puppeteer/01_stealth_activity.txt" with { type: "text" };
27
- import stealthHairlineScript from "./puppeteer/02_stealth_hairline.txt" with { type: "text" };
28
- import stealthBotdScript from "./puppeteer/03_stealth_botd.txt" with { type: "text" };
29
- import stealthIframeScript from "./puppeteer/04_stealth_iframe.txt" with { type: "text" };
30
- import stealthWebglScript from "./puppeteer/05_stealth_webgl.txt" with { type: "text" };
31
- import stealthScreenScript from "./puppeteer/06_stealth_screen.txt" with { type: "text" };
32
- import stealthFontsScript from "./puppeteer/07_stealth_fonts.txt" with { type: "text" };
33
- import stealthAudioScript from "./puppeteer/08_stealth_audio.txt" with { type: "text" };
34
- import stealthLocaleScript from "./puppeteer/09_stealth_locale.txt" with { type: "text" };
35
- import stealthPluginsScript from "./puppeteer/10_stealth_plugins.txt" with { type: "text" };
36
- import stealthHardwareScript from "./puppeteer/11_stealth_hardware.txt" with { type: "text" };
37
- import stealthCodecsScript from "./puppeteer/12_stealth_codecs.txt" with { type: "text" };
38
- import stealthWorkerScript from "./puppeteer/13_stealth_worker.txt" with { type: "text" };
39
- import { formatScreenshot } from "./render-utils";
11
+ import { resolveToCwd } from "./path-utils";
40
12
  import { ToolAbortError, ToolError, throwIfAborted } from "./tool-errors";
41
13
  import { toolResult } from "./tool-result";
42
14
  import { clampTimeout } from "./tool-timeouts";
43
15
 
44
- /**
45
- * Lazy-import puppeteer from a safe CWD so cosmiconfig doesn't choke
46
- * on malformed package.json files in the user's project tree.
47
- */
48
- let puppeteerModule: typeof Puppeteer | undefined;
49
- async function loadPuppeteer(): Promise<typeof Puppeteer> {
50
- if (puppeteerModule) return puppeteerModule;
51
- const prev = process.cwd();
52
- const safeDir = getPuppeteerDir();
53
- await Bun.write(path.join(safeDir, "package.json"), "{}");
54
- try {
55
- process.chdir(safeDir);
56
- puppeteerModule = (await import("puppeteer-core")).default;
57
- return puppeteerModule;
58
- } finally {
59
- process.chdir(prev);
60
- }
61
- }
62
-
63
- /**
64
- * Lazily download Chromium on first browser launch via @puppeteer/browsers.
65
- * Skipped when a system Chromium (NixOS) or PUPPETEER_EXECUTABLE_PATH is set.
66
- * The browser is cached under ~/.omp/puppeteer (getPuppeteerDir).
67
- */
68
- let chromiumExecutablePromise: Promise<string | undefined> | undefined;
69
- async function ensureChromiumExecutable(): Promise<string | undefined> {
70
- const sysChrome = resolveSystemChromium();
71
- if (sysChrome) return sysChrome;
72
- const envPath = process.env.PUPPETEER_EXECUTABLE_PATH;
73
- if (envPath) return envPath;
74
- if (chromiumExecutablePromise) return chromiumExecutablePromise;
75
-
76
- chromiumExecutablePromise = (async () => {
77
- const [browsers, revisions] = await Promise.all([
78
- import("@puppeteer/browsers"),
79
- import("puppeteer-core/internal/revisions.js"),
80
- ]);
81
- const platform = browsers.detectBrowserPlatform();
82
- if (!platform) {
83
- logger.warn("Could not detect browser platform; relying on puppeteer default resolution");
84
- return undefined;
85
- }
86
- const cacheDir = getPuppeteerDir();
87
- const buildId = await browsers.resolveBuildId(
88
- browsers.Browser.CHROME,
89
- platform,
90
- revisions.PUPPETEER_REVISIONS.chrome,
91
- );
92
- const executablePath = browsers.computeExecutablePath({
93
- browser: browsers.Browser.CHROME,
94
- buildId,
95
- cacheDir,
96
- platform,
97
- });
98
- if (fs.existsSync(executablePath)) return executablePath;
99
-
100
- logger.warn("Downloading Chromium for puppeteer (first browser use)", {
101
- buildId,
102
- platform,
103
- cacheDir,
104
- });
105
- let lastReportedPercent = -1;
106
- await browsers.install({
107
- browser: browsers.Browser.CHROME,
108
- buildId,
109
- cacheDir,
110
- platform,
111
- downloadProgressCallback: (downloaded, total) => {
112
- if (total <= 0) return;
113
- const pct = Math.floor((downloaded / total) * 100);
114
- if (pct >= lastReportedPercent + 10 || downloaded === total) {
115
- lastReportedPercent = pct;
116
- logger.debug(
117
- `Chromium download: ${pct}% (${Math.round(downloaded / 1_000_000)} / ${Math.round(total / 1_000_000)} MB)`,
118
- );
119
- }
120
- },
121
- });
122
- return executablePath;
123
- })().catch(err => {
124
- chromiumExecutablePromise = undefined;
125
- throw new ToolError(
126
- `Failed to install Chromium for puppeteer: ${(err as Error).message}. ` +
127
- "Set PUPPETEER_EXECUTABLE_PATH to use an existing Chrome/Chromium binary, or install one manually.",
128
- );
129
- });
130
- return chromiumExecutablePromise;
131
- }
132
-
133
- /**
134
- * Resolve a system-installed Chrome/Chromium so `puppeteer.launch()` can reuse
135
- * it instead of forcing a Chromium download. Returns `undefined` when no binary
136
- * is found, which lets the caller fall back to a managed download.
137
- *
138
- * Detection order (per platform):
139
- * - macOS: Google Chrome → Chromium → Microsoft Edge (system + user Applications)
140
- * - Linux: PATH lookups (google-chrome, chromium, etc.) → common /usr/bin paths,
141
- * with NixOS-specific profile paths added when /etc/NIXOS exists
142
- * - Windows: Program Files / LocalAppData install paths for Chrome and Edge
143
- *
144
- * Honored regardless of platform: PUPPETEER_EXECUTABLE_PATH callers should bypass
145
- * this entirely (handled in ensureChromiumExecutable).
146
- */
147
- let _resolvedChromium: string | null | undefined; // undefined = unchecked; null = not found
148
- function isExecutableFile(p: string): boolean {
149
- try {
150
- const st = fs.statSync(p);
151
- return st.isFile();
152
- } catch {
153
- return false;
154
- }
155
- }
156
-
157
- function systemChromiumCandidates(): string[] {
158
- const home = os.homedir();
159
- const candidates: string[] = [];
160
- switch (process.platform) {
161
- case "darwin": {
162
- for (const root of ["/Applications", path.join(home, "Applications")]) {
163
- candidates.push(
164
- path.join(root, "Google Chrome.app/Contents/MacOS/Google Chrome"),
165
- path.join(root, "Google Chrome Beta.app/Contents/MacOS/Google Chrome Beta"),
166
- path.join(root, "Google Chrome Dev.app/Contents/MacOS/Google Chrome Dev"),
167
- path.join(root, "Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary"),
168
- path.join(root, "Chromium.app/Contents/MacOS/Chromium"),
169
- path.join(root, "Microsoft Edge.app/Contents/MacOS/Microsoft Edge"),
170
- );
171
- }
172
- break;
173
- }
174
- case "linux": {
175
- const names = ["google-chrome-stable", "google-chrome", "chromium", "chromium-browser", "chrome"];
176
- for (const name of names) {
177
- const found = $which(name);
178
- if (found) candidates.push(found);
179
- }
180
- candidates.push(
181
- "/usr/bin/google-chrome-stable",
182
- "/usr/bin/google-chrome",
183
- "/usr/bin/chromium",
184
- "/usr/bin/chromium-browser",
185
- "/snap/bin/chromium",
186
- "/var/lib/flatpak/exports/bin/com.google.Chrome",
187
- "/var/lib/flatpak/exports/bin/org.chromium.Chromium",
188
- );
189
- let onNixos = false;
190
- try {
191
- onNixos = fs.existsSync("/etc/NIXOS");
192
- } catch {}
193
- if (onNixos) {
194
- candidates.push(path.join(home, ".nix-profile/bin/chromium"), "/run/current-system/sw/bin/chromium");
195
- }
196
- break;
197
- }
198
- case "win32": {
199
- const programFiles = process.env.ProgramFiles ?? "C:\\Program Files";
200
- const programFilesX86 = process.env["ProgramFiles(x86)"] ?? "C:\\Program Files (x86)";
201
- const localAppData = process.env.LOCALAPPDATA ?? path.join(home, "AppData\\Local");
202
- candidates.push(
203
- path.join(programFiles, "Google\\Chrome\\Application\\chrome.exe"),
204
- path.join(programFilesX86, "Google\\Chrome\\Application\\chrome.exe"),
205
- path.join(localAppData, "Google\\Chrome\\Application\\chrome.exe"),
206
- path.join(programFiles, "Chromium\\Application\\chrome.exe"),
207
- path.join(localAppData, "Chromium\\Application\\chrome.exe"),
208
- path.join(programFiles, "Microsoft\\Edge\\Application\\msedge.exe"),
209
- path.join(programFilesX86, "Microsoft\\Edge\\Application\\msedge.exe"),
210
- );
211
- break;
212
- }
213
- }
214
- return candidates;
215
- }
216
-
217
- function resolveSystemChromium(): string | undefined {
218
- if (_resolvedChromium !== undefined) return _resolvedChromium ?? undefined;
219
- const seen = new Set<string>();
220
- for (const candidate of systemChromiumCandidates()) {
221
- if (!candidate || seen.has(candidate)) continue;
222
- seen.add(candidate);
223
- if (isExecutableFile(candidate)) {
224
- _resolvedChromium = candidate;
225
- logger.debug("Using system Chrome/Chromium", { path: candidate });
226
- return candidate;
227
- }
228
- }
229
- _resolvedChromium = null;
230
- return undefined;
231
- }
232
-
233
- const DEFAULT_VIEWPORT = { width: 1365, height: 768, deviceScaleFactor: 1.25 };
234
- const STEALTH_IGNORE_DEFAULT_ARGS = [
235
- "--disable-extensions",
236
- "--disable-default-apps",
237
- "--disable-component-extensions-with-background-pages",
238
- ];
239
- const STEALTH_ACCEPT_LANGUAGE = "en-US,en";
240
- const PUPPETEER_SOURCE_URL_SUFFIX = "//# sourceURL=__puppeteer_evaluation_script__";
241
- const INTERACTIVE_AX_ROLES = new Set([
242
- "button",
243
- "link",
244
- "textbox",
245
- "combobox",
246
- "listbox",
247
- "option",
248
- "checkbox",
249
- "radio",
250
- "switch",
251
- "tab",
252
- "menuitem",
253
- "menuitemcheckbox",
254
- "menuitemradio",
255
- "slider",
256
- "spinbutton",
257
- "searchbox",
258
- "treeitem",
259
- ]);
260
-
261
- declare global {
262
- interface Element extends HTMLElement {}
263
-
264
- function getComputedStyle(element: Element): Record<string, unknown>;
265
- var innerWidth: number;
266
- var innerHeight: number;
267
- var document: {
268
- elementFromPoint(x: number, y: number): Element | null;
269
- };
270
- }
271
-
272
- const LEGACY_SELECTOR_PREFIXES = ["p-aria/", "p-text/", "p-xpath/", "p-pierce/"] as const;
273
-
274
- function normalizeSelector(selector: string): string {
275
- if (!selector) return selector;
276
- if (selector.startsWith("p-") && !LEGACY_SELECTOR_PREFIXES.some(prefix => selector.startsWith(prefix))) {
277
- throw new ToolError(
278
- `Unsupported selector prefix. Use CSS or puppeteer query handlers (aria/, text/, xpath/, pierce/). Got: ${selector}`,
279
- );
280
- }
281
- if (selector.startsWith("p-text/")) {
282
- return `text/${selector.slice("p-text/".length)}`;
283
- }
284
- if (selector.startsWith("p-xpath/")) {
285
- return `xpath/${selector.slice("p-xpath/".length)}`;
286
- }
287
- if (selector.startsWith("p-pierce/")) {
288
- return `pierce/${selector.slice("p-pierce/".length)}`;
289
- }
290
- if (selector.startsWith("p-aria/")) {
291
- const rest = selector.slice("p-aria/".length);
292
- // Playwright-style: p-aria/[name="Sign in"] → aria/Sign in
293
- const nameMatch = rest.match(/\[\s*name\s*=\s*(?:"([^"]+)"|'([^']+)'|([^\]]+))\s*\]/);
294
- const name = nameMatch?.[1] ?? nameMatch?.[2] ?? nameMatch?.[3];
295
- if (name) return `aria/${name.trim()}`;
296
- return `aria/${rest}`;
297
- }
298
- return selector;
299
- }
300
-
301
- type ActionabilityResult = { ok: true; x: number; y: number } | { ok: false; reason: string };
302
-
303
- async function resolveActionableQueryHandlerClickTarget(handles: ElementHandle[]): Promise<ElementHandle | null> {
304
- const candidates: Array<{
305
- handle: ElementHandle;
306
- rect: { x: number; y: number; w: number; h: number };
307
- ownedProxy?: ElementHandle;
308
- }> = [];
309
-
310
- for (const handle of handles) {
311
- let clickable: ElementHandle = handle;
312
- let clickableProxy: ElementHandle | null = null;
313
- try {
314
- const proxy = await handle.evaluateHandle(el => {
315
- const target =
316
- (el as Element).closest(
317
- 'a,button,[role="button"],[role="link"],input[type="button"],input[type="submit"]',
318
- ) ?? el;
319
- return target;
320
- });
321
- const nodeHandle = proxy.asElement();
322
- clickableProxy = nodeHandle ? (nodeHandle as unknown as ElementHandle) : null;
323
- if (clickableProxy) {
324
- clickable = clickableProxy;
325
- }
326
- } catch {
327
- // ignore
328
- }
329
-
330
- try {
331
- const intersecting = await clickable.isIntersectingViewport();
332
- if (!intersecting) continue;
333
- const rect = (await clickable.evaluate(el => {
334
- const r = (el as Element).getBoundingClientRect();
335
- return { x: r.left, y: r.top, w: r.width, h: r.height };
336
- })) as { x: number; y: number; w: number; h: number };
337
- if (rect.w < 1 || rect.h < 1) continue;
338
- candidates.push({ handle: clickable, rect, ownedProxy: clickableProxy ?? undefined });
339
- } catch {
340
- // ignore
341
- } finally {
342
- if (clickableProxy && clickableProxy !== handle && clickable !== clickableProxy) {
343
- try {
344
- await clickableProxy.dispose();
345
- } catch {}
346
- }
347
- }
348
- }
349
-
350
- if (!candidates.length) return null;
351
-
352
- // Prefer top-most visible element (nav/header usually wins), tie-break by left-most.
353
- candidates.sort((a, b) => a.rect.y - b.rect.y || a.rect.x - b.rect.x);
354
- const winner = candidates[0]?.handle ?? null;
355
- // Dispose owned proxies for non-winning candidates
356
- for (let i = 1; i < candidates.length; i++) {
357
- const c = candidates[i]!;
358
- if (c.ownedProxy) {
359
- try {
360
- await c.ownedProxy.dispose();
361
- } catch {}
362
- }
363
- }
364
- return winner;
365
- }
366
-
367
- async function isClickActionable(handle: ElementHandle): Promise<ActionabilityResult> {
368
- return (await handle.evaluate(el => {
369
- const element = el as HTMLElement;
370
- const style = globalThis.getComputedStyle(element);
371
- if (style.display === "none") return { ok: false as const, reason: "display:none" };
372
- if (style.visibility === "hidden") return { ok: false as const, reason: "visibility:hidden" };
373
- if (style.pointerEvents === "none") return { ok: false as const, reason: "pointer-events:none" };
374
- if (Number(style.opacity) === 0) return { ok: false as const, reason: "opacity:0" };
375
-
376
- const r = element.getBoundingClientRect();
377
- if (r.width < 1 || r.height < 1) return { ok: false as const, reason: "zero-size" };
378
-
379
- const vw = globalThis.innerWidth;
380
- const vh = globalThis.innerHeight;
381
- const left = Math.max(0, Math.min(vw, r.left));
382
- const right = Math.max(0, Math.min(vw, r.right));
383
- const top = Math.max(0, Math.min(vh, r.top));
384
- const bottom = Math.max(0, Math.min(vh, r.bottom));
385
- if (right - left < 1 || bottom - top < 1) return { ok: false as const, reason: "off-viewport" };
386
-
387
- const x = Math.floor((left + right) / 2);
388
- const y = Math.floor((top + bottom) / 2);
389
- const topEl = globalThis.document.elementFromPoint(x, y);
390
- if (!topEl) return { ok: false as const, reason: "elementFromPoint-null" };
391
- if (topEl === element || element.contains(topEl) || (topEl as Element).contains(element)) {
392
- return { ok: true as const, x, y };
393
- }
394
- return { ok: false as const, reason: "obscured" };
395
- })) as ActionabilityResult;
396
- }
397
-
398
- async function clickQueryHandlerText(
399
- page: Page,
400
- selector: string,
401
- timeoutMs: number,
402
- signal?: AbortSignal,
403
- ): Promise<void> {
404
- const timeoutSignal = AbortSignal.timeout(timeoutMs);
405
- const clickSignal = signal ? AbortSignal.any([signal, timeoutSignal]) : timeoutSignal;
406
- const start = Date.now();
407
- let lastSeen = 0;
408
- let lastReason: string | null = null;
409
-
410
- while (Date.now() - start < timeoutMs) {
411
- throwIfAborted(clickSignal);
412
- const handles = (await untilAborted(clickSignal, () => page.$$(selector))) as ElementHandle[];
413
- try {
414
- lastSeen = handles.length;
415
- const target = await resolveActionableQueryHandlerClickTarget(handles);
416
- if (!target) {
417
- lastReason = handles.length ? "no-visible-candidate" : "no-matches";
418
- await Bun.sleep(100);
419
- continue;
420
- }
421
- const actionability = await isClickActionable(target);
422
- if (!actionability.ok) {
423
- lastReason = actionability.reason;
424
- await Bun.sleep(100);
425
- continue;
426
- }
427
-
428
- try {
429
- await untilAborted(clickSignal, () => target.click());
430
- return;
431
- } catch (err) {
432
- lastReason = err instanceof Error ? err.message : String(err);
433
- await Bun.sleep(100);
434
- }
435
- } finally {
436
- await Promise.all(
437
- handles.map(async h => {
438
- try {
439
- await h.dispose();
440
- } catch {}
441
- }),
442
- );
443
- }
444
- }
16
+ export { extractReadableFromHtml, type ReadableFormat, type ReadableResult } from "./browser/readable";
17
+ export type { Observation, ObservationEntry } from "./browser/tab-protocol";
445
18
 
446
- throw new ToolError(
447
- `Timed out clicking ${selector} (seen ${lastSeen} matches; last reason: ${lastReason ?? "unknown"}). ` +
448
- "If there are multiple matching elements, use observe+click_id or a more specific selector.",
449
- );
450
- }
451
-
452
- /**
453
- * Stealth init scripts for Puppeteer.
454
- */
455
-
456
- type PuppeteerCdpClient = {
457
- send: (method: string, params?: Record<string, unknown>) => Promise<unknown>;
458
- };
19
+ const DEFAULT_TAB_NAME = "main";
459
20
 
460
- type UserAgentOverride = {
461
- userAgent: string;
462
- platform: string;
463
- acceptLanguage: string;
464
- userAgentMetadata: {
465
- brands: Array<{ brand: string; version: string }>;
466
- fullVersion: string;
467
- platform: string;
468
- platformVersion: string;
469
- architecture: string;
470
- model: string;
471
- mobile: boolean;
472
- };
473
- };
474
-
475
- function resolvePageClient(page: Page): PuppeteerCdpClient | null {
476
- const pageWithClient = page as Page & {
477
- _client?: (() => PuppeteerCdpClient) | PuppeteerCdpClient;
478
- };
479
- if (!pageWithClient._client) return null;
480
- return typeof pageWithClient._client === "function" ? pageWithClient._client() : pageWithClient._client;
481
- }
482
-
483
- const puppeteerGetArgsSchema = Type.Array(
484
- Type.Object({
485
- selector: Type.String({
486
- description: "target element selector",
487
- examples: ["aria/Sign in", "text/Continue", "xpath/...", "pierce/..."],
488
- }),
489
- attribute: Type.Optional(Type.String({ description: "attribute name", examples: ["href", "data-id"] })),
490
- }),
491
- { description: "batch get_* args", minItems: 1 },
492
- );
493
-
494
- const browserSchema = Type.Object({
495
- action: StringEnum(
496
- [
497
- "open",
498
- "goto",
499
- "observe",
500
- "click",
501
- "click_id",
502
- "type",
503
- "type_id",
504
- "fill",
505
- "fill_id",
506
- "press",
507
- "scroll",
508
- "drag",
509
- "wait_for_selector",
510
- "evaluate",
511
- "get_text",
512
- "get_html",
513
- "get_attribute",
514
- "extract_readable",
515
- "screenshot",
516
- "close",
517
- ],
518
- { description: "action to perform" },
519
- ),
520
- url: Type.Optional(Type.String({ description: "url to navigate to", examples: ["https://example.com"] })),
521
- selector: Type.Optional(
21
+ const appSchema = Type.Object({
22
+ path: Type.Optional(
522
23
  Type.String({
523
- description: "target element selector",
524
- examples: ["aria/Sign in", "text/Continue", "xpath/...", "pierce/..."],
24
+ description: "absolute path to a binary to spawn (single-instance reuse)",
25
+ examples: ["/Applications/Cursor.app/Contents/MacOS/Cursor"],
525
26
  }),
526
27
  ),
527
- element_id: Type.Optional(Type.Number({ description: "observed element id" })),
528
- include_all: Type.Optional(Type.Boolean({ description: "include non-interactive nodes" })),
529
- viewport_only: Type.Optional(Type.Boolean({ description: "limit to viewport" })),
530
- args: Type.Optional(puppeteerGetArgsSchema),
531
- script: Type.Optional(
532
- Type.String({ description: "javascript expression", examples: ["document.title", "window.location.href"] }),
533
- ),
534
- text: Type.Optional(Type.String({ description: "text to type", examples: ["hello world"] })),
535
- value: Type.Optional(Type.String({ description: "value to set", examples: ["hello"] })),
536
- attribute: Type.Optional(Type.String({ description: "attribute to read", examples: ["href", "data-id"] })),
537
- key: Type.Optional(Type.String({ description: "keyboard key", examples: ["Enter", "Tab", "Escape"] })),
538
- timeout: Type.Optional(Type.Number({ description: "timeout in seconds", default: 30 })),
539
- wait_until: Type.Optional(
540
- StringEnum(["load", "domcontentloaded", "networkidle0", "networkidle2"], {
541
- description: "navigation wait condition",
28
+ cdp_url: Type.Optional(
29
+ Type.String({
30
+ description: "existing CDP endpoint to connect to (e.g. http://127.0.0.1:9222)",
542
31
  }),
543
32
  ),
544
- full_page: Type.Optional(Type.Boolean({ description: "full page screenshot" })),
545
- format: Type.Optional(
546
- StringEnum(["text", "markdown"], {
547
- description: "output format",
33
+ args: Type.Optional(Type.Array(Type.String(), { description: "extra CLI args when spawning" })),
34
+ target: Type.Optional(Type.String({ description: "substring matched against url+title to pick a BrowserWindow" })),
35
+ });
36
+
37
+ const browserSchema = Type.Object({
38
+ action: StringEnum(["open", "close", "run"], { description: "tab/browser operation" }),
39
+ name: Type.Optional(
40
+ Type.String({
41
+ description: "tab id; default 'main'. Multiple tabs can coexist; reusable across run() calls and subagents.",
42
+ examples: ["main", "docs", "gh"],
548
43
  }),
549
44
  ),
550
- path: Type.Optional(Type.String({ description: "screenshot save path", examples: ["out.png"] })),
45
+ url: Type.Optional(Type.String({ description: "open: navigate after acquiring tab" })),
46
+ app: Type.Optional(appSchema),
551
47
  viewport: Type.Optional(
552
48
  Type.Object({
553
- width: Type.Number({ description: "viewport width" }),
554
- height: Type.Number({ description: "viewport height" }),
555
- device_scale_factor: Type.Optional(Type.Number({ description: "device scale factor" })),
49
+ width: Type.Number(),
50
+ height: Type.Number(),
51
+ scale: Type.Optional(Type.Number()),
556
52
  }),
557
53
  ),
558
- delta_x: Type.Optional(Type.Number({ description: "scroll delta x" })),
559
- delta_y: Type.Optional(Type.Number({ description: "scroll delta y" })),
560
- from_selector: Type.Optional(
561
- Type.String({
562
- description: "drag start selector",
563
- examples: ["aria/Drag handle"],
54
+ wait_until: Type.Optional(
55
+ StringEnum(["load", "domcontentloaded", "networkidle0", "networkidle2"], {
56
+ description: "navigation wait condition for url",
57
+ }),
58
+ ),
59
+ dialogs: Type.Optional(
60
+ StringEnum(["accept", "dismiss"], {
61
+ description: "open: auto-handle alert/confirm/beforeunload dialogs (default: leave for caller to handle)",
564
62
  }),
565
63
  ),
566
- to_selector: Type.Optional(
64
+ code: Type.Optional(
567
65
  Type.String({
568
- description: "drag end selector",
569
- examples: ["text/Drop zone"],
66
+ description:
67
+ "run: JS body executed with `page`, `browser`, `tab`, `display`, `assert`, `wait` in scope. Treated as the body of an async function. Use `display(value)` to attach text/JSON/images; the function's return value is JSON-serialized as a final block.",
570
68
  }),
571
69
  ),
70
+ timeout: Type.Optional(Type.Number({ description: "timeout in seconds", default: 30 })),
71
+ all: Type.Optional(Type.Boolean({ description: "close: close every tab" })),
72
+ kill: Type.Optional(Type.Boolean({ description: "close: also kill spawned-app browsers (default: leave running)" })),
572
73
  });
573
74
 
574
- /** Input schema for the Puppeteer tool. */
75
+ /** Input schema for the browser tool. */
575
76
  export type BrowserParams = Static<typeof browserSchema>;
576
77
 
577
- /** Details describing a Puppeteer tool execution result. */
78
+ /** Details describing a browser tool execution result (for renderers + transcript). */
578
79
  export interface BrowserToolDetails {
579
80
  action: BrowserParams["action"];
81
+ name?: string;
580
82
  url?: string;
581
- selector?: string;
582
- elementId?: number;
583
- result?: string | string[];
584
- screenshotPath?: string;
585
- mimeType?: string;
586
- bytes?: number;
83
+ browser?: BrowserKindTag;
587
84
  viewport?: { width: number; height: number; deviceScaleFactor?: number };
588
85
  observation?: Observation;
589
- readable?: ReadableResult;
86
+ screenshots?: ScreenshotResult[];
87
+ result?: string;
590
88
  meta?: OutputMeta;
591
89
  }
592
90
 
593
- export interface ObservationEntry {
594
- id: number;
595
- role: string;
596
- name?: string;
597
- value?: string | number;
598
- description?: string;
599
- keyshortcuts?: string;
600
- states: string[];
601
- }
602
-
603
- export interface Observation {
604
- url: string;
605
- title?: string;
606
- viewport: { width: number; height: number; deviceScaleFactor?: number };
607
- scroll: {
608
- x: number;
609
- y: number;
610
- width: number;
611
- height: number;
612
- scrollWidth: number;
613
- scrollHeight: number;
614
- };
615
- elements: ObservationEntry[];
616
- }
617
-
618
- export interface ReadableResult {
619
- url: string;
620
- title?: string;
621
- byline?: string;
622
- excerpt?: string;
623
- contentLength: number;
624
- text?: string;
625
- markdown?: string;
626
- }
627
-
628
- type ReadableFormat = "text" | "markdown";
629
-
630
- /** Trim to non-empty string or undefined. */
631
- function normalize(text: string | null | undefined): string | undefined {
632
- const trimmed = text?.trim();
633
- return trimmed || undefined;
634
- }
635
-
636
- /**
637
- * Extract readable content from raw HTML.
638
- * Tries Readability (article-isolation scoring) first, then falls back to a
639
- * CSS selector chain over the same pre-parsed DOM. Returns null if neither
640
- * path yields usable content.
641
- */
642
- export function extractReadableFromHtml(html: string, url: string, format: ReadableFormat): ReadableResult | null {
643
- const { document } = parseHTML(html);
644
-
645
- // --- Primary: Readability article extraction ---
646
- const article = new Readability(document).parse();
647
- if (article) {
648
- const result = toReadableResult(url, format, article.textContent, article.content, {
649
- title: article.title,
650
- byline: article.byline,
651
- excerpt: article.excerpt,
652
- length: article.length,
653
- });
654
- if (result) return result;
91
+ function resolveBrowserKind(params: BrowserParams, session: ToolSession): BrowserKind {
92
+ const app = params.app;
93
+ if (app?.cdp_url) {
94
+ return { kind: "connected", cdpUrl: app.cdp_url.replace(/\/+$/, "") };
655
95
  }
656
-
657
- // --- Fallback: CSS selector chain ---
658
- const candidates = [
659
- document.querySelector("[data-pagefind-body]"),
660
- document.querySelector("main article"),
661
- document.querySelector("article"),
662
- document.querySelector("main"),
663
- document.querySelector("[role='main']"),
664
- document.body,
665
- ];
666
- for (const el of candidates) {
667
- if (!el) continue;
668
- const innerHTML = el.innerHTML?.trim();
669
- const textContent = el.textContent?.trim();
670
- if (!innerHTML || !textContent) continue;
671
- const result = toReadableResult(url, format, textContent, innerHTML, {
672
- title: document.title,
673
- excerpt: textContent.slice(0, 240),
674
- length: textContent.length,
675
- });
676
- if (result) return result;
677
- }
678
-
679
- return null;
680
- }
681
-
682
- /** Shared builder for both extraction paths. */
683
- function toReadableResult(
684
- url: string,
685
- format: ReadableFormat,
686
- textContent: string | null | undefined,
687
- htmlContent: string | null | undefined,
688
- meta: { title?: string | null; byline?: string | null; excerpt?: string | null; length?: number | null },
689
- ): ReadableResult | null {
690
- const text = normalize(textContent);
691
- const markdown = format === "markdown" ? (normalize(htmlToBasicMarkdown(htmlContent ?? "")) ?? text) : undefined;
692
- const normalizedText = format === "text" ? text : undefined;
693
- if (!normalizedText && !markdown) return null;
694
- return {
695
- url,
696
- title: normalize(meta.title),
697
- byline: normalize(meta.byline),
698
- excerpt: normalize(meta.excerpt),
699
- contentLength: meta.length ?? text?.length ?? markdown?.length ?? 0,
700
- text: normalizedText,
701
- markdown,
702
- };
703
- }
704
-
705
- function ensureParam<T>(value: T | undefined, name: string, action: string): T {
706
- if (value === undefined || value === null || value === "") {
707
- throw new ToolError(`Missing required parameter '${name}' for action '${action}'.`);
708
- }
709
- return value;
710
- }
711
-
712
- function formatEvaluateResult(value: unknown): string {
713
- if (typeof value === "string") return value;
714
- if (value === undefined) return "undefined";
715
- try {
716
- const serialized = JSON.stringify(value, null, 2);
717
- return serialized ?? "undefined";
718
- } catch {
719
- return String(value);
96
+ if (app?.path) {
97
+ const exe = resolveToCwd(app.path, session.cwd);
98
+ return { kind: "spawned", path: exe };
720
99
  }
100
+ const headless = session.settings.get("browser.headless") as boolean;
101
+ return { kind: "headless", headless };
721
102
  }
722
103
 
723
104
  /**
724
- * Puppeteer tool for headless browser automation.
105
+ * Browser tool: stateful, multi-tab. Three actions:
106
+ * - `open` → acquire/create a named tab on a browser kind (headless | spawned | connected) and optionally goto a url.
107
+ * - `close` → release a named tab (or all tabs); dispose browser when refcount hits 0.
108
+ * - `run` → execute JS code against an existing tab with `page`/`browser`/`tab` helpers in scope.
725
109
  */
726
110
  export class BrowserTool implements AgentTool<typeof browserSchema, BrowserToolDetails> {
727
- readonly name = "puppeteer";
728
- readonly label = "Puppeteer";
111
+ readonly name = "browser";
112
+ readonly label = "Browser";
729
113
  readonly description: string;
730
114
  readonly parameters = browserSchema;
731
115
  readonly strict = true;
732
- #browser: Browser | null = null;
733
- #page: Page | null = null;
734
- #currentHeadless: boolean | null = null;
735
- #browserSession: CDPSession | null = null;
736
- #userAgentOverride: UserAgentOverride | null = null;
737
- #elementIdCounter = 0;
738
- readonly #elementCache = new Map<number, ElementHandle>();
739
- readonly #patchedClients = new WeakSet<object>();
740
116
 
741
117
  constructor(private readonly session: ToolSession) {
742
118
  this.description = prompt.render(browserDescription, {});
743
119
  }
744
120
 
745
- async #closeBrowser(): Promise<void> {
746
- await this.#clearElementCache();
747
- if (this.#page && !this.#page.isClosed()) {
748
- await this.#page.close();
749
- }
750
- this.#page = null;
751
- if (this.#browser?.connected) {
752
- await this.#browser.close();
753
- }
754
- this.#browser = null;
755
- this.#browserSession = null;
756
- this.#userAgentOverride = null;
121
+ /** Restart browser to apply mode changes (e.g. headless toggle). Drops only headless browsers. */
122
+ async restartForModeChange(): Promise<void> {
123
+ await dropHeadlessTabs();
757
124
  }
758
125
 
759
- async #resetBrowser(params?: BrowserParams): Promise<Page> {
760
- await this.#closeBrowser();
761
- this.#currentHeadless = this.session.settings.get("browser.headless");
762
- const vp = params?.viewport;
763
- const initialViewport = vp
764
- ? {
765
- width: vp.width,
766
- height: vp.height,
767
- deviceScaleFactor: vp.device_scale_factor ?? DEFAULT_VIEWPORT.deviceScaleFactor,
768
- }
769
- : DEFAULT_VIEWPORT;
770
- const puppeteer = await loadPuppeteer();
771
- const launchArgs = [
772
- "--no-sandbox",
773
- "--disable-setuid-sandbox",
774
- "--disable-blink-features=AutomationControlled",
775
- `--window-size=${initialViewport.width},${initialViewport.height}`,
776
- ];
777
- const proxy = process.env.PUPPETEER_PROXY;
778
- if (proxy) {
779
- launchArgs.push(`--proxy-server=${proxy}`);
780
- // Chrome (since v72) bypasses proxies for localhost by default. When PUPPETEER_PROXY_BYPASS_LOOPBACK
781
- // is true, add <-loopback> so traffic to localhost reaches the proxy (e.g. for mitmdump/auth capture).
782
- const bypassLoopback = process.env.PUPPETEER_PROXY_BYPASS_LOOPBACK?.toLowerCase();
783
- if (
784
- bypassLoopback === "true" ||
785
- bypassLoopback === "1" ||
786
- bypassLoopback === "yes" ||
787
- bypassLoopback === "on"
788
- ) {
789
- launchArgs.push("--proxy-bypass-list=<-loopback>");
790
- }
791
- }
792
- const ignoreCert = process.env.PUPPETEER_PROXY_IGNORE_CERT_ERRORS?.toLowerCase();
793
- if (ignoreCert === "true" || ignoreCert === "1" || ignoreCert === "yes" || ignoreCert === "on") {
794
- launchArgs.push("--ignore-certificate-errors");
795
- }
796
- this.#browser = await puppeteer.launch({
797
- headless: this.#currentHeadless,
798
- defaultViewport: this.#currentHeadless ? initialViewport : null,
799
- executablePath: await ensureChromiumExecutable(),
800
- args: launchArgs,
801
- ignoreDefaultArgs: [...STEALTH_IGNORE_DEFAULT_ARGS],
802
- });
803
- this.#page = await this.#browser.newPage();
804
- await this.#applyStealthPatches(this.#page);
805
- if (this.#currentHeadless || params?.viewport) {
806
- await this.#applyViewport(this.#page, params?.viewport);
807
- }
808
- return this.#page;
809
- }
126
+ async execute(
127
+ _toolCallId: string,
128
+ params: BrowserParams,
129
+ signal?: AbortSignal,
130
+ _onUpdate?: AgentToolUpdateCallback<BrowserToolDetails>,
131
+ _ctx?: AgentToolContext,
132
+ ): Promise<AgentToolResult<BrowserToolDetails>> {
133
+ try {
134
+ throwIfAborted(signal);
135
+ const timeoutSeconds = clampTimeout("browser", params.timeout);
136
+ const timeoutMs = timeoutSeconds * 1000;
137
+ const name = params.name ?? DEFAULT_TAB_NAME;
138
+ const details: BrowserToolDetails = { action: params.action, name };
810
139
 
811
- async #ensurePage(params?: BrowserParams): Promise<Page> {
812
- const desiredHeadless = this.session.settings.get("browser.headless");
813
- if (this.#currentHeadless !== null && this.#currentHeadless !== desiredHeadless) {
814
- return this.#resetBrowser(params);
815
- }
816
- if (this.#page && !this.#page.isClosed()) {
817
- return this.#page;
818
- }
819
- if (!this.#browser?.isConnected()) {
820
- return this.#resetBrowser(params);
821
- }
822
- this.#page = await this.#browser.newPage();
823
- await this.#applyStealthPatches(this.#page);
824
- if (this.#currentHeadless || params?.viewport) {
825
- await this.#applyViewport(this.#page, params?.viewport);
140
+ switch (params.action) {
141
+ case "open":
142
+ return await this.#open(name, params, details, timeoutMs, signal);
143
+ case "close":
144
+ return await this.#close(name, params, details, signal);
145
+ case "run":
146
+ return await this.#run(name, params, details, timeoutMs, signal);
147
+ default:
148
+ throw new ToolError(`Unsupported action: ${(params as BrowserParams).action}`);
149
+ }
150
+ } catch (error) {
151
+ if (error instanceof ToolAbortError) throw error;
152
+ if (error instanceof Error && error.name === "AbortError") {
153
+ throw new ToolAbortError();
154
+ }
155
+ throw error;
826
156
  }
827
- return this.#page;
828
157
  }
829
158
 
830
- async #applyViewport(page: Page, viewport?: BrowserParams["viewport"]): Promise<void> {
831
- if (!viewport) {
832
- await page.setViewport(DEFAULT_VIEWPORT);
833
- return;
159
+ async #open(
160
+ name: string,
161
+ params: BrowserParams,
162
+ details: BrowserToolDetails,
163
+ timeoutMs: number,
164
+ signal?: AbortSignal,
165
+ ): Promise<AgentToolResult<BrowserToolDetails>> {
166
+ const kind = resolveBrowserKind(params, this.session);
167
+ details.browser = kind.kind;
168
+
169
+ // If a tab with this name already exists on a different browser kind, fail fast — caller must close first.
170
+ const existing = getTab(name);
171
+ if (existing && !sameBrowserKind(existing.browser.kind, kind)) {
172
+ throw new ToolError(
173
+ `Tab ${JSON.stringify(name)} is bound to a different browser (${describeKind(existing.browser.kind)}). Close it first.`,
174
+ );
834
175
  }
835
- await page.setViewport({
836
- width: viewport.width,
837
- height: viewport.height,
838
- deviceScaleFactor: viewport.device_scale_factor ?? DEFAULT_VIEWPORT.deviceScaleFactor,
839
- });
840
- }
841
176
 
842
- async #clearElementCache(): Promise<void> {
843
- if (this.#elementCache.size === 0) {
844
- this.#elementIdCounter = 0;
845
- return;
846
- }
847
- const handles = Array.from(this.#elementCache.values());
848
- this.#elementCache.clear();
849
- this.#elementIdCounter = 0;
850
- await Promise.all(
851
- handles.map(async handle => {
852
- try {
853
- await handle.dispose();
854
- } catch {
855
- return;
856
- }
177
+ const browser = await untilAborted(signal, () =>
178
+ acquireBrowser(kind, {
179
+ cwd: this.session.cwd,
180
+ viewport: params.viewport
181
+ ? {
182
+ width: params.viewport.width,
183
+ height: params.viewport.height,
184
+ deviceScaleFactor: params.viewport.scale,
185
+ }
186
+ : undefined,
187
+ appArgs: params.app?.args,
188
+ signal,
857
189
  }),
858
190
  );
859
- }
860
191
 
861
- async #resolveCachedHandle(id: number): Promise<ElementHandle> {
862
- const handle = this.#elementCache.get(id);
863
- if (!handle) {
864
- throw new ToolError(`Unknown element_id ${id}. Run observe to refresh the element list.`);
865
- }
866
- try {
867
- const isConnected = (await handle.evaluate(el => el.isConnected)) as boolean;
868
- if (!isConnected) {
869
- await this.#clearElementCache();
870
- throw new ToolError(`Element_id ${id} is stale. Run observe again.`);
871
- }
872
- } catch {
873
- await this.#clearElementCache();
874
- throw new ToolError(`Element_id ${id} is stale. Run observe again.`);
875
- }
876
- return handle;
877
- }
878
-
879
- #isInteractiveNode(node: SerializedAXNode): boolean {
880
- if (INTERACTIVE_AX_ROLES.has(node.role)) return true;
881
- return (
882
- node.checked !== undefined ||
883
- node.pressed !== undefined ||
884
- node.selected !== undefined ||
885
- node.expanded !== undefined ||
886
- node.focused === true
192
+ const result = await untilAborted(signal, () =>
193
+ acquireTab(name, browser, {
194
+ url: params.url,
195
+ waitUntil: params.wait_until,
196
+ viewport: params.viewport
197
+ ? {
198
+ width: params.viewport.width,
199
+ height: params.viewport.height,
200
+ deviceScaleFactor: params.viewport.scale,
201
+ }
202
+ : undefined,
203
+ target: params.app?.target,
204
+ timeoutMs,
205
+ dialogs: params.dialogs,
206
+ signal,
207
+ }),
887
208
  );
888
- }
889
-
890
- async #collectObservationEntries(
891
- node: SerializedAXNode,
892
- entries: ObservationEntry[],
893
- options: { viewportOnly: boolean; includeAll: boolean },
894
- ): Promise<void> {
895
- if (options.includeAll || this.#isInteractiveNode(node)) {
896
- const handle = await node.elementHandle();
897
- if (handle) {
898
- let inViewport = true;
899
- if (options.viewportOnly) {
900
- try {
901
- inViewport = await handle.isIntersectingViewport();
902
- } catch {
903
- inViewport = false;
904
- }
905
- }
906
- if (inViewport) {
907
- const id = ++this.#elementIdCounter;
908
- const states: string[] = [];
909
- if (node.disabled) states.push("disabled");
910
- if (node.checked !== undefined) states.push(`checked=${String(node.checked)}`);
911
- if (node.pressed !== undefined) states.push(`pressed=${String(node.pressed)}`);
912
- if (node.selected !== undefined) states.push(`selected=${String(node.selected)}`);
913
- if (node.expanded !== undefined) states.push(`expanded=${String(node.expanded)}`);
914
- if (node.required) states.push("required");
915
- if (node.readonly) states.push("readonly");
916
- if (node.multiselectable) states.push("multiselectable");
917
- if (node.multiline) states.push("multiline");
918
- if (node.modal) states.push("modal");
919
- if (node.focused) states.push("focused");
920
- this.#elementCache.set(id, handle);
921
- entries.push({
922
- id,
923
- role: node.role,
924
- name: node.name,
925
- value: node.value,
926
- description: node.description,
927
- keyshortcuts: node.keyshortcuts,
928
- states,
929
- });
930
- } else {
931
- await handle.dispose();
932
- }
933
- }
934
- }
935
- for (const child of node.children ?? []) {
936
- await this.#collectObservationEntries(child, entries, options);
937
- }
938
- }
939
-
940
- #formatObservation(observation: Observation): string {
941
- const viewport = `${observation.viewport.width}x${observation.viewport.height}`;
942
- const scroll = `x=${observation.scroll.x} y=${observation.scroll.y} viewport=${observation.scroll.width}x${observation.scroll.height} doc=${observation.scroll.scrollWidth}x${observation.scroll.scrollHeight}`;
209
+ const tab = result.tab;
210
+ const url = tab.info.url;
211
+ const title = tab.info.title ?? "";
212
+ details.url = url;
213
+ details.viewport = tab.info.viewport;
214
+ const verb = result.created ? "Opened" : "Reused";
943
215
  const lines = [
944
- `URL: ${observation.url}`,
945
- observation.title ? `Title: ${observation.title}` : "Title:",
946
- `Viewport: ${viewport}`,
947
- `Scroll: ${scroll}`,
948
- "Elements:",
949
- ];
950
- for (const entry of observation.elements) {
951
- const name = entry.name ? ` "${entry.name}"` : "";
952
- const value = entry.value !== undefined ? ` value=${JSON.stringify(entry.value)}` : "";
953
- const description = entry.description ? ` desc=${JSON.stringify(entry.description)}` : "";
954
- const shortcuts = entry.keyshortcuts ? ` shortcuts=${JSON.stringify(entry.keyshortcuts)}` : "";
955
- const state = entry.states.length ? ` (${entry.states.join(", ")})` : "";
956
- lines.push(`${entry.id}. ${entry.role}${name}${value}${description}${shortcuts}${state}`);
957
- }
958
- return lines.join("\n");
959
- }
960
-
961
- /**
962
- * Restart the browser to apply changes like headless mode.
963
- */
964
- async restartForModeChange(): Promise<void> {
965
- await this.#resetBrowser();
216
+ `${verb} tab ${JSON.stringify(name)} on ${describeBrowser(browser)}`,
217
+ `URL: ${url}`,
218
+ title ? `Title: ${title}` : null,
219
+ ].filter((l): l is string => typeof l === "string");
220
+ details.result = lines.join("\n");
221
+ return toolResult(details).text(lines.join("\n")).done();
966
222
  }
967
223
 
968
- async #applyStealthPatches(page: Page): Promise<void> {
969
- this.#patchSourceUrl(page);
970
- await this.#applyUserAgentOverride(page);
971
- await this.#injectStealthScripts(page);
972
- }
973
-
974
- async #applyUserAgentOverride(page: Page): Promise<void> {
975
- const client = resolvePageClient(page);
976
- if (!client) return;
977
- const override = await this.#resolveUserAgentOverride(page);
978
- await this.#sendUserAgentOverride(client, override);
979
- await this.#configureUserAgentTargets(override);
980
- }
981
-
982
- async #resolveUserAgentOverride(page: Page): Promise<UserAgentOverride> {
983
- if (this.#userAgentOverride) return this.#userAgentOverride;
984
- const rawUserAgent = await page.browser().userAgent();
985
- let userAgent = rawUserAgent.replace("HeadlessChrome/", "Chrome/");
986
- if (userAgent.includes("Linux") && !userAgent.includes("Android")) {
987
- userAgent = userAgent.replace(/\(([^)]+)\)/, "(Windows NT 10.0; Win64; x64)");
224
+ async #close(
225
+ name: string,
226
+ params: BrowserParams,
227
+ details: BrowserToolDetails,
228
+ signal?: AbortSignal,
229
+ ): Promise<AgentToolResult<BrowserToolDetails>> {
230
+ const kill = !!params.kill;
231
+ if (params.all) {
232
+ const count = await untilAborted(signal, () => releaseAllTabs({ kill }));
233
+ details.result = `Closed ${count} tab(s)`;
234
+ return toolResult(details).text(details.result).done();
988
235
  }
989
-
990
- const uaVersionMatch = userAgent.match(/Chrome\/([\d|.]+)/);
991
- const fallbackVersionMatch = uaVersionMatch ?? (await page.browser().version()).match(/\/([\d|.]+)/);
992
- const uaVersion = fallbackVersionMatch?.[1] ?? "0";
993
- const majorVersion = Number.parseInt(uaVersion.split(".")[0] ?? "0", 10) || 0;
994
- const isAndroid = userAgent.includes("Android");
995
- const platform = userAgent.includes("Mac OS X")
996
- ? "MacIntel"
997
- : isAndroid
998
- ? "Android"
999
- : userAgent.includes("Linux")
1000
- ? "Linux"
1001
- : "Win32";
1002
- const platformFull = userAgent.includes("Mac OS X")
1003
- ? "Mac OS X"
1004
- : isAndroid
1005
- ? "Android"
1006
- : userAgent.includes("Linux")
1007
- ? "Linux"
1008
- : "Windows";
1009
- const platformVersion = userAgent.includes("Mac OS X ")
1010
- ? (userAgent.match(/Mac OS X ([^)]+)/)?.[1] ?? "")
1011
- : userAgent.includes("Android ")
1012
- ? (userAgent.match(/Android ([^;]+)/)?.[1] ?? "")
1013
- : userAgent.includes("Windows ")
1014
- ? (userAgent.match(/Windows .*?([\d|.]+);?/)?.[1] ?? "")
1015
- : "";
1016
- const architecture = isAndroid ? "" : "x86";
1017
- const model = isAndroid ? (userAgent.match(/Android.*?;\s([^)]+)/)?.[1] ?? "") : "";
1018
-
1019
- const brandOrders = [
1020
- [0, 1, 2],
1021
- [0, 2, 1],
1022
- [1, 0, 2],
1023
- [1, 2, 0],
1024
- [2, 0, 1],
1025
- [2, 1, 0],
1026
- ];
1027
- const order = brandOrders[majorVersion % brandOrders.length] ?? brandOrders[0];
1028
- const escapedChars = [" ", " ", ";"];
1029
- const greaseyBrand = `${escapedChars[order[0]]}Not${escapedChars[order[1]]}A${escapedChars[order[2]]}Brand`;
1030
- const brands: { brand: string; version: string }[] = [];
1031
- brands[order[0]] = { brand: greaseyBrand, version: "99" };
1032
- brands[order[1]] = { brand: "Chromium", version: String(majorVersion) };
1033
- brands[order[2]] = { brand: "Google Chrome", version: String(majorVersion) };
1034
-
1035
- this.#userAgentOverride = {
1036
- userAgent,
1037
- platform,
1038
- acceptLanguage: STEALTH_ACCEPT_LANGUAGE,
1039
- userAgentMetadata: {
1040
- brands,
1041
- fullVersion: uaVersion,
1042
- platform: platformFull,
1043
- platformVersion,
1044
- architecture,
1045
- model,
1046
- mobile: isAndroid,
1047
- },
1048
- };
1049
- return this.#userAgentOverride;
236
+ const closed = await untilAborted(signal, () => releaseTab(name, { kill }));
237
+ details.result = closed ? `Closed tab ${JSON.stringify(name)}` : `No tab named ${JSON.stringify(name)}`;
238
+ return toolResult(details).text(details.result).done();
1050
239
  }
1051
240
 
1052
- async #configureUserAgentTargets(override: UserAgentOverride): Promise<void> {
1053
- if (!this.#browser) return;
1054
- if (!this.#browserSession) {
1055
- this.#browserSession = await this.#browser.target().createCDPSession();
1056
- await this.#browserSession.send("Target.setAutoAttach", {
1057
- autoAttach: true,
1058
- waitForDebuggerOnStart: false,
1059
- flatten: true,
1060
- });
1061
- this.#browserSession.on("Target.attachedToTarget", async (event: { sessionId: string }) => {
1062
- const connection = this.#browserSession?.connection();
1063
- const session = connection?.session(event.sessionId);
1064
- if (!session || !this.#userAgentOverride) return;
1065
- await this.#sendUserAgentOverride(this.#wrapSession(session), this.#userAgentOverride);
1066
- });
241
+ async #run(
242
+ name: string,
243
+ params: BrowserParams,
244
+ details: BrowserToolDetails,
245
+ timeoutMs: number,
246
+ signal?: AbortSignal,
247
+ ): Promise<AgentToolResult<BrowserToolDetails>> {
248
+ if (!params.code?.trim()) {
249
+ throw new ToolError("Missing required parameter 'code' for action 'run'.");
250
+ }
251
+ const tab = getTab(name);
252
+ if (tab) {
253
+ details.browser = tab.browser.kind.kind;
254
+ details.url = tab.info.url;
1067
255
  }
1068
256
 
1069
- const targets = this.#browser.targets();
1070
- await Promise.all(
1071
- targets.map(async target => {
1072
- const session = await target.createCDPSession();
1073
- await this.#sendUserAgentOverride(this.#wrapSession(session), override);
1074
- }),
1075
- );
1076
- }
257
+ const { displays, returnValue, screenshots } = await runInTab(name, {
258
+ code: params.code,
259
+ timeoutMs,
260
+ signal,
261
+ session: this.session,
262
+ });
1077
263
 
1078
- #wrapSession(session: CDPSession): PuppeteerCdpClient {
1079
- return {
1080
- send: async (method, params) => session.send(method as never, params as never),
1081
- };
1082
- }
264
+ if (screenshots.length) details.screenshots = screenshots;
1083
265
 
1084
- async #sendUserAgentOverride(client: PuppeteerCdpClient, override: UserAgentOverride): Promise<void> {
1085
- try {
1086
- await client.send("Network.enable");
1087
- } catch {}
1088
- try {
1089
- await client.send("Network.setUserAgentOverride", override);
1090
- } catch (error) {
1091
- logger.debug("Failed to apply Network user agent override", {
1092
- error: error instanceof Error ? error.message : String(error),
1093
- });
266
+ const content = [...displays];
267
+ if (returnValue !== undefined) {
268
+ content.push({ type: "text", text: stringifyReturnValue(returnValue) });
1094
269
  }
1095
- try {
1096
- await client.send("Emulation.setUserAgentOverride", override);
1097
- } catch (error) {
1098
- logger.debug("Failed to apply Emulation user agent override", {
1099
- error: error instanceof Error ? error.message : String(error),
1100
- });
270
+ if (!content.length) {
271
+ content.push({ type: "text", text: `Ran code on tab ${JSON.stringify(name)}` });
1101
272
  }
273
+ const textOnly = content
274
+ .filter((c): c is { type: "text"; text: string } => c.type === "text")
275
+ .map(c => c.text)
276
+ .join("\n");
277
+ details.result = textOnly;
278
+ return toolResult(details).content(content).done();
1102
279
  }
280
+ }
1103
281
 
1104
- #patchSourceUrl(page: Page): void {
1105
- const client = resolvePageClient(page);
1106
- if (!client) return;
1107
- const clientKey = client as object;
1108
- if (this.#patchedClients.has(clientKey)) return;
1109
- this.#patchedClients.add(clientKey);
1110
- const originalSend = client.send.bind(client);
1111
- client.send = async (method: string, params?: Record<string, unknown>) => {
1112
- const next = async (payload?: Record<string, unknown>) => {
1113
- try {
1114
- return await originalSend(method, payload);
1115
- } catch (error) {
1116
- if (
1117
- error instanceof Error &&
1118
- error.message.includes(
1119
- "Protocol error (Network.getResponseBody): No resource with given identifier found",
1120
- )
1121
- ) {
1122
- return undefined;
1123
- }
1124
- throw error;
1125
- }
1126
- };
1127
- if (!method || !params) {
1128
- return next(params);
1129
- }
1130
- const key =
1131
- method === "Runtime.evaluate"
1132
- ? "expression"
1133
- : method === "Runtime.callFunctionOn"
1134
- ? "functionDeclaration"
1135
- : null;
1136
- if (!key) {
1137
- return next(params);
1138
- }
1139
- const value = params[key];
1140
- if (typeof value !== "string" || !value.includes(PUPPETEER_SOURCE_URL_SUFFIX)) {
1141
- return next(params);
1142
- }
1143
- const patchedParams = { ...params, [key]: value.replace(PUPPETEER_SOURCE_URL_SUFFIX, "") };
1144
- return next(patchedParams);
1145
- };
282
+ function describeBrowser(handle: BrowserHandle): string {
283
+ switch (handle.kind.kind) {
284
+ case "headless":
285
+ return `headless browser (${handle.kind.headless ? "hidden" : "visible"})`;
286
+ case "spawned":
287
+ return `spawned ${handle.kind.path} (pid ${handle.pid ?? "?"})`;
288
+ case "connected":
289
+ return `connected ${handle.cdpUrl ?? handle.kind.cdpUrl}`;
1146
290
  }
291
+ }
1147
292
 
1148
- /** Injects stealth scripts that cover common puppeteer detection surfaces. */
1149
- async #injectStealthScripts(page: Page): Promise<void> {
1150
- const scripts = [
1151
- stealthTamperingScript,
1152
- stealthActivityScript,
1153
- stealthHairlineScript,
1154
- stealthBotdScript,
1155
- stealthIframeScript,
1156
- stealthWebglScript,
1157
- stealthScreenScript,
1158
- stealthFontsScript,
1159
- stealthAudioScript,
1160
- stealthLocaleScript,
1161
- stealthPluginsScript,
1162
- stealthHardwareScript,
1163
- stealthCodecsScript,
1164
- stealthWorkerScript,
1165
- ];
1166
-
1167
- const joint = scripts
1168
- .map(
1169
- script => `
1170
- try {
1171
- ${script};
1172
- } catch (e) {}
1173
- `,
1174
- )
1175
- .join(";\n");
1176
-
1177
- await page.evaluateOnNewDocument(`(() => {
1178
- // Native function cache - captured before any tampering
1179
- const iframe = document.createElement("iframe");
1180
- iframe.style.display = "none";
1181
- document.head.appendChild(iframe);
1182
- const nativeWindow = iframe.contentWindow;
1183
- if (!nativeWindow) return;
1184
-
1185
- // Cache pristine native functions
1186
- const Function_toString = nativeWindow.Function.prototype.toString;
1187
- const Object_getOwnPropertyDescriptor = nativeWindow.Object.getOwnPropertyDescriptor;
1188
- const Object_getOwnPropertyDescriptors = nativeWindow.Object.getOwnPropertyDescriptors;
1189
- const Object_getPrototypeOf = nativeWindow.Object.getPrototypeOf;
1190
- const Object_defineProperty = nativeWindow.Object.defineProperty;
1191
- const Object_getOwnPropertyDescriptorOriginal = nativeWindow.Object.getOwnPropertyDescriptor;
1192
- const Object_create = nativeWindow.Object.create;
1193
- const Object_keys = nativeWindow.Object.keys;
1194
- const Object_getOwnPropertyNames = nativeWindow.Object.getOwnPropertyNames;
1195
- const Object_entries = nativeWindow.Object.entries;
1196
- const Object_setPrototypeOf = nativeWindow.Object.setPrototypeOf;
1197
- const Object_assign = nativeWindow.Object.assign;
1198
- const Window_setTimeout = nativeWindow.setTimeout;
1199
- const Math_random = nativeWindow.Math.random;
1200
- const Math_floor = nativeWindow.Math.floor;
1201
- const Math_max = nativeWindow.Math.max;
1202
- const Math_min = nativeWindow.Math.min;
1203
- const Window_Event = nativeWindow.Event;
1204
- const Promise_resolve = nativeWindow.Promise.resolve.bind(nativeWindow.Promise);
1205
- const Window_Blob = nativeWindow.Blob;
1206
- const Window_Proxy = nativeWindow.Proxy;
1207
- const Intl_DateTimeFormat = nativeWindow.Intl.DateTimeFormat;
1208
- const Date_constructor = nativeWindow.Date;
1209
-
1210
-
1211
- ${joint}
1212
-
1213
- document.head.removeChild(iframe);})();`);
293
+ function describeKind(kind: BrowserKind): string {
294
+ switch (kind.kind) {
295
+ case "headless":
296
+ return `headless ${kind.headless ? "hidden" : "visible"}`;
297
+ case "spawned":
298
+ return `spawned:${kind.path}`;
299
+ case "connected":
300
+ return `connected:${kind.cdpUrl}`;
1214
301
  }
302
+ }
1215
303
 
1216
- async execute(
1217
- _toolCallId: string,
1218
- params: BrowserParams,
1219
- signal?: AbortSignal,
1220
- _onUpdate?: AgentToolUpdateCallback<BrowserToolDetails>,
1221
- _ctx?: AgentToolContext,
1222
- ): Promise<AgentToolResult<BrowserToolDetails>> {
1223
- try {
1224
- throwIfAborted(signal);
1225
- const timeoutSeconds = clampTimeout("browser", params.timeout);
1226
- const timeoutMs = timeoutSeconds * 1000;
1227
- const details: BrowserToolDetails = { action: params.action };
1228
-
1229
- switch (params.action) {
1230
- case "open": {
1231
- const page = await untilAborted(signal, () => this.#resetBrowser(params));
1232
- const viewport = page.viewport();
1233
- details.viewport = viewport ?? DEFAULT_VIEWPORT;
1234
- return toolResult(details).text("Opened headless browser session").done();
1235
- }
1236
- case "close": {
1237
- await untilAborted(signal, () => this.#closeBrowser());
1238
- return toolResult(details).text("Closed headless browser session").done();
1239
- }
1240
- case "goto": {
1241
- const url = ensureParam(params.url, "url", params.action);
1242
- details.url = url;
1243
- const page = await this.#ensurePage(params);
1244
- const waitUntil = params.wait_until ?? "networkidle2";
1245
- await this.#clearElementCache();
1246
- await untilAborted(signal, () => page.goto(url, { waitUntil, timeout: timeoutMs }));
1247
- const finalUrl = page.url();
1248
- const title = (await untilAborted(signal, () => page.title())) as string;
1249
- details.url = finalUrl;
1250
- details.result = title;
1251
- return toolResult(details)
1252
- .text(`Navigated to ${finalUrl}${title ? `\nTitle: ${title}` : ""}`)
1253
- .done();
1254
- }
1255
- case "observe": {
1256
- const page = await this.#ensurePage(params);
1257
- const timeoutSignal = AbortSignal.timeout(timeoutMs);
1258
- const observeSignal = signal ? AbortSignal.any([signal, timeoutSignal]) : timeoutSignal;
1259
- await this.#clearElementCache();
1260
- const snapshot = (await untilAborted(observeSignal, () =>
1261
- page.accessibility.snapshot({ interestingOnly: !(params.include_all ?? false) }),
1262
- )) as SerializedAXNode | null;
1263
- if (!snapshot) {
1264
- throw new ToolError("Accessibility snapshot unavailable");
1265
- }
1266
- const entries: ObservationEntry[] = [];
1267
- await this.#collectObservationEntries(snapshot, entries, {
1268
- viewportOnly: params.viewport_only ?? false,
1269
- includeAll: params.include_all ?? false,
1270
- });
1271
- const scroll = (await untilAborted(observeSignal, () =>
1272
- page.evaluate(() => {
1273
- const win = globalThis as unknown as {
1274
- scrollX: number;
1275
- scrollY: number;
1276
- innerWidth: number;
1277
- innerHeight: number;
1278
- document: { documentElement: { scrollWidth: number; scrollHeight: number } };
1279
- };
1280
- const doc = win.document.documentElement;
1281
- return {
1282
- x: win.scrollX,
1283
- y: win.scrollY,
1284
- width: win.innerWidth,
1285
- height: win.innerHeight,
1286
- scrollWidth: doc.scrollWidth,
1287
- scrollHeight: doc.scrollHeight,
1288
- };
1289
- }),
1290
- )) as Observation["scroll"];
1291
- const url = page.url();
1292
- const title = (await untilAborted(observeSignal, () => page.title())) as string;
1293
- const viewport = page.viewport() ?? DEFAULT_VIEWPORT;
1294
- const observation: Observation = {
1295
- url,
1296
- title,
1297
- viewport,
1298
- scroll,
1299
- elements: entries,
1300
- };
1301
- details.url = url;
1302
- details.viewport = viewport;
1303
- details.observation = observation;
1304
- details.result = `${entries.length} elements`;
1305
- return toolResult(details).text(this.#formatObservation(observation)).done();
1306
- }
1307
- case "click": {
1308
- const selector = ensureParam(params.selector, "selector", params.action);
1309
- details.selector = selector;
1310
- const page = await this.#ensurePage(params);
1311
- const resolvedSelector = normalizeSelector(selector);
1312
- if (resolvedSelector.startsWith("text/")) {
1313
- await clickQueryHandlerText(page, resolvedSelector, timeoutMs, signal);
1314
- } else {
1315
- const locator = page.locator(resolvedSelector).setTimeout(timeoutMs);
1316
- await untilAborted(signal, () => locator.click());
1317
- }
1318
- return toolResult(details).text(`Clicked ${selector}`).done();
1319
- }
1320
- case "click_id": {
1321
- const elementId = ensureParam(params.element_id, "element_id", params.action);
1322
- details.elementId = elementId;
1323
- const handle = await this.#resolveCachedHandle(elementId);
1324
- try {
1325
- await untilAborted(signal, () => handle.click());
1326
- } catch {
1327
- await this.#clearElementCache();
1328
- throw new ToolError(`Element_id ${elementId} is stale. Run observe again.`);
1329
- }
1330
- return toolResult(details).text(`Clicked element ${elementId}`).done();
1331
- }
1332
- case "type": {
1333
- const selector = ensureParam(params.selector, "selector", params.action);
1334
- const text = ensureParam(params.text, "text", params.action);
1335
- details.selector = selector;
1336
- const page = await this.#ensurePage(params);
1337
- const resolvedSelector = normalizeSelector(selector);
1338
- const locator = page.locator(resolvedSelector).setTimeout(timeoutMs);
1339
- const handle = (await untilAborted(signal, () => locator.waitHandle())) as ElementHandle;
1340
- await untilAborted(signal, () => handle.type(text, { delay: 0 }));
1341
- await handle.dispose();
1342
- return toolResult(details).text(`Typed into ${selector}`).done();
1343
- }
1344
- case "type_id": {
1345
- const elementId = ensureParam(params.element_id, "element_id", params.action);
1346
- const text = ensureParam(params.text, "text", params.action);
1347
- details.elementId = elementId;
1348
- const page = await this.#ensurePage(params);
1349
- const handle = await this.#resolveCachedHandle(elementId);
1350
- try {
1351
- await untilAborted(signal, () => handle.focus());
1352
- await untilAborted(signal, () => page.keyboard.type(text, { delay: 0 }));
1353
- } catch {
1354
- await this.#clearElementCache();
1355
- throw new ToolError(`Element_id ${elementId} is stale. Run observe again.`);
1356
- }
1357
- return toolResult(details).text(`Typed into element ${elementId}`).done();
1358
- }
1359
- case "fill": {
1360
- const selector = ensureParam(params.selector, "selector", params.action);
1361
- const value = ensureParam(params.value, "value", params.action);
1362
- details.selector = selector;
1363
- const page = await this.#ensurePage(params);
1364
- const resolvedSelector = normalizeSelector(selector);
1365
- const locator = page.locator(resolvedSelector).setTimeout(timeoutMs);
1366
- await untilAborted(signal, () => locator.fill(value));
1367
- return toolResult(details).text(`Filled ${selector}`).done();
1368
- }
1369
- case "fill_id": {
1370
- const elementId = ensureParam(params.element_id, "element_id", params.action);
1371
- const value = ensureParam(params.value, "value", params.action);
1372
- details.elementId = elementId;
1373
- const handle = await this.#resolveCachedHandle(elementId);
1374
- try {
1375
- await untilAborted(signal, () =>
1376
- handle.evaluate((el, inputValue) => {
1377
- const element = el as { value?: string; dispatchEvent: (event: Event) => boolean };
1378
- if (!("value" in element)) {
1379
- throw new Error("Target element is not a form input");
1380
- }
1381
- element.value = String(inputValue);
1382
- element.dispatchEvent(new Event("input", { bubbles: true }));
1383
- element.dispatchEvent(new Event("change", { bubbles: true }));
1384
- }, value),
1385
- );
1386
- } catch {
1387
- await this.#clearElementCache();
1388
- throw new ToolError(`Element_id ${elementId} is stale. Run observe again.`);
1389
- }
1390
- return toolResult(details).text(`Filled element ${elementId}`).done();
1391
- }
1392
- case "press": {
1393
- const key = ensureParam(params.key, "key", params.action) as KeyInput;
1394
- const page = await this.#ensurePage(params);
1395
- if (params.selector) {
1396
- const resolvedSelector = normalizeSelector(params.selector as string);
1397
- await untilAborted(signal, () => page.focus(resolvedSelector));
1398
- }
1399
- await untilAborted(signal, () => page.keyboard.press(key));
1400
- return toolResult(details).text(`Pressed ${key}`).done();
1401
- }
1402
- case "scroll": {
1403
- const deltaY = ensureParam(params.delta_y, "delta_y", params.action);
1404
- const deltaX = params.delta_x ?? 0;
1405
- const page = await this.#ensurePage(params);
1406
- await untilAborted(signal, () => page.mouse.wheel({ deltaX, deltaY }));
1407
- return toolResult(details).text(`Scrolled by ${deltaX}, ${deltaY}`).done();
1408
- }
1409
- case "drag": {
1410
- const fromSelector = ensureParam(params.from_selector, "from_selector", params.action);
1411
- const toSelector = ensureParam(params.to_selector, "to_selector", params.action);
1412
- const page = await this.#ensurePage(params);
1413
- const resolvedFromSelector = normalizeSelector(fromSelector);
1414
- const resolvedToSelector = normalizeSelector(toSelector);
1415
- const fromHandle = (await untilAborted(signal, () =>
1416
- page.$(resolvedFromSelector),
1417
- )) as ElementHandle | null;
1418
- const toHandle = (await untilAborted(signal, () => page.$(resolvedToSelector))) as ElementHandle | null;
1419
- if (!fromHandle || !toHandle) {
1420
- throw new ToolError("Drag selectors did not resolve to elements");
1421
- }
1422
- const fromBox = (await untilAborted(signal, () => fromHandle.boundingBox())) as {
1423
- x: number;
1424
- y: number;
1425
- width: number;
1426
- height: number;
1427
- } | null;
1428
- const toBox = (await untilAborted(signal, () => toHandle.boundingBox())) as {
1429
- x: number;
1430
- y: number;
1431
- width: number;
1432
- height: number;
1433
- } | null;
1434
- await fromHandle.dispose();
1435
- await toHandle.dispose();
1436
- if (!fromBox || !toBox) {
1437
- throw new ToolError("Drag elements are not visible");
1438
- }
1439
- const startX = fromBox.x + fromBox.width / 2;
1440
- const startY = fromBox.y + fromBox.height / 2;
1441
- const endX = toBox.x + toBox.width / 2;
1442
- const endY = toBox.y + toBox.height / 2;
1443
- await untilAborted(signal, () => page.mouse.move(startX, startY));
1444
- await untilAborted(signal, () => page.mouse.down());
1445
- await untilAborted(signal, () => page.mouse.move(endX, endY, { steps: 12 }));
1446
- await untilAborted(signal, () => page.mouse.up());
1447
- return toolResult(details).text(`Dragged from ${fromSelector} to ${toSelector}`).done();
1448
- }
1449
- case "wait_for_selector": {
1450
- const selector = ensureParam(params.selector, "selector", params.action);
1451
- details.selector = selector;
1452
- const page = await this.#ensurePage(params);
1453
- const resolvedSelector = normalizeSelector(selector);
1454
- const locator = page.locator(resolvedSelector).setTimeout(timeoutMs);
1455
- await untilAborted(signal, () => locator.wait());
1456
- return toolResult(details).text(`Selector ready: ${selector}`).done();
1457
- }
1458
- case "evaluate": {
1459
- const script = ensureParam(params.script, "script", params.action);
1460
- const page = await this.#ensurePage(params);
1461
- const value = (await untilAborted(signal, () =>
1462
- page.evaluate(async (source: string) => {
1463
- try {
1464
- return await new Function(`return (async () => (${source}))();`)();
1465
- } catch {
1466
- return await new Function(`return (async () => { ${source} })();`)();
1467
- }
1468
- }, script),
1469
- )) as unknown;
1470
- const output = formatEvaluateResult(value);
1471
- details.result = output;
1472
- return toolResult(details).text(output).done();
1473
- }
1474
- case "get_text": {
1475
- const page = await this.#ensurePage(params);
1476
- if (params.args?.length) {
1477
- const values = (await Promise.all(
1478
- params.args.map((arg, index) => {
1479
- const selector = ensureParam(arg.selector, `args[${index}].selector`, params.action);
1480
- const resolvedSelector = normalizeSelector(selector);
1481
- return untilAborted(signal, () =>
1482
- page.$eval(resolvedSelector, (el: Element) => (el as HTMLElement).innerText),
1483
- );
1484
- }),
1485
- )) as string[];
1486
- details.result = values;
1487
- return toolResult(details)
1488
- .text(JSON.stringify(values, null, 2))
1489
- .done();
1490
- }
1491
- const selector = ensureParam(params.selector, "selector", params.action);
1492
- details.selector = selector;
1493
- const resolvedSelector = normalizeSelector(selector);
1494
- const value = (await untilAborted(signal, () =>
1495
- page.$eval(resolvedSelector, (el: Element) => (el as HTMLElement).innerText),
1496
- )) as string;
1497
- details.result = value;
1498
- return toolResult(details).text(value).done();
1499
- }
1500
- case "get_html": {
1501
- const page = await this.#ensurePage(params);
1502
- if (params.args?.length) {
1503
- const values = (await Promise.all(
1504
- params.args.map((arg, index) => {
1505
- const selector = ensureParam(arg.selector, `args[${index}].selector`, params.action);
1506
- const resolvedSelector = normalizeSelector(selector);
1507
- return untilAborted(signal, () =>
1508
- page.$eval(resolvedSelector, (el: Element) => (el as HTMLElement).innerHTML),
1509
- );
1510
- }),
1511
- )) as string[];
1512
- details.result = values;
1513
- return toolResult(details)
1514
- .text(JSON.stringify(values, null, 2))
1515
- .done();
1516
- }
1517
- const selector = ensureParam(params.selector, "selector", params.action);
1518
- details.selector = selector;
1519
- const resolvedSelector = normalizeSelector(selector);
1520
- const value = (await untilAborted(signal, () =>
1521
- page.$eval(resolvedSelector, (el: Element) => (el as HTMLElement).innerHTML),
1522
- )) as string;
1523
- details.result = value;
1524
- return toolResult(details).text(value).done();
1525
- }
1526
- case "get_attribute": {
1527
- const page = await this.#ensurePage(params);
1528
- if (params.args?.length) {
1529
- const values = (await Promise.all(
1530
- params.args.map((arg, index) => {
1531
- const selector = ensureParam(arg.selector, `args[${index}].selector`, params.action);
1532
- const attribute = ensureParam(arg.attribute, `args[${index}].attribute`, params.action);
1533
- const resolvedSelector = normalizeSelector(selector);
1534
- return untilAborted(signal, () =>
1535
- page.$eval(
1536
- resolvedSelector,
1537
- (el: Element, attr: string) => (el as HTMLElement).getAttribute(String(attr)),
1538
- attribute,
1539
- ),
1540
- );
1541
- }),
1542
- )) as string[];
1543
- details.result = values;
1544
- return toolResult(details)
1545
- .text(JSON.stringify(values, null, 2))
1546
- .done();
1547
- }
1548
- const selector = ensureParam(params.selector, "selector", params.action);
1549
- const attribute = ensureParam(params.attribute, "attribute", params.action);
1550
- details.selector = selector;
1551
- const resolvedSelector = normalizeSelector(selector);
1552
- const value = (await untilAborted(signal, () =>
1553
- page.$eval(
1554
- resolvedSelector,
1555
- (el: { getAttribute: (name: string) => string | null }, attr: string) =>
1556
- el.getAttribute(String(attr)),
1557
- attribute,
1558
- ),
1559
- )) as string | null;
1560
- const output = value ?? "";
1561
- details.result = output;
1562
- return toolResult(details).text(output).done();
1563
- }
1564
- case "extract_readable": {
1565
- const page = await this.#ensurePage(params);
1566
- const format = params.format ?? "markdown";
1567
- const html = (await untilAborted(signal, () => page.content())) as string;
1568
- const url = page.url();
1569
- const readable = extractReadableFromHtml(html, url, format);
1570
- if (!readable) {
1571
- throw new ToolError("Readable content not found");
1572
- }
1573
- details.url = url;
1574
- details.readable = readable;
1575
- details.result = format === "markdown" ? (readable.markdown ?? "") : (readable.text ?? "");
1576
- return toolResult(details)
1577
- .text(JSON.stringify(readable, null, 2))
1578
- .done();
1579
- }
1580
- case "screenshot": {
1581
- const page = await this.#ensurePage(params);
1582
- const fullPage = params.selector ? false : (params.full_page ?? false);
1583
- let buffer: Buffer;
1584
-
1585
- if (params.selector) {
1586
- const resolvedSelector = normalizeSelector(params.selector as string);
1587
- const handle = (await untilAborted(signal, () => page.$(resolvedSelector))) as ElementHandle | null;
1588
- if (!handle) {
1589
- throw new ToolError("Screenshot selector did not resolve to an element");
1590
- }
1591
- buffer = (await untilAborted(signal, () => handle.screenshot({ type: "png" }))) as Buffer;
1592
- await handle.dispose();
1593
- details.selector = params.selector;
1594
- } else {
1595
- buffer = (await untilAborted(signal, () => page.screenshot({ type: "png", fullPage }))) as Buffer;
1596
- }
1597
-
1598
- // Compress aggressively for API content — screenshots are the most
1599
- // frequent image source and land directly in the next LLM request.
1600
- // 1024px is plenty for OCR/UI inspection; 150KB keeps payloads lean.
1601
- const resized = await resizeImage(
1602
- { type: "image", data: buffer.toBase64(), mimeType: "image/png" },
1603
- { maxWidth: 1024, maxHeight: 1024, maxBytes: 150 * 1024, jpegQuality: 70 },
1604
- );
1605
- // Resolve destination: user-defined path > screenshotDir (auto-named) > temp file.
1606
- const screenshotDir = (() => {
1607
- const v = this.session.settings.get("browser.screenshotDir") as string | undefined;
1608
- return v ? expandPath(v) : undefined;
1609
- })();
1610
- const paramPath = params.path ? resolveToCwd(params.path as string, this.session.cwd) : undefined;
1611
- let dest: string;
1612
- if (paramPath) {
1613
- dest = paramPath;
1614
- } else if (screenshotDir) {
1615
- const ts = new Date().toISOString().replace(/[:.]/g, "-").slice(0, -1);
1616
- dest = path.join(screenshotDir, `screenshot-${ts}.png`);
1617
- } else {
1618
- dest = path.join(os.tmpdir(), `omp-sshots-${Snowflake.next()}.png`);
1619
- }
1620
- await fs.promises.mkdir(path.dirname(dest), { recursive: true });
1621
- // Full-res buffer when saving to a user-defined location; resized (API copy) for temp-only.
1622
- const saveFullRes = !!(paramPath || screenshotDir);
1623
- const savedBuffer = saveFullRes ? buffer : resized.buffer;
1624
- const savedMimeType = saveFullRes ? "image/png" : resized.mimeType;
1625
- await Bun.write(dest, savedBuffer);
1626
- details.screenshotPath = dest;
1627
- details.mimeType = savedMimeType;
1628
- details.bytes = savedBuffer.length;
304
+ function sameBrowserKind(a: BrowserKind, b: BrowserKind): boolean {
305
+ if (a.kind !== b.kind) return false;
306
+ if (a.kind === "headless" && b.kind === "headless") return a.headless === b.headless;
307
+ if (a.kind === "spawned" && b.kind === "spawned") return a.path === b.path;
308
+ if (a.kind === "connected" && b.kind === "connected") return a.cdpUrl === b.cdpUrl;
309
+ return false;
310
+ }
1629
311
 
1630
- const lines = formatScreenshot({
1631
- saveFullRes,
1632
- savedMimeType,
1633
- savedByteLength: savedBuffer.length,
1634
- dest,
1635
- resized,
1636
- });
1637
- return toolResult(details)
1638
- .content([
1639
- { type: "text", text: lines.join("\n") },
1640
- { type: "image", data: resized.data, mimeType: resized.mimeType },
1641
- ])
1642
- .done();
1643
- }
1644
- default:
1645
- throw new ToolError(`Unsupported action: ${params.action}`);
1646
- }
1647
- } catch (error) {
1648
- if (error instanceof ToolAbortError) throw error;
1649
- if (error instanceof Error && error.name === "AbortError") {
1650
- throw new ToolAbortError();
1651
- }
1652
- throw error;
1653
- }
312
+ function stringifyReturnValue(value: unknown): string {
313
+ if (typeof value === "string") return value;
314
+ try {
315
+ return JSON.stringify(value, null, 2) ?? String(value);
316
+ } catch {
317
+ return String(value);
1654
318
  }
1655
319
  }