@oh-my-pi/pi-coding-agent 14.5.10 → 14.5.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,792 @@
1
+ import * as fs from "node:fs";
2
+ import * as os from "node:os";
3
+ import * as path from "node:path";
4
+ import type { ImageContent, TextContent } from "@oh-my-pi/pi-ai";
5
+ import { Snowflake, untilAborted } from "@oh-my-pi/pi-utils";
6
+ import type { HTMLElement } from "linkedom";
7
+ import type { ElementHandle, HTTPResponse, KeyInput, Page, SerializedAXNode } from "puppeteer-core";
8
+ import type { ToolSession } from "../../sdk";
9
+ import { resizeImage } from "../../utils/image-resize";
10
+ import { expandPath, resolveToCwd } from "../path-utils";
11
+ import { formatScreenshot } from "../render-utils";
12
+ import { ToolError, throwIfAborted } from "../tool-errors";
13
+ import { DEFAULT_VIEWPORT } from "./launch";
14
+ import { extractReadableFromHtml, type ReadableFormat, type ReadableResult } from "./readable";
15
+ import { clearElementCache, resolveCachedHandle, type TabHandle } from "./registry";
16
+
17
/**
 * Minimal ambient typings for the browser globals that this module touches
 * inside callbacks handed to `ElementHandle.evaluate()`. Only the members
 * actually referenced in those callbacks are declared.
 */
declare global {
	/** Lets `el as Element` casts inside page callbacks reuse linkedom's `HTMLElement` shape. */
	interface Element extends HTMLElement {}

	/** Viewport dimensions, read when clamping an element's rect to the visible area. */
	var innerWidth: number;
	var innerHeight: number;

	/** Hit-testing entry point used to detect whether an element is obscured. */
	var document: {
		elementFromPoint(x: number, y: number): Element | null;
	};

	/** Computed style lookup; properties are compared against plain strings/numbers. */
	function getComputedStyle(element: Element): Record<string, unknown>;
}
26
+
27
/** One accessibility-tree node recorded by `collectObservation()`. */
export interface ObservationEntry {
	/** Sequence number taken from the tab's element counter; also the key of the cached element handle. */
	id: number;
	/** Role reported by the accessibility node. */
	role: string;
	/** Accessible name of the node, when it has one. */
	name?: string;
	/** Current value reported by the accessibility node, when it has one. */
	value?: string | number;
	/** Accessible description of the node, when it has one. */
	description?: string;
	/** Keyboard shortcuts reported by the accessibility node, when present. */
	keyshortcuts?: string;
	/** State labels such as `"disabled"`, `"checked=true"` or `"focused"`. */
	states: string[];
}
36
+
37
/** Snapshot of a tab produced by `collectObservation()`. */
export interface Observation {
	/** URL of the page at observation time. */
	url: string;
	/** Document title, if any. */
	title?: string;
	/** Viewport configured on the page (falls back to the module's default viewport). */
	viewport: { width: number; height: number; deviceScaleFactor?: number };
	/** Scroll metrics read from the page. */
	scroll: {
		/** `window.scrollX`. */
		x: number;
		/** `window.scrollY`. */
		y: number;
		/** `window.innerWidth`. */
		width: number;
		/** `window.innerHeight`. */
		height: number;
		/** `document.documentElement.scrollWidth`. */
		scrollWidth: number;
		/** `document.documentElement.scrollHeight`. */
		scrollHeight: number;
	};
	/** Recorded elements, in accessibility-tree traversal order. */
	elements: ObservationEntry[];
}
51
+
52
/** Accessibility roles that are always treated as interactive when observing a page. */
const INTERACTIVE_AX_ROLES = new Set([
	// Activation targets
	"button",
	"link",
	// Text entry and pickers
	"textbox",
	"combobox",
	"listbox",
	"option",
	// Toggles
	"checkbox",
	"radio",
	"switch",
	// Navigation / menus
	"tab",
	"menuitem",
	"menuitemcheckbox",
	"menuitemradio",
	// Range and search inputs, tree nodes
	"slider",
	"spinbutton",
	"searchbox",
	"treeitem",
]);
71
+
72
/** Legacy `p-…/` selector prefixes that `normalizeSelector()` accepts. */
const LEGACY_SELECTOR_PREFIXES = [
	"p-aria/",
	"p-text/",
	"p-xpath/",
	"p-pierce/",
] as const;
73
+
74
/**
 * Rewrites legacy `p-…/` selector prefixes to the `aria/`, `text/`, `xpath/`
 * and `pierce/` prefixes used everywhere else in this module.
 *
 * - `p-text/…`, `p-xpath/…` and `p-pierce/…` only have the `p-` stripped.
 * - `p-aria/[name="X"]` collapses to `aria/X`; any other `p-aria/…` keeps its
 *   remainder verbatim.
 * - Everything else is returned untouched, including CSS selectors for custom
 *   elements whose tag name happens to start with `p-` (e.g. `p-button`).
 *
 * @param selector Selector as written by the caller.
 * @returns The selector to hand to the page.
 * @throws ToolError when the selector uses a `p-<word>/` prefix that is not a known legacy prefix.
 */
function normalizeSelector(selector: string): string {
	if (!selector) return selector;

	if (selector.startsWith("p-text/")) {
		return `text/${selector.slice("p-text/".length)}`;
	}
	if (selector.startsWith("p-xpath/")) {
		return `xpath/${selector.slice("p-xpath/".length)}`;
	}
	if (selector.startsWith("p-pierce/")) {
		return `pierce/${selector.slice("p-pierce/".length)}`;
	}
	if (selector.startsWith("p-aria/")) {
		const rest = selector.slice("p-aria/".length);
		const nameMatch = rest.match(/\[\s*name\s*=\s*(?:"([^"]+)"|'([^']+)'|([^\]]+))\s*\]/);
		const name = nameMatch?.[1] ?? nameMatch?.[2] ?? nameMatch?.[3];
		if (name) return `aria/${name.trim()}`;
		return `aria/${rest}`;
	}

	// Only `p-<word>/` looks like a query-handler prefix. A bare `p-` start is
	// ordinary CSS (custom element tags such as `p-button`) and must pass through.
	// The list check keeps any listed prefix without a dedicated branch above
	// passing through unchanged rather than being rejected.
	const looksLikePrefix = /^p-[\w-]*\//.test(selector);
	if (looksLikePrefix && !LEGACY_SELECTOR_PREFIXES.some(prefix => selector.startsWith(prefix))) {
		throw new ToolError(
			`Unsupported selector prefix. Use CSS or puppeteer query handlers (aria/, text/, xpath/, pierce/). Got: ${selector}`,
		);
	}
	return selector;
}
99
+
100
/**
 * Decides whether an accessibility node should be listed as interactive.
 *
 * A node qualifies when its role is in `INTERACTIVE_AX_ROLES`, when it
 * exposes any of the `checked` / `pressed` / `selected` / `expanded`
 * properties, or when it currently has focus.
 */
function isInteractiveNode(node: SerializedAXNode): boolean {
	if (INTERACTIVE_AX_ROLES.has(node.role)) return true;
	const toggleProps = [node.checked, node.pressed, node.selected, node.expanded];
	const exposesToggle = toggleProps.some(prop => prop !== undefined);
	return exposesToggle || node.focused === true;
}
110
+
111
/** Builds the state labels for an accessibility node, in a fixed order. */
function describeAxStates(node: SerializedAXNode): string[] {
	const labels: string[] = [];
	const flag = (on: unknown, label: string): void => {
		if (on) labels.push(label);
	};
	const valued = (value: unknown, label: string): void => {
		if (value !== undefined) labels.push(`${label}=${String(value)}`);
	};
	flag(node.disabled, "disabled");
	valued(node.checked, "checked");
	valued(node.pressed, "pressed");
	valued(node.selected, "selected");
	valued(node.expanded, "expanded");
	flag(node.required, "required");
	flag(node.readonly, "readonly");
	flag(node.multiselectable, "multiselectable");
	flag(node.multiline, "multiline");
	flag(node.modal, "modal");
	flag(node.focused, "focused");
	return labels;
}

/** Viewport intersection test that treats any failure as "not visible". */
async function intersectsViewportOrFalse(handle: ElementHandle): Promise<boolean> {
	try {
		return await handle.isIntersectingViewport();
	} catch {
		return false;
	}
}

/**
 * Depth-first walk over an accessibility subtree that appends one entry per
 * recorded node to `entries`.
 *
 * A node is recorded when `options.includeAll` is set or the node is
 * interactive, it resolves to an element handle, and (with
 * `options.viewportOnly`) that element intersects the viewport. Recorded
 * handles are stored in `tab.elementCache` under a freshly incremented
 * `tab.elementCounter`; handles of filtered-out nodes are disposed.
 *
 * @param tab Tab whose counter and element cache are updated.
 * @param node Root of the subtree to visit.
 * @param entries Output list, mutated in place.
 * @param options Filtering switches.
 */
async function collectObservationEntries(
	tab: TabHandle,
	node: SerializedAXNode,
	entries: ObservationEntry[],
	options: { viewportOnly: boolean; includeAll: boolean },
): Promise<void> {
	const wanted = options.includeAll || isInteractiveNode(node);
	const handle = wanted ? await node.elementHandle() : null;

	if (handle) {
		const visible = options.viewportOnly ? await intersectsViewportOrFalse(handle) : true;
		if (!visible) {
			await handle.dispose();
		} else {
			tab.elementCounter += 1;
			const id = tab.elementCounter;
			const states = describeAxStates(node);
			tab.elementCache.set(id, handle);
			entries.push({
				id,
				role: node.role,
				name: node.name,
				value: node.value,
				description: node.description,
				keyshortcuts: node.keyshortcuts,
				states,
			});
		}
	}

	const children = node.children ?? [];
	for (const child of children) {
		await collectObservationEntries(tab, child, entries, options);
	}
}
161
+
162
/**
 * Captures an {@link Observation} of the tab: page URL and title, viewport,
 * scroll metrics, and the accessibility nodes selected by the options.
 *
 * The tab's element cache is cleared first, so ids from earlier observations
 * are no longer backed by this call's handles.
 *
 * @param tab Tab to observe.
 * @param options `includeAll` records every node instead of only interactive
 *   ones (and asks for the full accessibility tree); `viewportOnly` keeps only
 *   elements intersecting the viewport; `signal` aborts the page round-trips.
 * @returns The collected observation.
 * @throws ToolError when the page yields no accessibility snapshot.
 */
export async function collectObservation(
	tab: TabHandle,
	options: { includeAll?: boolean; viewportOnly?: boolean; signal?: AbortSignal },
): Promise<Observation> {
	clearElementCache(tab);

	const { signal } = options;
	const includeAll = options.includeAll ?? false;
	const viewportOnly = options.viewportOnly ?? false;

	const root = (await untilAborted(signal, () =>
		tab.page.accessibility.snapshot({ interestingOnly: !includeAll }),
	)) as SerializedAXNode | null;
	if (!root) {
		throw new ToolError("Accessibility snapshot unavailable");
	}

	const elements: ObservationEntry[] = [];
	await collectObservationEntries(tab, root, elements, { includeAll, viewportOnly });

	// Runs inside the page, so it must not reference anything from this module.
	const readScrollMetrics = () => {
		const win = globalThis as unknown as {
			scrollX: number;
			scrollY: number;
			innerWidth: number;
			innerHeight: number;
			document: { documentElement: { scrollWidth: number; scrollHeight: number } };
		};
		const root = win.document.documentElement;
		return {
			x: win.scrollX,
			y: win.scrollY,
			width: win.innerWidth,
			height: win.innerHeight,
			scrollWidth: root.scrollWidth,
			scrollHeight: root.scrollHeight,
		};
	};
	const scroll = (await untilAborted(signal, () => tab.page.evaluate(readScrollMetrics))) as Observation["scroll"];

	const url = tab.page.url();
	const title = (await untilAborted(signal, () => tab.page.title())) as string;
	const viewport = tab.page.viewport() ?? DEFAULT_VIEWPORT;

	return { url, title, viewport, scroll, elements };
}
202
+
203
/**
 * Renders an observation as plain text: a five-line header (URL, title,
 * viewport, scroll, `Elements:`) followed by exactly one line per element in
 * the form `<id>. <role> "<name>" value=… desc=… shortcuts=… (<states>)`.
 *
 * Every free-text field, including the name, is JSON-encoded so that embedded
 * quotes or line breaks cannot split an element across lines or make the
 * quoting ambiguous. Plain names render exactly as `"name"`.
 *
 * @param observation Observation to render.
 * @returns The newline-joined text.
 */
export function formatObservation(observation: Observation): string {
	const { viewport, scroll } = observation;
	const viewportText = `${viewport.width}x${viewport.height}`;
	const scrollText = `x=${scroll.x} y=${scroll.y} viewport=${scroll.width}x${scroll.height} doc=${scroll.scrollWidth}x${scroll.scrollHeight}`;

	const lines = [
		`URL: ${observation.url}`,
		observation.title ? `Title: ${observation.title}` : "Title:",
		`Viewport: ${viewportText}`,
		`Scroll: ${scrollText}`,
		"Elements:",
	];

	for (const entry of observation.elements) {
		const name = entry.name ? ` ${JSON.stringify(entry.name)}` : "";
		// `value` may legitimately be "" or 0, so test against undefined rather than truthiness.
		const value = entry.value !== undefined ? ` value=${JSON.stringify(entry.value)}` : "";
		const description = entry.description ? ` desc=${JSON.stringify(entry.description)}` : "";
		const shortcuts = entry.keyshortcuts ? ` shortcuts=${JSON.stringify(entry.keyshortcuts)}` : "";
		const state = entry.states.length ? ` (${entry.states.join(", ")})` : "";
		lines.push(`${entry.id}. ${entry.role}${name}${value}${description}${shortcuts}${state}`);
	}
	return lines.join("\n");
}
223
+
224
+ // =====================================================================
225
+ // Click resolution helpers (text/aria selectors with visibility filtering)
226
+ // =====================================================================
227
+
228
/**
 * Outcome of an actionability probe: either the viewport point that was
 * hit-tested successfully, or a short reason why the element cannot be clicked.
 */
type ActionabilityResult =
	| { ok: true; x: number; y: number }
	| { ok: false; reason: string };
229
+
230
/**
 * Picks the element to click among the matches of a query-handler selector.
 *
 * Each match is first mapped to its nearest clickable ancestor-or-self
 * (`a`, `button`, `[role="button"]`, `[role="link"]`, button/submit inputs),
 * falling back to the match itself. Candidates that do not intersect the
 * viewport or whose box is smaller than 1x1 are dropped. Of the remainder, the
 * top-most (then left-most) one wins.
 *
 * Handle ownership: the handles in `handles` are never disposed here. Proxy
 * handles created by this function are disposed unless they are the returned
 * winner; a returned handle that is not one of `handles` is owned by the
 * caller, who must dispose it.
 *
 * @param handles Elements matched by the selector.
 * @returns The winning handle, or `null` when no candidate is visible.
 */
async function resolveActionableQueryHandlerClickTarget(handles: ElementHandle[]): Promise<ElementHandle | null> {
	const disposeQuietly = async (h: { dispose(): Promise<void> }): Promise<void> => {
		try {
			await h.dispose();
		} catch {}
	};

	const candidates: Array<{
		handle: ElementHandle;
		rect: { x: number; y: number; w: number; h: number };
		ownedProxy?: ElementHandle;
	}> = [];

	for (const handle of handles) {
		let clickable: ElementHandle = handle;
		let ownedProxy: ElementHandle | undefined;
		try {
			const proxy = await handle.evaluateHandle(el => {
				const target =
					(el as Element).closest(
						'a,button,[role="button"],[role="link"],input[type="button"],input[type="submit"]',
					) ?? el;
				return target;
			});
			const nodeHandle = proxy.asElement();
			if (nodeHandle) {
				ownedProxy = nodeHandle as unknown as ElementHandle;
				clickable = ownedProxy;
			} else {
				// Not an element after all: release the remote object instead of leaking it.
				await disposeQuietly(proxy);
			}
		} catch {
			// Best effort: keep probing the matched element itself.
		}

		let kept = false;
		try {
			const intersecting = await clickable.isIntersectingViewport();
			if (!intersecting) continue;
			const rect = (await clickable.evaluate(el => {
				const r = (el as Element).getBoundingClientRect();
				return { x: r.left, y: r.top, w: r.width, h: r.height };
			})) as { x: number; y: number; w: number; h: number };
			if (rect.w < 1 || rect.h < 1) continue;
			candidates.push({ handle: clickable, rect, ownedProxy });
			kept = true;
		} catch {
			// Detached or otherwise unusable element: skip this candidate.
		} finally {
			// A proxy that did not become a candidate has no other owner.
			if (!kept && ownedProxy) await disposeQuietly(ownedProxy);
		}
	}

	if (!candidates.length) return null;

	candidates.sort((a, b) => a.rect.y - b.rect.y || a.rect.x - b.rect.x);
	const [winner, ...losers] = candidates;
	for (const loser of losers) {
		if (loser.ownedProxy) await disposeQuietly(loser.ownedProxy);
	}
	return winner?.handle ?? null;
}
291
+
292
/**
 * Checks, inside the page, whether a click aimed at the element would land on it.
 *
 * The element is rejected when its computed style hides it (`display:none`,
 * `visibility:hidden`, `pointer-events:none`, `opacity:0`), when its box is
 * smaller than 1x1, or when less than 1px of it lies inside the viewport.
 * Otherwise the centre of its visible part is hit-tested with
 * `document.elementFromPoint`; the element is actionable when the hit is the
 * element itself, one of its descendants, or one of its ancestors.
 *
 * @param handle Element to probe.
 * @returns `{ ok: true, x, y }` with the probed point, or `{ ok: false, reason }`.
 */
async function isClickActionable(handle: ElementHandle): Promise<ActionabilityResult> {
	const verdict = await handle.evaluate(el => {
		// Everything below executes in the page; helpers must stay inside this callback.
		const node = el as HTMLElement;
		const blocked = (reason: string) => ({ ok: false as const, reason });
		const clamp = (value: number, max: number): number => Math.max(0, Math.min(max, value));

		const css = globalThis.getComputedStyle(node);
		if (css.display === "none") return blocked("display:none");
		if (css.visibility === "hidden") return blocked("visibility:hidden");
		if (css.pointerEvents === "none") return blocked("pointer-events:none");
		if (Number(css.opacity) === 0) return blocked("opacity:0");

		const box = node.getBoundingClientRect();
		if (box.width < 1 || box.height < 1) return blocked("zero-size");

		const viewportWidth = globalThis.innerWidth;
		const viewportHeight = globalThis.innerHeight;
		const left = clamp(box.left, viewportWidth);
		const right = clamp(box.right, viewportWidth);
		const top = clamp(box.top, viewportHeight);
		const bottom = clamp(box.bottom, viewportHeight);
		if (right - left < 1 || bottom - top < 1) return blocked("off-viewport");

		const x = Math.floor((left + right) / 2);
		const y = Math.floor((top + bottom) / 2);
		const hit = globalThis.document.elementFromPoint(x, y);
		if (!hit) return blocked("elementFromPoint-null");

		const reachesNode = hit === node || node.contains(hit) || (hit as Element).contains(node);
		return reachesNode ? { ok: true as const, x, y } : blocked("obscured");
	});
	return verdict as ActionabilityResult;
}
322
+
323
/**
 * Clicks the best visible match of a query-handler selector, polling until it
 * succeeds or the time budget runs out.
 *
 * Every 100 ms the selector is re-queried, the top-most visible candidate is
 * chosen, checked for actionability and clicked. All handles obtained in an
 * iteration are disposed before the next one, including a chosen target that
 * is a proxy handle rather than one of the queried matches.
 *
 * @param page Page to query.
 * @param selector Already-normalized selector.
 * @param timeoutMs Overall time budget in milliseconds.
 * @param signal Optional caller abort signal, combined with the timeout.
 * @throws ToolError when the budget elapses; the message carries the last
 *   match count and the last failure reason.
 */
async function clickQueryHandlerText(
	page: Page,
	selector: string,
	timeoutMs: number,
	signal?: AbortSignal,
): Promise<void> {
	const timeoutSignal = AbortSignal.timeout(timeoutMs);
	const clickSignal = signal ? AbortSignal.any([signal, timeoutSignal]) : timeoutSignal;
	const start = Date.now();
	let lastSeen = 0;
	let lastReason: string | null = null;

	while (Date.now() - start < timeoutMs) {
		throwIfAborted(clickSignal);
		const handles = (await untilAborted(clickSignal, () => page.$$(selector))) as ElementHandle[];
		// Tracked outside the try so the finally block can release it.
		let chosen: ElementHandle | null = null;
		try {
			lastSeen = handles.length;
			const target = await resolveActionableQueryHandlerClickTarget(handles);
			chosen = target;
			if (!target) {
				lastReason = handles.length ? "no-visible-candidate" : "no-matches";
				await Bun.sleep(100);
				continue;
			}
			const actionability = await isClickActionable(target);
			if (!actionability.ok) {
				lastReason = actionability.reason;
				await Bun.sleep(100);
				continue;
			}

			try {
				await untilAborted(clickSignal, () => target.click());
				return;
			} catch (err) {
				lastReason = err instanceof Error ? err.message : String(err);
				await Bun.sleep(100);
			}
		} finally {
			// The chosen target can be a "closest clickable ancestor" proxy that is
			// not part of `handles`; without this it would leak on every iteration.
			const owned = chosen && !handles.includes(chosen) ? [chosen] : [];
			await Promise.all(
				[...handles, ...owned].map(async h => {
					try {
						await h.dispose();
					} catch {}
				}),
			);
		}
	}

	throw new ToolError(
		`Timed out clicking ${selector} (seen ${lastSeen} matches; last reason: ${lastReason ?? "unknown"}). ` +
			"If there are multiple matching elements, use observe + tab.id() or a more specific selector.",
	);
}
376
+
377
+ // =====================================================================
378
+ // Tab API surface (visible to user code as `tab`)
379
+ // =====================================================================
380
+
381
/** Options accepted by `tab.screenshot()`. */
export interface ScreenshotOptions {
	/** Capture only the first element matching this selector; `fullPage` is ignored when set. */
	selector?: string;
	/** Capture the full page instead of just the viewport (defaults to `false`). */
	fullPage?: boolean;
	/** Destination path for the image, resolved against the session's working directory. */
	save?: string;
	/** When set, the screenshot is not added to the run's display output. */
	silent?: boolean;
}
387
+
388
/** Describes a screenshot written by `tab.screenshot()`. */
export interface ScreenshotResult {
	/** Path of the written image file. */
	dest: string;
	/** MIME type of the written file. */
	mimeType: string;
	/** Size of the written file's contents in bytes. */
	bytes: number;
	/** Width of the resized image produced for display. */
	width: number;
	/** Height of the resized image produced for display. */
	height: number;
}
395
+
396
/** Endpoint of a drag: a selector (the element's centre is used) or an explicit point. */
export type DragTarget =
	| string
	| { readonly x: number; readonly y: number };
397
+
398
/**
 * Helper object exposed to user scripts as `tab`. Selector arguments accept
 * CSS as well as the `aria/`, `text/`, `xpath/` and `pierce/` prefixes (and
 * their legacy `p-…/` spellings).
 */
export interface TabApi {
	/** Name of the tab. */
	readonly name: string;
	/** Underlying Puppeteer page. */
	readonly page: Page;
	/** Abort signal of the current run, if one was supplied. */
	readonly signal?: AbortSignal;

	/** Current page URL. */
	url(): string;
	/** Current document title. */
	title(): Promise<string>;
	/** Navigates the page; `waitUntil` defaults to `"networkidle2"`. Clears cached observation ids. */
	goto(
		url: string,
		opts?: { waitUntil?: "load" | "domcontentloaded" | "networkidle0" | "networkidle2" },
	): Promise<void>;

	/** Collects an observation of the page. */
	observe(opts?: { includeAll?: boolean; viewportOnly?: boolean }): Promise<Observation>;
	/** Takes a screenshot and writes it to disk. */
	screenshot(opts?: ScreenshotOptions): Promise<ScreenshotResult>;
	/** Extracts readable content from the page's HTML; `format` defaults to `"markdown"`. */
	extract(format?: ReadableFormat): Promise<ReadableResult | null>;

	/** Clicks the element matching `selector`. */
	click(selector: string): Promise<void>;
	/** Types `text` into the element matching `selector`. */
	type(selector: string, text: string): Promise<void>;
	/** Fills the element matching `selector` with `value`. */
	fill(selector: string, value: string): Promise<void>;
	/** Presses a key, focusing `opts.selector` first when given. */
	press(key: KeyInput, opts?: { selector?: string }): Promise<void>;
	/** Dispatches a mouse-wheel scroll by the given deltas. */
	scroll(deltaX: number, deltaY: number): Promise<void>;
	/** Drags with the mouse from one target to another. */
	drag(from: DragTarget, to: DragTarget): Promise<void>;

	/** Waits for `selector` and returns its element handle. */
	waitFor(selector: string): Promise<ElementHandle>;
	/** Evaluates a function or expression string in the page. */
	evaluate<TResult, TArgs extends unknown[]>(
		fn: string | ((...args: TArgs) => TResult | Promise<TResult>),
		...args: TArgs
	): Promise<TResult>;
	/** Scrolls the element matching `selector` into the centre of the viewport. */
	scrollIntoView(selector: string): Promise<void>;
	/** Selects the given option values on a `<select>`; resolves to the values that ended up selected. */
	select(selector: string, ...values: string[]): Promise<string[]>;
	/** Uploads files through an `<input>` element; paths are resolved against the session's working directory. */
	uploadFile(selector: string, ...filePaths: string[]): Promise<void>;

	/** Waits until the page URL contains the string or matches the RegExp; resolves to the URL. */
	waitForUrl(pattern: string | RegExp, opts?: { timeout?: number }): Promise<string>;
	/** Waits for a response whose URL contains the string, matches the RegExp, or satisfies the predicate. */
	waitForResponse(
		pattern: string | RegExp | ((response: HTTPResponse) => boolean | Promise<boolean>),
		opts?: { timeout?: number },
	): Promise<HTTPResponse>;

	/** Resolves the element handle cached under an observation entry id. */
	id(n: number): Promise<ElementHandle>;
}
432
+
433
/** Inputs of `runInTab()`. */
export interface RunInTabOptions {
	/** Tab the script operates on. */
	tab: TabHandle;
	/** Source text used as the body of an async function. */
	code: string;
	/** Default timeout, in milliseconds, applied to the `tab` helpers. */
	timeoutMs: number;
	/** Optional signal that aborts in-flight `tab` helper calls. */
	signal?: AbortSignal;
	/** Session supplying the working directory and settings. */
	session: ToolSession;
}
440
+
441
/** Outputs of `runInTab()`. */
export interface RunInTabResult {
	/** Text and image items emitted through `display()` and non-silent screenshots, in order. */
	displays: Array<TextContent | ImageContent>;
	/** Value the script resolved to. */
	returnValue: unknown;
	/** One record per screenshot taken during the run. */
	screenshots: ScreenshotResult[];
}
446
+
447
/** Shape of the intrinsic `AsyncFunction` constructor: parameter names followed by the body source. */
type AsyncFunctionConstructor = new (...args: string[]) => (...args: unknown[]) => Promise<unknown>;

/**
 * The `AsyncFunction` constructor, which has no global binding and is
 * therefore recovered from the prototype of an async function.
 */
const AsyncFunctionCtor: AsyncFunctionConstructor = Object.getPrototypeOf(async function () {}).constructor;
450
+
451
/**
 * Runs a user-supplied script against a browser tab.
 *
 * `opts.code` becomes the body of an async function that receives `page`,
 * `browser`, `tab` (a {@link TabApi}), `display`, `assert` and `wait`.
 * Everything passed to `display()` and every non-silent screenshot is
 * collected into the result alongside the script's return value.
 *
 * NOTE(security): the script is compiled with the `AsyncFunction` constructor
 * and executes in this process, not in a sandbox. Only run trusted code.
 *
 * @param opts Tab, script source, default timeout, abort signal and session.
 * @returns Collected display items, the script's return value and screenshot records.
 * @throws Whatever the script throws, including `ToolError` from `assert()`
 *   and from the `tab` helpers.
 */
export async function runInTab(opts: RunInTabOptions): Promise<RunInTabResult> {
	const { tab, code, timeoutMs, signal, session } = opts;
	const displays: Array<TextContent | ImageContent> = [];
	const screenshots: ScreenshotResult[] = [];

	/** Best-effort disposal used in cleanup paths so a failure there never masks the real error. */
	const disposeQuietly = (handle: ElementHandle): Promise<void> => handle.dispose().catch(() => undefined);

	/** Waits (bounded by `timeoutMs` and `signal`) for the element matching `selector`. */
	const waitForHandle = async (selector: string): Promise<ElementHandle> => {
		const locator = tab.page.locator(normalizeSelector(selector)).setTimeout(timeoutMs);
		return (await untilAborted(signal, () => locator.waitHandle())) as ElementHandle;
	};

	const display = (value: unknown): void => {
		if (value === undefined || value === null) return;
		if (typeof value === "object" && (value as { type?: unknown }).type === "image") {
			const img = value as { data?: unknown; mimeType?: unknown };
			if (typeof img.data === "string" && typeof img.mimeType === "string") {
				displays.push({ type: "image", data: img.data, mimeType: img.mimeType });
				return;
			}
		}
		if (typeof value === "string") {
			displays.push({ type: "text", text: value });
			return;
		}
		// JSON.stringify throws on cycles/BigInt and returns undefined for functions
		// and symbols; in both cases fall back to String() so `text` is always a string.
		let json: string | undefined;
		try {
			json = JSON.stringify(value, null, 2);
		} catch {
			json = undefined;
		}
		displays.push({ type: "text", text: json ?? String(value) });
	};

	const assertFn = (cond: unknown, msg?: string): void => {
		if (!cond) throw new ToolError(msg ?? "Assertion failed");
	};

	const wait = (ms: number): Promise<void> => Bun.sleep(ms);

	const tabApi: TabApi = {
		name: tab.name,
		page: tab.page,
		signal,
		url: () => tab.page.url(),
		title: () => tab.page.title(),
		goto: async (url, gOpts) => {
			// Handles cached by a previous observe() belong to the old document.
			clearElementCache(tab);
			await untilAborted(signal, () =>
				tab.page.goto(url, {
					waitUntil: gOpts?.waitUntil ?? "networkidle2",
					timeout: timeoutMs,
				}),
			);
		},
		observe: opts2 => collectObservation(tab, { ...opts2, signal }),
		screenshot: opts2 => captureScreenshot(tab, session, displays, screenshots, signal, opts2),
		extract: async (format = "markdown") => {
			const html = (await untilAborted(signal, () => tab.page.content())) as string;
			return extractReadableFromHtml(html, tab.page.url(), format);
		},
		click: async selector => {
			const resolved = normalizeSelector(selector);
			if (resolved.startsWith("text/")) {
				// Text selectors often match several nodes; pick the visible, actionable one.
				await clickQueryHandlerText(tab.page, resolved, timeoutMs, signal);
			} else {
				const locator = tab.page.locator(resolved).setTimeout(timeoutMs);
				await untilAborted(signal, () => locator.click());
			}
		},
		type: async (selector, text) => {
			const handle = await waitForHandle(selector);
			try {
				await untilAborted(signal, () => handle.type(text, { delay: 0 }));
			} finally {
				await disposeQuietly(handle);
			}
		},
		fill: async (selector, value) => {
			const locator = tab.page.locator(normalizeSelector(selector)).setTimeout(timeoutMs);
			await untilAborted(signal, () => locator.fill(value));
		},
		press: async (key, opts2) => {
			if (opts2?.selector) {
				const resolved = normalizeSelector(opts2.selector);
				await untilAborted(signal, () => tab.page.focus(resolved));
			}
			await untilAborted(signal, () => tab.page.keyboard.press(key));
		},
		scroll: async (deltaX, deltaY) => {
			await untilAborted(signal, () => tab.page.mouse.wheel({ deltaX, deltaY }));
		},
		drag: async (from, to) => {
			type DragPoint = { x: number; y: number; handle?: ElementHandle };

			const resolveDragPoint = async (target: DragTarget, role: "from" | "to"): Promise<DragPoint> => {
				if (typeof target === "string") {
					const resolved = normalizeSelector(target);
					const handle = (await untilAborted(signal, () => tab.page.$(resolved))) as ElementHandle | null;
					if (!handle) throw new ToolError(`Drag ${role} selector did not resolve: ${target}`);
					try {
						const box = (await untilAborted(signal, () => handle.boundingBox())) as {
							x: number;
							y: number;
							width: number;
							height: number;
						} | null;
						if (!box) {
							throw new ToolError(`Drag ${role} element has no bounding box (likely not visible): ${target}`);
						}
						return { x: box.x + box.width / 2, y: box.y + box.height / 2, handle };
					} catch (err) {
						// No point was produced, so nobody else will release this handle.
						await disposeQuietly(handle);
						throw err;
					}
				}
				if (
					target !== null &&
					typeof target === "object" &&
					typeof (target as { x: unknown }).x === "number" &&
					typeof (target as { y: unknown }).y === "number"
				) {
					return { x: (target as { x: number }).x, y: (target as { y: number }).y };
				}
				throw new ToolError(
					`Drag ${role} must be a selector string or { x: number, y: number } point. Got: ${typeof target}`,
				);
			};

			const start = await resolveDragPoint(from, "from");
			let end: DragPoint | undefined;
			let buttonMayBeDown = false;
			try {
				const destination = await resolveDragPoint(to, "to");
				end = destination;
				await untilAborted(signal, () => tab.page.mouse.move(start.x, start.y));
				// Set before awaiting: an abort can land while the press is in flight.
				buttonMayBeDown = true;
				await untilAborted(signal, () => tab.page.mouse.down());
				await untilAborted(signal, () => tab.page.mouse.move(destination.x, destination.y, { steps: 12 }));
				await untilAborted(signal, () => tab.page.mouse.up());
				buttonMayBeDown = false;
			} finally {
				if (buttonMayBeDown) {
					// Never leave the button held after a failed or aborted drag. Releasing
					// a button that is not pressed rejects, which is fine to ignore here.
					await tab.page.mouse.up().catch(() => undefined);
				}
				if (start.handle) await disposeQuietly(start.handle);
				if (end?.handle) await disposeQuietly(end.handle);
			}
		},
		// The returned handle is owned by the calling script.
		waitFor: selector => waitForHandle(selector),
		evaluate: async (fn, ...args) => {
			return (await untilAborted(signal, () =>
				typeof fn === "string"
					? tab.page.evaluate(fn)
					: tab.page.evaluate(fn as (...a: unknown[]) => unknown, ...args),
			)) as never;
		},
		scrollIntoView: async selector => {
			const handle = await waitForHandle(selector);
			try {
				await untilAborted(signal, () =>
					handle.evaluate(el => {
						const target = el as unknown as {
							scrollIntoView: (opts: { behavior: string; block: string; inline: string }) => void;
						};
						target.scrollIntoView({ behavior: "instant", block: "center", inline: "center" });
					}),
				);
			} finally {
				await disposeQuietly(handle);
			}
		},
		select: async (selector, ...values) => {
			const handle = await waitForHandle(selector);
			try {
				return (await untilAborted(signal, () =>
					handle.evaluate((el, vals) => {
						interface SelectOption {
							value: string;
							selected: boolean;
						}
						interface SelectLike {
							tagName: string;
							options: ArrayLike<SelectOption>;
							dispatchEvent: (event: unknown) => boolean;
						}
						const select = el as unknown as SelectLike;
						if (!select || select.tagName !== "SELECT") {
							throw new Error("tab.select() requires a <select> element");
						}
						const EventCtor = (
							globalThis as unknown as { Event: new (type: string, init?: { bubbles: boolean }) => unknown }
						).Event;
						const wanted = new Set(vals as string[]);
						const selected: string[] = [];
						for (let i = 0; i < select.options.length; i++) {
							const opt = select.options[i] as SelectOption;
							opt.selected = wanted.has(opt.value);
							if (opt.selected) selected.push(opt.value);
						}
						// Programmatic changes fire no events; emit the pair a user interaction would.
						select.dispatchEvent(new EventCtor("input", { bubbles: true }));
						select.dispatchEvent(new EventCtor("change", { bubbles: true }));
						return selected;
					}, values),
				)) as string[];
			} finally {
				await disposeQuietly(handle);
			}
		},
		uploadFile: async (selector, ...filePaths) => {
			if (!filePaths.length) {
				throw new ToolError("tab.uploadFile() requires at least one file path");
			}
			const handle = await waitForHandle(selector);
			try {
				const absolute = filePaths.map(p => resolveToCwd(p, session.cwd));
				const upload = handle as unknown as { uploadFile: (...paths: string[]) => Promise<void> };
				const tagName = (await untilAborted(signal, () =>
					handle.evaluate(el => (el as unknown as { tagName: string }).tagName),
				)) as string;
				if (tagName !== "INPUT") {
					throw new ToolError(
						`tab.uploadFile() requires an <input type="file"> element (got <${tagName.toLowerCase()}>)`,
					);
				}
				await untilAborted(signal, () => upload.uploadFile(...absolute));
			} finally {
				await disposeQuietly(handle);
			}
		},
		waitForUrl: async (pattern, wOpts) => {
			const timeout = wOpts?.timeout ?? timeoutMs;
			// A RegExp cannot cross into the page, so ship its source and flags instead.
			const isRegex = pattern instanceof RegExp;
			const matcher = isRegex ? pattern.source : pattern;
			const flags = isRegex ? pattern.flags : "";
			await untilAborted(signal, () =>
				tab.page.waitForFunction(
					(m: string, isRe: boolean, fl: string) => {
						const url = (globalThis as unknown as { location: { href: string } }).location.href;
						return isRe ? new RegExp(m, fl).test(url) : url.includes(m);
					},
					{ timeout, polling: 200 },
					matcher,
					isRegex,
					flags,
				),
			);
			return tab.page.url();
		},
		waitForResponse: async (pattern, wOpts) => {
			const timeout = wOpts?.timeout ?? timeoutMs;
			let predicate: (response: HTTPResponse) => boolean | Promise<boolean>;
			if (typeof pattern === "function") {
				predicate = pattern;
			} else if (pattern instanceof RegExp) {
				predicate = response => {
					// `g`/`y` regexes resume from lastIndex; every response is an
					// independent match, so always start from the beginning.
					pattern.lastIndex = 0;
					return pattern.test(response.url());
				};
			} else {
				predicate = response => response.url().includes(pattern);
			}
			return (await untilAborted(signal, () => tab.page.waitForResponse(predicate, { timeout }))) as HTTPResponse;
		},
		id: async n => resolveCachedHandle(tab, n),
	};

	const fn = new AsyncFunctionCtor("page", "browser", "tab", "display", "assert", "wait", code);
	const returnValue = await fn(tab.page, tab.browser.browser, tabApi, display, assertFn, wait);
	return { displays, returnValue, screenshots };
}
718
+
719
/**
 * Takes a PNG screenshot of the page, or of one element, and writes it to disk.
 *
 * A downscaled copy is produced for display purposes. The file written is the
 * original PNG when an explicit `save` path or the `browser.screenshotDir`
 * setting chooses the destination; otherwise the downscaled copy is written
 * to a uniquely named file in the OS temp directory.
 *
 * @param tab Tab to capture.
 * @param session Supplies the working directory and settings.
 * @param displays Receives a text summary and the downscaled image unless `opts.silent` is set.
 * @param screenshots Receives the result record.
 * @param signal Optional abort signal for the capture itself.
 * @param opts Screenshot options.
 * @returns Record describing the written file.
 * @throws ToolError when `opts.selector` matches no element.
 */
async function captureScreenshot(
	tab: TabHandle,
	session: ToolSession,
	displays: Array<TextContent | ImageContent>,
	screenshots: ScreenshotResult[],
	signal: AbortSignal | undefined,
	opts: ScreenshotOptions = {},
): Promise<ScreenshotResult> {
	let png: Buffer;
	if (!opts.selector) {
		const fullPage = opts.fullPage ?? false;
		png = (await untilAborted(signal, () => tab.page.screenshot({ type: "png", fullPage }))) as Buffer;
	} else {
		const resolved = normalizeSelector(opts.selector);
		const element = (await untilAborted(signal, () => tab.page.$(resolved))) as ElementHandle | null;
		if (!element) {
			throw new ToolError("Screenshot selector did not resolve to an element");
		}
		try {
			png = (await untilAborted(signal, () => element.screenshot({ type: "png" }))) as Buffer;
		} finally {
			await element.dispose().catch(() => undefined);
		}
	}

	// Compress aggressively for the model copy.
	const resized = await resizeImage(
		{ type: "image", data: png.toBase64(), mimeType: "image/png" },
		{ maxWidth: 1024, maxHeight: 1024, maxBytes: 150 * 1024, jpegQuality: 70 },
	);

	const configuredDir = session.settings.get("browser.screenshotDir") as string | undefined;
	const screenshotDir = configuredDir ? expandPath(configuredDir) : undefined;
	const explicitPath = opts.save ? resolveToCwd(opts.save, session.cwd) : undefined;

	// Destination precedence: explicit path, then configured directory, then temp dir.
	const pickDestination = (): string => {
		if (explicitPath) return explicitPath;
		if (screenshotDir) {
			const ts = new Date().toISOString().replace(/[:.]/g, "-").slice(0, -1);
			return path.join(screenshotDir, `screenshot-${ts}.png`);
		}
		return path.join(os.tmpdir(), `omp-sshots-${Snowflake.next()}.png`);
	};
	const dest = pickDestination();
	await fs.promises.mkdir(path.dirname(dest), { recursive: true });

	// User-chosen locations keep the full-resolution PNG; temp files get the small copy.
	const saveFullRes = !!(explicitPath || screenshotDir);
	const savedBuffer = saveFullRes ? png : resized.buffer;
	const savedMimeType = saveFullRes ? "image/png" : resized.mimeType;
	await Bun.write(dest, savedBuffer);

	const info: ScreenshotResult = {
		dest,
		mimeType: savedMimeType,
		bytes: savedBuffer.length,
		width: resized.width,
		height: resized.height,
	};
	screenshots.push(info);

	if (opts.silent) return info;

	const summary = formatScreenshot({
		saveFullRes,
		savedMimeType,
		savedByteLength: savedBuffer.length,
		dest,
		resized,
	});
	displays.push({ type: "text", text: summary.join("\n") });
	displays.push({ type: "image", data: resized.data, mimeType: resized.mimeType });
	return info;
}