agent-browser-loop 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/state.ts ADDED
@@ -0,0 +1,602 @@
1
+ import type { BrowserContext, Page } from "playwright";
2
+ import type {
3
+ BrowserState,
4
+ GetStateOptions,
5
+ InteractiveElement,
6
+ ScrollPosition,
7
+ TabInfo,
8
+ } from "./types";
9
+
10
/**
 * Comma-separated CSS selector list matching every element treated as
 * interactive: native controls and links, elements carrying one of the
 * listed ARIA widget roles, and anything with an `onclick` or `tabindex`
 * attribute.
 */
const INTERACTIVE_SELECTORS = [
  // Native links and form controls
  "a[href]",
  "button",
  "input",
  "textarea",
  "select",
  // ARIA widget roles, expanded to `[role="<name>"]`
  ...[
    "button",
    "link",
    "textbox",
    "checkbox",
    "radio",
    "combobox",
    "listbox",
    "menuitem",
    "option",
    "searchbox",
    "slider",
    "switch",
    "tab",
  ].map((roleName) => `[role="${roleName}"]`),
  // Generic hints that an element reacts to input
  "[onclick]",
  "[tabindex]",
].join(", ");
33
+
34
/**
 * Raw per-element snapshot collected inside the page before it is converted
 * into an `InteractiveElement`.
 */
interface ElementInfo {
  /** Lower-cased tag name of the element. */
  tag: string;
  /** Explicit `role` attribute, or a role derived from the tag / input type. */
  role: string;
  /** Best available human-readable name (label, title, placeholder, ...). */
  name: string;
  /** Truncated text content of the element. */
  text: string;
  /** Whether the element is rendered (not hidden and has a non-empty box). */
  visible: boolean;
  /** False when the element reports itself as `disabled`. */
  enabled: boolean;
  /** Selected attributes and state worth surfacing (href, type, value, ...). */
  attributes: Record<string, string>;
  /** Viewport-relative rectangle of the element, or null when unavailable. */
  boundingBox: Record<"x" | "y" | "width" | "height", number> | null;
}
44
+
45
/** An `ElementInfo` together with the `data-ref` value read from the DOM. */
type ElementInfoWithRef = ElementInfo & {
  /** Value of the element's `data-ref` attribute. */
  ref: string;
};
48
+
49
/**
 * Extract interactive elements from the page using DOM queries.
 *
 * Only elements that match `INTERACTIVE_SELECTORS` and already carry a
 * `data-ref` attribute are reported, so `injectElementRefs` must have been
 * called beforehand.
 *
 * @param page - Playwright page to inspect.
 * @returns One entry per tagged element, in document order; `index` is the
 *   position within the returned list.
 */
async function extractInteractiveElements(
  page: Page,
): Promise<InteractiveElement[]> {
  const elementInfos = await page.evaluate((selector) => {
    // Everything below runs in the page context, so every helper has to be
    // defined inside this callback.

    /** Collapse runs of whitespace (including newlines) and trim. */
    const normalizeText = (value?: string | null): string =>
      value?.replace(/\s+/g, " ").trim() ?? "";

    /** Text of all elements referenced by `aria-labelledby`, space-joined. */
    const getAriaLabelledbyText = (el: HTMLElement): string => {
      const ids = el.getAttribute("aria-labelledby");
      if (!ids) {
        return "";
      }
      return ids
        .split(/\s+/)
        .map((id) => document.getElementById(id))
        .filter((node): node is HTMLElement => node !== null)
        .map((node) => normalizeText(node.textContent))
        .filter(Boolean)
        .join(" ");
    };

    /**
     * Label text for a control, trying in order: the `labels` collection,
     * a wrapping `<label>`, and a `label[for=id]` lookup.
     */
    const getAssociatedLabel = (el: HTMLElement): string => {
      const inputEl = el as HTMLInputElement;
      if ("labels" in inputEl && inputEl.labels?.length) {
        const labels = Array.from(inputEl.labels)
          .map((label) => normalizeText(label.textContent))
          .filter(Boolean);
        if (labels.length) {
          return labels.join(" ");
        }
      }

      const wrapped = el.closest("label");
      if (wrapped) {
        const wrappedText = normalizeText(wrapped.textContent);
        if (wrappedText) {
          return wrappedText;
        }
      }

      const id = el.getAttribute("id");
      if (id) {
        const label = document.querySelector(
          `label[for="${CSS.escape(id)}"]`,
        );
        if (label) {
          const forText = normalizeText(label.textContent);
          if (forText) {
            return forText;
          }
        }
      }

      return "";
    };

    /** Role implied by the tag (and input type) when no `role` attribute is set. */
    const getImplicitRole = (el: HTMLElement): string => {
      const tag = el.tagName.toLowerCase();
      switch (tag) {
        case "a":
          return "link";
        case "button":
          return "button";
        case "textarea":
          return "textbox";
        case "select":
          return "combobox";
        case "input": {
          const type = (el as HTMLInputElement).type;
          if (type === "checkbox" || type === "radio") return type;
          if (type === "submit" || type === "button") return "button";
          return "textbox";
        }
        default:
          return tag;
      }
    };

    const results: ElementInfoWithRef[] = [];

    for (const htmlEl of Array.from(
      document.querySelectorAll<HTMLElement>(selector),
    )) {
      // Elements that were never tagged (e.g. hidden ones) are not reported.
      const ref = htmlEl.getAttribute("data-ref");
      if (!ref) {
        continue;
      }

      const rect = htmlEl.getBoundingClientRect();
      const style = window.getComputedStyle(htmlEl);
      const isVisible =
        style.display !== "none" &&
        style.visibility !== "hidden" &&
        (rect.width > 0 || rect.height > 0);

      const role = htmlEl.getAttribute("role") || getImplicitRole(htmlEl);

      const ariaLabel = htmlEl.getAttribute("aria-label") || "";
      const labelledBy = getAriaLabelledbyText(htmlEl);
      const labelText = getAssociatedLabel(htmlEl);
      const placeholder = htmlEl.getAttribute("placeholder") || "";
      const title = htmlEl.getAttribute("title") || "";
      const fieldName = htmlEl.getAttribute("name") || "";
      const href = htmlEl.getAttribute("href");
      const typeAttr = htmlEl.getAttribute("type");
      const idAttr = htmlEl.getAttribute("id");

      // Current value / checked state of form controls.
      let valueText = "";
      let isChecked = false;
      if (htmlEl instanceof HTMLInputElement) {
        if (htmlEl.type === "checkbox" || htmlEl.type === "radio") {
          isChecked = htmlEl.checked;
        } else {
          valueText = htmlEl.value || "";
        }
      } else if (htmlEl instanceof HTMLTextAreaElement) {
        valueText = htmlEl.value || "";
      } else if (htmlEl instanceof HTMLSelectElement) {
        valueText = Array.from(htmlEl.selectedOptions)
          .map((option) => option.value || option.textContent || "")
          .filter(Boolean)
          .join(", ");
      }
      if (valueText.length > 120) {
        valueText = `${valueText.slice(0, 120)}...`;
      }

      // `.value` is read through a cast because the element may be any tag.
      // Some elements (e.g. <li>, <progress>, <meter>) expose a numeric
      // `value`, so only accept it as a name when it really is a string.
      const rawValue: unknown = (htmlEl as HTMLInputElement).value;
      const valueAsName = typeof rawValue === "string" ? rawValue : "";

      // Accessible name, most specific source first.
      const name =
        ariaLabel ||
        labelledBy ||
        labelText ||
        title ||
        placeholder ||
        fieldName ||
        valueAsName ||
        "";

      // Collapse whitespace so the text never contains newlines, which would
      // break consumers that print one element per line.
      const text = normalizeText(htmlEl.textContent).slice(0, 100);

      // Insertion order is kept stable because it determines print order.
      const attributes: Record<string, string> = {};
      if (href) attributes.href = href;
      if (placeholder) attributes.placeholder = placeholder;
      if (labelText) attributes.label = labelText;
      if (typeAttr) attributes.type = typeAttr;
      if (fieldName) attributes.name = fieldName;
      if (idAttr) attributes.id = idAttr;
      if (valueText) attributes.value = valueText;
      if (isChecked) attributes.checked = "true";

      results.push({
        tag: htmlEl.tagName.toLowerCase(),
        role,
        name: name || text.slice(0, 50),
        text,
        ref,
        visible: isVisible,
        enabled: !(htmlEl as HTMLInputElement).disabled,
        attributes,
        boundingBox: {
          x: rect.x,
          y: rect.y,
          width: rect.width,
          height: rect.height,
        },
      });
    }

    return results;
  }, INTERACTIVE_SELECTORS);

  // Convert to InteractiveElement format, keeping the ref read from the DOM.
  return elementInfos.map((info, index) => ({
    index,
    role: info.role,
    name: info.name,
    text: info.text,
    ref: info.ref,
    visible: info.visible,
    enabled: info.enabled,
    boundingBox: info.boundingBox ?? undefined,
    attributes: info.attributes,
  }));
}
234
+
235
/**
 * Build an indented, one-node-per-line text outline of the page's DOM.
 *
 * Each line has the form `- <role or tag>`, optionally followed by quoted
 * text (leaf nodes) or a quoted `aria-label`/`title` (non-leaf nodes), then
 * `#id` and up to two class names. Script/style/SVG nodes are skipped and
 * children are only followed while the depth is below 6.
 *
 * @param page - Playwright page to inspect.
 * @param maxLines - Maximum number of lines to return; omitted, zero or
 *   negative means no limit.
 * @returns The outline, lines joined with `\n`.
 */
async function buildAccessibilityTree(
  page: Page,
  maxLines?: number,
): Promise<string> {
  return await page.evaluate((limit) => {
    const skippedTags = new Set(["script", "style", "noscript", "svg", "path"]);
    const maxDepth = 6;
    const hasLimit = limit > 0;
    const lines: string[] = [];

    // Every node must stay on exactly one line because the result is later
    // split on "\n"; collapse any whitespace (including newlines) in text.
    const collapse = (value: string | null | undefined): string =>
      value?.replace(/\s+/g, " ").trim() ?? "";

    function traverse(node: Element, depth: number): void {
      // Lines are emitted in pre-order, so once the limit is reached nothing
      // visited later could appear in the output.
      if (hasLimit && lines.length >= limit) {
        return;
      }

      const tag = node.tagName.toLowerCase();
      if (skippedTags.has(tag)) {
        return;
      }

      const role = node.getAttribute("role") || "";
      let line = `${" ".repeat(depth)}- ${role || tag}`;

      if (node.children.length === 0) {
        // Leaf node: show (truncated) text content.
        const text = collapse(node.textContent).slice(0, 50);
        if (text) {
          line += ` "${text}"`;
        }
      } else {
        // Non-leaf node: show aria-label or title if present.
        const label = collapse(
          node.getAttribute("aria-label") || node.getAttribute("title"),
        );
        if (label) {
          line += ` "${label}"`;
        }
      }

      const id = node.getAttribute("id");
      if (id) line += ` #${id}`;

      // className is not a string for some elements (e.g. SVG), hence the
      // typeof check. Split on any whitespace so repeated or leading spaces
      // do not produce empty class tokens.
      const className = node.className;
      if (className && typeof className === "string") {
        const classes = className
          .split(/\s+/)
          .filter(Boolean)
          .slice(0, 2)
          .join(".");
        if (classes) line += ` .${classes}`;
      }

      lines.push(line);

      // Limit depth for performance.
      if (depth < maxDepth) {
        for (const child of Array.from(node.children)) {
          traverse(child, depth + 1);
        }
      }
    }

    // document.body can be null (e.g. before the body is parsed).
    if (document.body) {
      traverse(document.body, 0);
    }

    return (hasLimit ? lines.slice(0, limit) : lines).join("\n");
  }, maxLines ?? 0);
}
300
+
301
/**
 * Read the page's vertical scroll metrics.
 *
 * @param page - Playwright page to inspect.
 * @returns The scroll offset, how many pixels lie above and below the
 *   viewport, the document height and the viewport height.
 */
async function getScrollPosition(page: Page): Promise<ScrollPosition> {
  return await page.evaluate(() => {
    const { scrollY: offset, innerHeight: viewportHeight } = window;
    const { scrollHeight: totalHeight } = document.documentElement;
    // Content remaining below the bottom edge of the viewport, never negative.
    const remainingBelow = Math.max(0, totalHeight - offset - viewportHeight);

    return {
      scrollTop: offset,
      pixelsAbove: offset,
      pixelsBelow: remainingBelow,
      totalHeight,
      viewportHeight,
    };
  });
}
319
+
320
/**
 * Get information about all open tabs/pages in a browser context.
 *
 * Titles are fetched concurrently; a page whose title cannot be read is
 * reported with an empty title rather than failing the whole call.
 *
 * @param context - Browser context whose pages are listed.
 * @param currentPage - Page to flag as `active`.
 * @returns One entry per page, in the order returned by `context.pages()`,
 *   with ids of the form `tab-<position>`.
 */
async function getTabsInfo(
  context: BrowserContext,
  currentPage: Page,
): Promise<TabInfo[]> {
  // Promise.all preserves input order, so ids still match page positions.
  return await Promise.all(
    context.pages().map(async (tab, position) => ({
      id: `tab-${position}`,
      url: tab.url(),
      title: await tab.title().catch(() => ""),
      active: tab === currentPage,
    })),
  );
}
342
+
343
/**
 * Render a browser state as plain text.
 *
 * The output lists URL, title, tab count and scroll offsets, then one line
 * per interactive element, then (when present) the accessibility tree and
 * the last ten console and network errors.
 *
 * @param state - State to render.
 * @returns The rendered text, lines joined with `\n`.
 */
export function formatStateText(state: BrowserState): string {
  const { pixelsAbove, pixelsBelow } = state.scrollPosition;
  const out: string[] = [];

  out.push(`URL: ${state.url}`);
  out.push(`Title: ${state.title}`);
  out.push(`Tabs: ${state.tabs.length}`);
  out.push("");
  out.push(`Scroll: ${pixelsAbove}px above, ${pixelsBelow}px below`);
  out.push("");
  out.push("Interactive Elements:");

  if (state.elements.length > 0) {
    for (const element of state.elements) {
      const attrText = Object.entries(element.attributes)
        .map(([key, value]) => `${key}="${value}"`)
        .join(" ");
      const attrSuffix = attrText ? ` (${attrText})` : "";
      const disabledSuffix = element.enabled ? "" : " [disabled]";
      const label = element.name || element.text;
      out.push(
        ` [${element.index}] ref=${element.ref} ${element.role} "${label}"` +
          attrSuffix +
          disabledSuffix,
      );
    }
  } else {
    out.push(" (none)");
  }

  if (state.accessibilityTree) {
    out.push("");
    out.push("Accessibility Tree:");
    out.push(state.accessibilityTree);
  }

  const consoleErrors = state.errors?.console ?? [];
  const networkErrors = state.errors?.network ?? [];
  const hasConsoleErrors = consoleErrors.length > 0;
  const hasNetworkErrors = networkErrors.length > 0;

  // No error section at all when both lists are empty.
  if (!hasConsoleErrors && !hasNetworkErrors) {
    return out.join("\n");
  }

  out.push("");
  out.push("Errors:");

  if (hasConsoleErrors) {
    out.push("Console:");
    // Only the ten most recent entries are shown.
    out.push(...consoleErrors.slice(-10).map((entry) => ` - ${entry}`));
  }

  if (hasNetworkErrors) {
    out.push("Network:");
    for (const event of networkErrors.slice(-10)) {
      let description: string;
      if (event.type === "failed") {
        const reason = event.failureText ? ` (${event.failureText})` : "";
        description = `failed ${event.method} ${event.url}${reason}`;
      } else if (event.status) {
        description = `${event.status} ${event.method} ${event.url}`;
      } else {
        description = `${event.type} ${event.method} ${event.url}`;
      }
      out.push(` - ${description}`);
    }
  }

  return out.join("\n");
}
400
+
401
/**
 * Get the current state of the browser/page.
 *
 * Waits for `domcontentloaded`, tags interactive elements with `data-ref`
 * attributes, then gathers URL, title, elements, the page outline, scroll
 * position and tab list. Element and tree output can be trimmed with the
 * `*Head`, `*Tail` and `*Limit` options; when a head or tail is given the
 * limit is ignored, for elements and tree alike.
 *
 * @param page - Page to inspect.
 * @param context - Browser context the page belongs to (used for the tab list).
 * @param options - What to include and how to trim it. Elements and tree are
 *   included by default; the screenshot is not.
 * @returns The collected state; `screenshot` is a base64 JPEG when requested.
 */
export async function getState(
  page: Page,
  context: BrowserContext,
  options: GetStateOptions = {},
): Promise<BrowserState> {
  const {
    includeScreenshot = false,
    includeElements = true,
    includeTree = true,
    elementsLimit,
    elementsHead,
    elementsTail,
    treeLimit,
    treeHead,
    treeTail,
  } = options;

  // Wait for page to be stable
  await page.waitForLoadState("domcontentloaded");

  // Inject refs first so extraction and targeting use the same refs
  await injectElementRefs(page);

  // The in-page line cap may only be applied when no head/tail slicing is
  // requested: truncating first would make `treeTail` return the tail of the
  // first `treeLimit` lines instead of the tail of the whole tree, and would
  // disagree with how the element list is sliced.
  const inPageTreeLimit = !treeHead && !treeTail ? treeLimit : undefined;

  // Extract state in parallel
  const [url, title, elements, accessibilityTree, scrollPosition, tabs] =
    await Promise.all([
      page.url(),
      page.title(),
      includeElements ? extractInteractiveElements(page) : [],
      includeTree ? buildAccessibilityTree(page, inPageTreeLimit) : "",
      getScrollPosition(page),
      getTabsInfo(context, page),
    ]);

  // Optional screenshot
  let screenshot: string | undefined;
  if (includeScreenshot) {
    const buffer = await page.screenshot({
      type: "jpeg",
      quality: 80,
    });
    screenshot = buffer.toString("base64");
  }

  return {
    url,
    title,
    tabs,
    elements: sliceList(elements, {
      head: elementsHead,
      tail: elementsTail,
      limit: elementsLimit,
    }),
    accessibilityTree: sliceTree(accessibilityTree, {
      head: treeHead,
      tail: treeTail,
      limit: treeLimit,
    }),
    scrollPosition,
    screenshot,
  };
}
466
+
467
/**
 * Trim a list to its first and/or last items.
 *
 * Precedence: `head` + `tail` together (first `head` items followed by the
 * last `tail` items, never repeating an item), then `head` alone, then
 * `tail` alone, then `limit` (first `limit` items). Counts that are missing,
 * zero, negative or NaN are treated as "not provided", so they can never
 * drop items from the wrong end.
 *
 * @param items - List to trim; it is not modified.
 * @param options - Requested head / tail / limit counts.
 * @returns The trimmed list, or `items` itself when no count applies.
 */
function sliceList<T>(
  items: T[],
  options: { head?: number; tail?: number; limit?: number },
): T[] {
  // Only strictly positive counts are meaningful; a negative count passed to
  // Array.prototype.slice would count from the opposite end.
  const positive = (value?: number): number | undefined =>
    value !== undefined && value > 0 ? value : undefined;

  const total = items.length;
  const head = positive(options.head);
  const tail = positive(options.tail);
  const limit = positive(options.limit);

  if (head !== undefined && tail !== undefined) {
    const headItems = items.slice(0, head);
    // Start the tail after the head so overlapping ranges are not duplicated.
    const tailStart = Math.max(total - tail, headItems.length);
    return [...headItems, ...items.slice(tailStart)];
  }

  if (head !== undefined) {
    return items.slice(0, head);
  }

  if (tail !== undefined) {
    return items.slice(Math.max(0, total - tail));
  }

  if (limit !== undefined) {
    return items.slice(0, limit);
  }

  return items;
}
497
+
498
/**
 * Apply head / tail / limit trimming to a newline-separated tree, line by
 * line, using the same rules as `sliceList`. An empty tree is returned as is.
 */
function sliceTree(
  tree: string,
  options: { head?: number; tail?: number; limit?: number },
): string {
  return tree ? sliceList(tree.split("\n"), options).join("\n") : tree;
}
510
+
511
/**
 * Inject `data-ref` attributes into the page for element targeting.
 *
 * Every element matching `INTERACTIVE_SELECTORS` that does not yet have a
 * ref and is not hidden receives one of the form `<base>_<n>`, where `<base>`
 * comes from its role / tag. Existing refs are kept so they stay stable
 * across calls; if several elements carry the same ref (e.g. after a node
 * was cloned), only the first in document order keeps it. Each processed
 * element also gets a `data-index` attribute with its running position.
 *
 * @param page - Playwright page to tag.
 * @returns The number of distinct refs present in the document afterwards.
 */
export async function injectElementRefs(page: Page): Promise<number> {
  return await page.evaluate((selector) => {
    const refPattern = /^([a-z0-9_-]+)_(\d+)$/i;
    const used = new Set<string>();
    // A Map rather than a plain object: bases come from page-controlled
    // `role` attributes, and keys such as "constructor" or "__proto__" would
    // otherwise resolve to Object.prototype members.
    const counters = new Map<string, number>();

    const normalizeBase = (value: string): string => {
      const normalized = value
        .trim()
        .toLowerCase()
        .replace(/[^a-z0-9_-]+/g, "-");
      return normalized.length > 0 ? normalized : "element";
    };

    const getElementBase = (el: HTMLElement): string => {
      const role = el.getAttribute("role");
      if (role) {
        return normalizeBase(role);
      }
      const tag = el.tagName.toLowerCase();
      if (tag === "a") return "link";
      if (tag === "button") return "button";
      if (tag === "input") {
        const type = (el as HTMLInputElement).type;
        if (type === "checkbox") return "checkbox";
        if (type === "radio") return "radio";
        if (type === "submit" || type === "button") return "button";
        return "input";
      }
      if (tag === "textarea") return "textarea";
      if (tag === "select") return "select";
      return normalizeBase(tag);
    };

    // Pass 1: register refs already in the document so new refs never
    // collide with them, and advance each base's counter past the highest
    // existing number.
    for (const el of Array.from(document.querySelectorAll("[data-ref]"))) {
      const existing = el.getAttribute("data-ref");
      if (!existing) {
        continue;
      }
      if (used.has(existing)) {
        // Duplicate of an earlier element's ref: drop it so the ref stays
        // unambiguous. The element is re-tagged below if it qualifies.
        el.removeAttribute("data-ref");
        continue;
      }
      used.add(existing);

      const match = refPattern.exec(existing);
      if (!match) {
        continue;
      }
      const [, base, digits] = match;
      if (base === undefined || digits === undefined) {
        continue;
      }
      counters.set(base, Math.max(counters.get(base) ?? 0, Number(digits) + 1));
    }

    // Pass 2: tag interactive elements that still lack a ref.
    let index = 0;
    for (const htmlEl of Array.from(
      document.querySelectorAll<HTMLElement>(selector),
    )) {
      let ref = htmlEl.getAttribute("data-ref");

      if (!ref) {
        // Skip hidden elements unless they already have a stable ref.
        const style = window.getComputedStyle(htmlEl);
        if (style.display === "none" || style.visibility === "hidden") {
          continue;
        }

        // Zero-sized elements are skipped too, except form controls.
        const rect = htmlEl.getBoundingClientRect();
        if (rect.width === 0 && rect.height === 0) {
          const tag = htmlEl.tagName.toLowerCase();
          if (!["input", "textarea", "select"].includes(tag)) {
            continue;
          }
        }

        const base = getElementBase(htmlEl);
        let next = counters.get(base) ?? 0;
        while (used.has(`${base}_${next}`)) {
          next++;
        }
        ref = `${base}_${next}`;
        counters.set(base, next + 1);
        used.add(ref);
        htmlEl.setAttribute("data-ref", ref);
      }

      htmlEl.setAttribute("data-index", String(index));
      index++;
    }

    return used.size;
  }, INTERACTIVE_SELECTORS);
}