agent-browser-loop 0.2.1 → 0.3.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/src/state.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  import type { BrowserContext, Page } from "playwright";
2
+ import type { ElementRefStore, ElementSelectors } from "./ref-store";
2
3
  import type {
3
4
  BrowserState,
4
5
  GetStateOptions,
@@ -31,7 +32,7 @@ const INTERACTIVE_SELECTORS = [
31
32
  "[tabindex]",
32
33
  ].join(", ");
33
34
 
34
- interface ElementInfo {
35
+ interface RawElementInfo {
35
36
  tag: string;
36
37
  role: string;
37
38
  name: string;
@@ -40,23 +41,119 @@ interface ElementInfo {
40
41
  enabled: boolean;
41
42
  attributes: Record<string, string>;
42
43
  boundingBox: { x: number; y: number; width: number; height: number } | null;
43
- }
44
-
45
- interface ElementInfoWithRef extends ElementInfo {
46
- ref: string;
44
+ // Selector info for server-side storage
45
+ xpath: string;
46
+ cssPath: string;
47
+ fingerprint: string | null;
48
+ // For generating ref
49
+ refBase: string;
50
+ // Fingerprint data for validation
51
+ fingerprintData: {
52
+ tagName: string;
53
+ role?: string;
54
+ type?: string;
55
+ name?: string;
56
+ placeholder?: string;
57
+ };
47
58
  }
48
59
 
49
60
  /**
50
- * Extract interactive elements from the page using DOM queries
51
- * Assumes injectElementRefs has already been called
61
+ * Extract interactive elements from the page WITHOUT modifying the DOM
62
+ * Returns raw element info including selectors for server-side ref storage
52
63
  */
53
- async function extractInteractiveElements(
64
+ async function extractInteractiveElementsRaw(
54
65
  page: Page,
55
- ): Promise<InteractiveElement[]> {
56
- const elementInfos = await page.evaluate((selector) => {
57
- // Only get elements that match interactive selectors and have data-ref
58
- const elements = Array.from(document.querySelectorAll(selector));
59
- const results: ElementInfoWithRef[] = [];
66
+ ): Promise<RawElementInfo[]> {
67
+ return await page.evaluate((selector) => {
68
+ // Helper functions (must be inside evaluate)
69
+ const generateXPath = (element: Element): string => {
70
+ const parts: string[] = [];
71
+ let current: Element | null = element;
72
+
73
+ while (current && current.nodeType === Node.ELEMENT_NODE) {
74
+ let index = 1;
75
+ let sibling: Element | null = current.previousElementSibling;
76
+
77
+ while (sibling) {
78
+ if (sibling.tagName === current.tagName) {
79
+ index++;
80
+ }
81
+ sibling = sibling.previousElementSibling;
82
+ }
83
+
84
+ const tagName = current.tagName.toLowerCase();
85
+ parts.unshift(`${tagName}[${index}]`);
86
+ current = current.parentElement;
87
+ }
88
+
89
+ return `/${parts.join("/")}`;
90
+ };
91
+
92
+ const generateCssPath = (element: Element): string => {
93
+ const parts: string[] = [];
94
+ let current: Element | null = element;
95
+
96
+ while (current && current.nodeType === Node.ELEMENT_NODE) {
97
+ let selector = current.tagName.toLowerCase();
98
+
99
+ const id = current.getAttribute("id");
100
+ if (id) {
101
+ try {
102
+ if (document.querySelectorAll(`#${CSS.escape(id)}`).length === 1) {
103
+ parts.unshift(`#${CSS.escape(id)}`);
104
+ break;
105
+ }
106
+ } catch {
107
+ // Invalid ID, skip
108
+ }
109
+ }
110
+
111
+ const parent = current.parentElement;
112
+ if (parent) {
113
+ const siblings = Array.from(parent.children);
114
+ const sameTagSiblings = siblings.filter(
115
+ (s) => s.tagName === current!.tagName,
116
+ );
117
+ if (sameTagSiblings.length > 1) {
118
+ const index = sameTagSiblings.indexOf(current) + 1;
119
+ selector += `:nth-of-type(${index})`;
120
+ }
121
+ }
122
+
123
+ parts.unshift(selector);
124
+ current = current.parentElement;
125
+ }
126
+
127
+ return parts.join(" > ");
128
+ };
129
+
130
+ const generateFingerprint = (element: Element): string | null => {
131
+ const tag = element.tagName.toLowerCase();
132
+ const attrs: string[] = [];
133
+
134
+ const type = element.getAttribute("type");
135
+ const name = element.getAttribute("name");
136
+ const placeholder = element.getAttribute("placeholder");
137
+ const role = element.getAttribute("role");
138
+ const ariaLabel = element.getAttribute("aria-label");
139
+ const dataTestId = element.getAttribute("data-testid");
140
+
141
+ if (dataTestId) {
142
+ return `[data-testid="${CSS.escape(dataTestId)}"]`;
143
+ }
144
+
145
+ if (type) attrs.push(`[type="${CSS.escape(type)}"]`);
146
+ if (name) attrs.push(`[name="${CSS.escape(name)}"]`);
147
+ if (placeholder) attrs.push(`[placeholder="${CSS.escape(placeholder)}"]`);
148
+ if (role) attrs.push(`[role="${CSS.escape(role)}"]`);
149
+ if (ariaLabel) attrs.push(`[aria-label="${CSS.escape(ariaLabel)}"]`);
150
+
151
+ if (attrs.length === 0) {
152
+ return null;
153
+ }
154
+
155
+ return tag + attrs.join("");
156
+ };
60
157
 
61
158
  const normalizeText = (value?: string | null) =>
62
159
  value?.replace(/\s+/g, " ").trim() ?? "";
@@ -109,17 +206,52 @@ async function extractInteractiveElements(
109
206
  return "";
110
207
  };
111
208
 
209
+ const normalizeBase = (value: string) => {
210
+ const trimmed = value.trim().toLowerCase();
211
+ const normalized = trimmed.replace(/[^a-z0-9_-]+/g, "-");
212
+ return normalized.length > 0 ? normalized : "element";
213
+ };
214
+
215
+ const getElementBase = (el: HTMLElement) => {
216
+ const role = el.getAttribute("role");
217
+ if (role) {
218
+ return normalizeBase(role);
219
+ }
220
+ const tag = el.tagName.toLowerCase();
221
+ if (tag === "a") return "link";
222
+ if (tag === "button") return "button";
223
+ if (tag === "input") {
224
+ const type = (el as HTMLInputElement).type;
225
+ if (type === "checkbox") return "checkbox";
226
+ if (type === "radio") return "radio";
227
+ if (type === "submit" || type === "button") return "button";
228
+ return "input";
229
+ }
230
+ if (tag === "textarea") return "textarea";
231
+ if (tag === "select") return "select";
232
+ return normalizeBase(tag);
233
+ };
234
+
235
+ const elements = Array.from(document.querySelectorAll(selector));
236
+ const results: RawElementInfo[] = [];
237
+
112
238
  for (const el of elements) {
113
239
  const htmlEl = el as HTMLElement;
114
- const ref = htmlEl.getAttribute("data-ref");
115
- if (!ref) {
240
+
241
+ // Skip hidden elements
242
+ const style = window.getComputedStyle(htmlEl);
243
+ if (style.display === "none" || style.visibility === "hidden") {
116
244
  continue;
117
245
  }
118
246
 
119
- // Get bounding box
120
247
  const rect = htmlEl.getBoundingClientRect();
248
+ if (rect.width === 0 && rect.height === 0) {
249
+ const tag = htmlEl.tagName.toLowerCase();
250
+ if (!["input", "textarea", "select"].includes(tag)) {
251
+ continue;
252
+ }
253
+ }
121
254
 
122
- const style = window.getComputedStyle(htmlEl);
123
255
  const isVisible =
124
256
  style.display !== "none" &&
125
257
  style.visibility !== "hidden" &&
@@ -197,12 +329,16 @@ async function extractInteractiveElements(
197
329
  if (valueText) attributes.value = valueText;
198
330
  if (isChecked) attributes.checked = "true";
199
331
 
332
+ // Generate selectors
333
+ const xpath = generateXPath(htmlEl);
334
+ const cssPath = generateCssPath(htmlEl);
335
+ const fingerprint = generateFingerprint(htmlEl);
336
+
200
337
  results.push({
201
338
  tag: htmlEl.tagName.toLowerCase(),
202
339
  role,
203
340
  name: name || text.slice(0, 50),
204
341
  text,
205
- ref,
206
342
  visible: isVisible,
207
343
  enabled: !(htmlEl as HTMLInputElement).disabled,
208
344
  attributes,
@@ -212,24 +348,68 @@ async function extractInteractiveElements(
212
348
  width: rect.width,
213
349
  height: rect.height,
214
350
  },
351
+ xpath,
352
+ cssPath,
353
+ fingerprint,
354
+ refBase: getElementBase(htmlEl),
355
+ fingerprintData: {
356
+ tagName: htmlEl.tagName.toLowerCase(),
357
+ role: role || undefined,
358
+ type: htmlEl.getAttribute("type") || undefined,
359
+ name: fieldName || undefined,
360
+ placeholder: placeholder || undefined,
361
+ },
215
362
  });
216
363
  }
217
364
 
218
365
  return results;
219
366
  }, INTERACTIVE_SELECTORS);
367
+ }
220
368
 
221
- // Convert to InteractiveElement format, using ref from data-ref attribute
222
- return elementInfos.map((info, index) => ({
223
- index,
224
- role: info.role,
225
- name: info.name,
226
- text: info.text,
227
- ref: info.ref, // Use the actual ref from the DOM
228
- visible: info.visible,
229
- enabled: info.enabled,
230
- boundingBox: info.boundingBox === null ? undefined : info.boundingBox,
231
- attributes: info.attributes,
232
- }));
369
+ /**
370
+ * Extract interactive elements and store refs in the provided store
371
+ * Returns InteractiveElement array with assigned refs
372
+ */
373
+ export async function extractInteractiveElements(
374
+ page: Page,
375
+ refStore: ElementRefStore,
376
+ ): Promise<InteractiveElement[]> {
377
+ const rawElements = await extractInteractiveElementsRaw(page);
378
+
379
+ // Clear and rebuild the ref store
380
+ refStore.clear();
381
+
382
+ // Track used refs and counters for generating unique refs
383
+ const counters: Record<string, number> = {};
384
+
385
+ return rawElements.map((raw, index) => {
386
+ // Generate unique ref
387
+ const base = raw.refBase;
388
+ const counter = counters[base] ?? 0;
389
+ const ref = `${base}_${counter}`;
390
+ counters[base] = counter + 1;
391
+
392
+ // Store selectors for later resolution
393
+ const selectors: ElementSelectors = {
394
+ xpath: raw.xpath,
395
+ cssPath: raw.cssPath,
396
+ fingerprint: raw.fingerprint ?? undefined,
397
+ };
398
+
399
+ refStore.set(ref, index, selectors, raw.fingerprintData);
400
+
401
+ return {
402
+ index,
403
+ role: raw.role,
404
+ name: raw.name,
405
+ text: raw.text,
406
+ ref,
407
+ visible: raw.visible,
408
+ enabled: raw.enabled,
409
+ boundingBox: raw.boundingBox === null ? undefined : raw.boundingBox,
410
+ attributes: raw.attributes,
411
+ };
412
+ });
233
413
  }
234
414
 
235
415
  /**
@@ -400,10 +580,12 @@ export function formatStateText(state: BrowserState): string {
400
580
 
401
581
  /**
402
582
  * Get the current state of the browser/page
583
+ * Now requires a refStore to store element references server-side
403
584
  */
404
585
  export async function getState(
405
586
  page: Page,
406
587
  context: BrowserContext,
588
+ refStore: ElementRefStore,
407
589
  options: GetStateOptions = {},
408
590
  ): Promise<BrowserState> {
409
591
  const {
@@ -421,19 +603,24 @@ export async function getState(
421
603
  // Wait for page to be stable
422
604
  await page.waitForLoadState("domcontentloaded");
423
605
 
424
- // Inject refs first so extraction and targeting use same indices
425
- await injectElementRefs(page);
426
-
427
- // Extract state in parallel
428
- const [url, title, elements, accessibilityTree, scrollPosition, tabs] =
429
- await Promise.all([
430
- page.url(),
431
- page.title(),
432
- includeElements ? extractInteractiveElements(page) : [],
433
- includeTree ? buildAccessibilityTree(page, treeLimit) : "",
434
- getScrollPosition(page),
435
- getTabsInfo(context, page),
436
- ]);
606
+ // Extract state in parallel - NO DOM MODIFICATION
607
+ // Always rebuild refs even if elements aren't returned.
608
+ const [
609
+ url,
610
+ title,
611
+ elementsSnapshot,
612
+ accessibilityTree,
613
+ scrollPosition,
614
+ tabs,
615
+ ] = await Promise.all([
616
+ page.url(),
617
+ page.title(),
618
+ extractInteractiveElements(page, refStore),
619
+ includeTree ? buildAccessibilityTree(page, treeLimit) : "",
620
+ getScrollPosition(page),
621
+ getTabsInfo(context, page),
622
+ ]);
623
+ const elements = includeElements ? elementsSnapshot : [];
437
624
 
438
625
  // Optional screenshot
439
626
  let screenshot: string | undefined;
@@ -509,94 +696,11 @@ function sliceTree(
509
696
  }
510
697
 
511
698
  /**
512
- * Inject data-ref attributes into the page for element targeting
513
- * Returns the number of elements tagged
699
+ * @deprecated No longer injects refs into DOM - refs are now stored server-side
700
+ * This function is kept for backwards compatibility but does nothing
514
701
  */
515
- export async function injectElementRefs(page: Page): Promise<number> {
516
- return await page.evaluate((selector) => {
517
- const elements = Array.from(document.querySelectorAll(selector));
518
- const used = new Set<string>();
519
- const counters: Record<string, number> = {};
520
-
521
- const normalizeBase = (value: string) => {
522
- const trimmed = value.trim().toLowerCase();
523
- const normalized = trimmed.replace(/[^a-z0-9_-]+/g, "-");
524
- return normalized.length > 0 ? normalized : "element";
525
- };
526
-
527
- const getElementBase = (el: HTMLElement) => {
528
- const role = el.getAttribute("role");
529
- if (role) {
530
- return normalizeBase(role);
531
- }
532
- const tag = el.tagName.toLowerCase();
533
- if (tag === "a") return "link";
534
- if (tag === "button") return "button";
535
- if (tag === "input") {
536
- const type = (el as HTMLInputElement).type;
537
- if (type === "checkbox") return "checkbox";
538
- if (type === "radio") return "radio";
539
- if (type === "submit" || type === "button") return "button";
540
- return "input";
541
- }
542
- if (tag === "textarea") return "textarea";
543
- if (tag === "select") return "select";
544
- return normalizeBase(tag);
545
- };
546
-
547
- document.querySelectorAll("[data-ref]").forEach((el) => {
548
- const ref = el.getAttribute("data-ref");
549
- if (ref) {
550
- used.add(ref);
551
- const match = ref.match(/^([a-z0-9_-]+)_(\d+)$/i);
552
- if (match) {
553
- const base = match[1];
554
- const index = Number(match[2]);
555
- if (!Number.isNaN(index)) {
556
- counters[base] = Math.max(counters[base] ?? 0, index + 1);
557
- }
558
- }
559
- }
560
- });
561
-
562
- let index = 0;
563
-
564
- for (const el of elements) {
565
- const htmlEl = el as HTMLElement;
566
- let ref = htmlEl.getAttribute("data-ref");
567
-
568
- // Skip hidden elements unless they already have a stable ref.
569
- const style = window.getComputedStyle(htmlEl);
570
- if (!ref) {
571
- if (style.display === "none" || style.visibility === "hidden") {
572
- continue;
573
- }
574
-
575
- const rect = htmlEl.getBoundingClientRect();
576
- if (rect.width === 0 && rect.height === 0) {
577
- const tag = htmlEl.tagName.toLowerCase();
578
- if (!["input", "textarea", "select"].includes(tag)) {
579
- continue;
580
- }
581
- }
582
- }
583
-
584
- if (!ref) {
585
- const base = getElementBase(htmlEl);
586
- let next = counters[base] ?? 0;
587
- while (used.has(`${base}_${next}`)) {
588
- next++;
589
- }
590
- ref = `${base}_${next}`;
591
- counters[base] = next + 1;
592
- used.add(ref);
593
- htmlEl.setAttribute("data-ref", ref);
594
- }
595
-
596
- htmlEl.setAttribute("data-index", String(index));
597
- index++;
598
- }
599
-
600
- return used.size;
601
- }, INTERACTIVE_SELECTORS);
702
+ export async function injectElementRefs(_page: Page): Promise<number> {
703
+ // No-op: refs are now stored server-side in ElementRefStore
704
+ // This function is kept for API compatibility but should not be used
705
+ return 0;
602
706
  }
package/src/types.ts CHANGED
@@ -7,6 +7,8 @@ export interface BrowserConfig {
7
7
  executablePath?: string;
8
8
  /** Prefer system Chrome/Chromium over bundled Playwright (default: true) */
9
9
  useSystemChrome?: boolean;
10
+ /** Allow system Chrome in headless mode on macOS (default: false) */
11
+ allowSystemChromeHeadless?: boolean;
10
12
  /** Viewport width (default: 1280) */
11
13
  viewportWidth?: number;
12
14
  /** Viewport height (default: 720) */