agent-browser-loop 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -172,6 +172,8 @@ export default defineBrowserConfig({
172
172
  });
173
173
  ```
174
174
 
175
+ On macOS, headless system Chrome can crash during AppKit startup. By default, when `headless: true` on macOS, system Chrome is not used (an explicit `executablePath` is ignored as well) and the bundled Playwright Chromium is launched instead. If you explicitly want system Chrome in headless mode, set `allowSystemChromeHeadless: true`.
176
+
175
177
  ## What This Is NOT For
176
178
 
177
179
  This tool is for agents to test their own code. It is **not** for:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-browser-loop",
3
- "version": "0.2.1",
3
+ "version": "0.2.2",
4
4
  "description": "Let your AI coding agent drive a browser to verify its own work",
5
5
  "license": "MIT",
6
6
  "author": "Jason Silberman",
package/src/actions.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  import type { Page, Request } from "playwright";
2
+ import type { ElementRefStore } from "./ref-store";
2
3
  import type {
3
4
  ClickOptions,
4
5
  NavigateOptions,
@@ -7,27 +8,26 @@ import type {
7
8
  } from "./types";
8
9
 
9
10
  /**
10
- * Get a locator for an element by ref or index
11
- * After calling getState(), elements have data-ref attributes injected
11
+ * Get a locator for an element by ref or index using the ref store
12
+ * The ref store contains selectors generated during getState()
12
13
  */
13
- function getLocator(page: Page, options: { ref?: string; index?: number }) {
14
- if (options.ref) {
15
- return page.locator(`[data-ref="${options.ref}"]`);
16
- }
17
- if (options.index !== undefined) {
18
- // Use data-index (injected by getState). Fallback to legacy e{index} refs.
19
- return page.locator(
20
- `[data-index="${options.index}"], [data-ref="e${options.index}"]`,
21
- );
22
- }
23
- throw new Error("Must provide either ref or index");
14
+ async function getLocator(
15
+ page: Page,
16
+ refStore: ElementRefStore,
17
+ options: { ref?: string; index?: number },
18
+ ) {
19
+ return await refStore.resolveLocator(page, options);
24
20
  }
25
21
 
26
22
  /**
27
23
  * Click an element
28
24
  */
29
- export async function click(page: Page, options: ClickOptions): Promise<void> {
30
- const locator = getLocator(page, options);
25
+ export async function click(
26
+ page: Page,
27
+ refStore: ElementRefStore,
28
+ options: ClickOptions,
29
+ ): Promise<void> {
30
+ const locator = await getLocator(page, refStore, options);
31
31
 
32
32
  const clickOptions: Parameters<typeof locator.click>[0] = {
33
33
  button: options.button,
@@ -44,8 +44,12 @@ export async function click(page: Page, options: ClickOptions): Promise<void> {
44
44
  /**
45
45
  * Type text into an element
46
46
  */
47
- export async function type(page: Page, options: TypeOptions): Promise<void> {
48
- const locator = getLocator(page, options);
47
+ export async function type(
48
+ page: Page,
49
+ refStore: ElementRefStore,
50
+ options: TypeOptions,
51
+ ): Promise<void> {
52
+ const locator = await getLocator(page, refStore, options);
49
53
 
50
54
  // Clear existing text if requested
51
55
  if (options.clear) {
@@ -129,9 +133,10 @@ export async function waitForElement(
129
133
  */
130
134
  export async function hover(
131
135
  page: Page,
136
+ refStore: ElementRefStore,
132
137
  options: { ref?: string; index?: number },
133
138
  ): Promise<void> {
134
- const locator = getLocator(page, options);
139
+ const locator = await getLocator(page, refStore, options);
135
140
  await locator.hover();
136
141
  }
137
142
 
@@ -140,9 +145,10 @@ export async function hover(
140
145
  */
141
146
  export async function select(
142
147
  page: Page,
148
+ refStore: ElementRefStore,
143
149
  options: { ref?: string; index?: number; value: string | string[] },
144
150
  ): Promise<void> {
145
- const locator = getLocator(page, options);
151
+ const locator = await getLocator(page, refStore, options);
146
152
  await locator.selectOption(options.value);
147
153
  }
148
154
 
package/src/browser.ts CHANGED
@@ -3,6 +3,7 @@ import { chromium } from "playwright";
3
3
  import * as actions from "./actions";
4
4
  import { findChromeExecutable } from "./chrome";
5
5
  import { log } from "./log";
6
+ import { ElementRefStore } from "./ref-store";
6
7
  import { formatStateText, getState } from "./state";
7
8
  import type {
8
9
  BrowserConfig,
@@ -33,12 +34,14 @@ export class AgentBrowser {
33
34
  private networkLogLimit: number;
34
35
  private usePersistentContext = false;
35
36
  private lastState: BrowserState | null = null;
37
+ private refStore: ElementRefStore = new ElementRefStore();
36
38
 
37
39
  constructor(options: AgentBrowserOptions = {}) {
38
40
  this.config = {
39
41
  headless: options.headless ?? true,
40
42
  executablePath: options.executablePath,
41
43
  useSystemChrome: options.useSystemChrome ?? true,
44
+ allowSystemChromeHeadless: options.allowSystemChromeHeadless,
42
45
  viewportWidth: options.viewportWidth ?? 1280,
43
46
  viewportHeight: options.viewportHeight ?? 720,
44
47
  userDataDir: options.userDataDir,
@@ -60,8 +63,27 @@ export class AgentBrowser {
60
63
  throw new Error("Browser already started");
61
64
  }
62
65
 
63
- const resolvedExecutablePath = this.config.useSystemChrome
64
- ? this.config.executablePath || findChromeExecutable()
66
+ const isDarwin = process.platform === "darwin";
67
+ let useSystemChrome = this.config.useSystemChrome ?? true;
68
+ let executablePath = this.config.executablePath;
69
+
70
+ if (
71
+ isDarwin &&
72
+ this.config.headless &&
73
+ (useSystemChrome || executablePath) &&
74
+ !this.config.allowSystemChromeHeadless
75
+ ) {
76
+ log
77
+ .withMetadata({ executablePath })
78
+ .warn(
79
+ "Headless system Chrome can crash on macOS. Falling back to bundled Chromium. Set allowSystemChromeHeadless to true to override.",
80
+ );
81
+ useSystemChrome = false;
82
+ executablePath = undefined;
83
+ }
84
+
85
+ const resolvedExecutablePath = useSystemChrome
86
+ ? executablePath || findChromeExecutable()
65
87
  : undefined;
66
88
 
67
89
  log
@@ -160,6 +182,7 @@ export class AgentBrowser {
160
182
  this.networkLogs = [];
161
183
  this.networkCaptureEnabled = false;
162
184
  this.usePersistentContext = false;
185
+ this.refStore.clear();
163
186
  }
164
187
 
165
188
  /**
@@ -190,15 +213,21 @@ export class AgentBrowser {
190
213
  options?: Omit<NavigateOptions, "url">,
191
214
  ): Promise<void> {
192
215
  await actions.navigate(this.getPage(), { url, ...options });
216
+ this.refStore.clear();
193
217
  }
194
218
 
195
219
  /**
196
220
  * Get rich state of the current page
197
- * Also injects data-ref attributes for element targeting
221
+ * Stores element refs server-side (no DOM modification)
198
222
  */
199
223
  async getState(options?: GetStateOptions): Promise<BrowserState> {
200
- // getState now handles ref injection internally
201
- const state = await getState(this.getPage(), this.getContext(), options);
224
+ // getState now stores refs in this.refStore instead of injecting into DOM
225
+ const state = await getState(
226
+ this.getPage(),
227
+ this.getContext(),
228
+ this.refStore,
229
+ options,
230
+ );
202
231
  const result = {
203
232
  ...state,
204
233
  errors: {
@@ -254,14 +283,14 @@ export class AgentBrowser {
254
283
  * Click an element
255
284
  */
256
285
  async click(options: ClickOptions): Promise<void> {
257
- await actions.click(this.getPage(), options);
286
+ await actions.click(this.getPage(), this.refStore, options);
258
287
  }
259
288
 
260
289
  /**
261
290
  * Type text into an element
262
291
  */
263
292
  async type(options: TypeOptions): Promise<void> {
264
- await actions.type(this.getPage(), options);
293
+ await actions.type(this.getPage(), this.refStore, options);
265
294
  }
266
295
 
267
296
  /**
@@ -429,7 +458,7 @@ export class AgentBrowser {
429
458
  * Hover over an element
430
459
  */
431
460
  async hover(options: { ref?: string; index?: number }): Promise<void> {
432
- await actions.hover(this.getPage(), options);
461
+ await actions.hover(this.getPage(), this.refStore, options);
433
462
  }
434
463
 
435
464
  /**
@@ -440,7 +469,7 @@ export class AgentBrowser {
440
469
  index?: number;
441
470
  value: string | string[];
442
471
  }): Promise<void> {
443
- await actions.select(this.getPage(), options);
472
+ await actions.select(this.getPage(), this.refStore, options);
444
473
  }
445
474
 
446
475
  /**
@@ -554,6 +583,13 @@ export class AgentBrowser {
554
583
  }
555
584
  return state;
556
585
  }
586
+
587
+ /**
588
+ * Get the element ref store (for advanced usage/testing)
589
+ */
590
+ getRefStore(): ElementRefStore {
591
+ return this.refStore;
592
+ }
557
593
  }
558
594
 
559
595
  /**
package/src/cli.ts CHANGED
@@ -147,6 +147,7 @@ async function resolveBrowserOptions(args: {
147
147
  headless,
148
148
  executablePath: config?.executablePath,
149
149
  useSystemChrome,
150
+ allowSystemChromeHeadless: config?.allowSystemChromeHeadless,
150
151
  viewportWidth: config?.viewportWidth,
151
152
  viewportHeight: config?.viewportHeight,
152
153
  userDataDir: config?.userDataDir,
@@ -321,7 +322,10 @@ const openCommand = command({
321
322
  json: jsonFlag,
322
323
  },
323
324
  handler: async (args) => {
324
- const browserOptions = await resolveBrowserOptions(args);
325
+ const browserOptions = await resolveBrowserOptions({
326
+ ...args,
327
+ configPath: args.config,
328
+ });
325
329
 
326
330
  let client: DaemonClient;
327
331
  if (args.new) {
@@ -386,7 +390,10 @@ const actCommand = command({
386
390
  process.exit(1);
387
391
  }
388
392
 
389
- const browserOptions = await resolveBrowserOptions(args);
393
+ const browserOptions = await resolveBrowserOptions({
394
+ ...args,
395
+ configPath: args.config,
396
+ });
390
397
 
391
398
  let client: DaemonClient;
392
399
  if (args.new) {
@@ -833,6 +840,7 @@ const serverCommand = command({
833
840
  headless,
834
841
  executablePath: args.executablePath ?? config?.executablePath,
835
842
  useSystemChrome,
843
+ allowSystemChromeHeadless: config?.allowSystemChromeHeadless,
836
844
  viewportWidth: args.viewportWidth || config?.viewportWidth,
837
845
  viewportHeight: args.viewportHeight || config?.viewportHeight,
838
846
  userDataDir: args.userDataDir ?? config?.userDataDir,
package/src/config.ts CHANGED
@@ -32,6 +32,7 @@ export const browserCliConfigSchema = z.looseObject({
32
32
  headless: z.boolean().optional(),
33
33
  executablePath: z.string().optional(),
34
34
  useSystemChrome: z.boolean().optional(),
35
+ allowSystemChromeHeadless: z.boolean().optional(),
35
36
  viewportWidth: z.number().int().optional(),
36
37
  viewportHeight: z.number().int().optional(),
37
38
  userDataDir: z.string().optional(),
package/src/daemon.ts CHANGED
@@ -239,14 +239,6 @@ export async function startDaemon(options: DaemonOptions = {}): Promise<void> {
239
239
  return session;
240
240
  }
241
241
 
242
- function getSession(sessionId: string): DaemonSession {
243
- const session = sessions.get(sessionId);
244
- if (!session) {
245
- throw new Error(`Session not found: ${sessionId}`);
246
- }
247
- return session;
248
- }
249
-
250
242
  function getOrDefaultSession(sessionId?: string): DaemonSession {
251
243
  const id = sessionId ?? "default";
252
244
  const session = sessions.get(id);
@@ -333,16 +325,13 @@ export async function startDaemon(options: DaemonOptions = {}): Promise<void> {
333
325
  parseResult.data,
334
326
  sessions,
335
327
  createSession,
336
- getSession,
337
328
  getOrDefaultSession,
338
329
  closeSession,
339
- idGenerator,
340
- defaultOptions,
341
330
  );
342
331
 
343
332
  // Handle shutdown
344
333
  if (parseResult.data.type === "shutdown") {
345
- socket.write(JSON.stringify(response) + "\n");
334
+ socket.write(`${JSON.stringify(response)}\n`);
346
335
  shutdown();
347
336
  return;
348
337
  }
@@ -355,7 +344,7 @@ export async function startDaemon(options: DaemonOptions = {}): Promise<void> {
355
344
  };
356
345
  }
357
346
 
358
- socket.write(JSON.stringify(response) + "\n");
347
+ socket.write(`${JSON.stringify(response)}\n`);
359
348
  }
360
349
  });
361
350
 
@@ -411,11 +400,8 @@ async function handleRequest(
411
400
  sessionId?: string,
412
401
  options?: AgentBrowserOptions,
413
402
  ) => Promise<DaemonSession>,
414
- getSession: (sessionId: string) => DaemonSession,
415
403
  getOrDefaultSession: (sessionId?: string) => DaemonSession,
416
404
  closeSession: (sessionId: string) => Promise<void>,
417
- idGenerator: ReturnType<typeof createIdGenerator>,
418
- defaultOptions: AgentBrowserOptions,
419
405
  ): Promise<DaemonResponse> {
420
406
  const { id } = request;
421
407
 
@@ -613,7 +599,7 @@ export class DaemonClient {
613
599
  let buffer = "";
614
600
 
615
601
  socket.on("connect", () => {
616
- socket.write(JSON.stringify(request) + "\n");
602
+ socket.write(`${JSON.stringify(request)}\n`);
617
603
  });
618
604
 
619
605
  socket.on("data", (data) => {
@@ -914,7 +900,7 @@ async function spawnDaemon(
914
900
 
915
901
  const child = spawn(
916
902
  process.execPath,
917
- ["--bun", import.meta.dirname + "/daemon-entry.ts", "--config", configPath],
903
+ ["--bun", `${import.meta.dirname}/daemon-entry.ts`, "--config", configPath],
918
904
  {
919
905
  detached: true,
920
906
  stdio: "ignore",
package/src/index.ts CHANGED
@@ -35,6 +35,9 @@ export {
35
35
  isDaemonRunning,
36
36
  startDaemon,
37
37
  } from "./daemon";
38
+ export type { ElementSelectors, StoredElementRef } from "./ref-store";
39
+ // Ref store for server-side element reference management
40
+ export { ElementRefStore } from "./ref-store";
38
41
  // Server
39
42
  export type { BrowserServerConfig } from "./server";
40
43
  export { startBrowserServer } from "./server";
@@ -0,0 +1,216 @@
1
+ import type { Locator, Page } from "playwright";
2
+
/**
 * Selector strategies for locating an element.
 * Several strategies are kept so resolution can fall back when one stops matching.
 */
export interface ElementSelectors {
  /** XPath from document root */
  xpath: string;
  /** CSS selector path */
  cssPath: string;
  /** Fingerprint-based selector using stable attributes */
  fingerprint?: string;
}

/**
 * Stored reference to an element
 */
export interface StoredElementRef {
  /** The ref string (e.g., "button_0") */
  ref: string;
  /** Sequential index */
  index: number;
  /** Multiple selector strategies */
  selectors: ElementSelectors;
  /** Element fingerprint used to validate a selector hit */
  fingerprint: {
    tagName: string;
    role?: string;
    type?: string;
    name?: string;
    placeholder?: string;
  };
}

/**
 * Server-side store for element references.
 * Avoids DOM modification that causes React hydration errors.
 *
 * Every entry is reachable both by its ref string and by its numeric index;
 * the two lookup tables always describe the same set of entries.
 */
export class ElementRefStore {
  private refMap = new Map<string, StoredElementRef>();
  private indexMap = new Map<number, StoredElementRef>();
  private snapshotVersion = 0;

  /**
   * Clear all stored refs (call before new snapshot).
   * Also bumps the snapshot version.
   */
  clear(): void {
    this.refMap.clear();
    this.indexMap.clear();
    this.snapshotVersion++;
  }

  /**
   * Get current snapshot version (number of times clear() has been called).
   */
  getVersion(): number {
    return this.snapshotVersion;
  }

  /**
   * Store a ref for an element.
   *
   * If the ref was previously registered under a different index, or the
   * index was previously held by a different ref, the displaced entry is
   * removed so that a stale element can never be resolved through the other
   * lookup table.
   *
   * @param ref - Ref string, e.g. "button_0"
   * @param index - Sequential index of the element
   * @param selectors - Selector strategies used to re-locate the element
   * @param fingerprint - Attribute fingerprint used to validate selector hits
   */
  set(
    ref: string,
    index: number,
    selectors: ElementSelectors,
    fingerprint: StoredElementRef["fingerprint"],
  ): void {
    const previousForRef = this.refMap.get(ref);
    if (previousForRef && previousForRef.index !== index) {
      this.indexMap.delete(previousForRef.index);
    }

    const previousForIndex = this.indexMap.get(index);
    if (previousForIndex && previousForIndex.ref !== ref) {
      this.refMap.delete(previousForIndex.ref);
    }

    const stored: StoredElementRef = { ref, index, selectors, fingerprint };
    this.refMap.set(ref, stored);
    this.indexMap.set(index, stored);
  }

  /**
   * Get stored ref by ref string
   */
  getByRef(ref: string): StoredElementRef | undefined {
    return this.refMap.get(ref);
  }

  /**
   * Get stored ref by index
   */
  getByIndex(index: number): StoredElementRef | undefined {
    return this.indexMap.get(index);
  }

  /**
   * Resolve a Playwright locator for an element by ref or index.
   *
   * Strategies are tried in order: XPath, CSS path, then the fingerprint
   * selector (when one was stored). For each strategy the first hit whose
   * attributes agree with the stored fingerprint wins. If no hit validates,
   * the first raw hit of the first strategy that matches anything is returned
   * as a last resort.
   *
   * @param page - Page to search
   * @param options - `ref` takes precedence over `index` when both are given
   * @returns Locator narrowed to a single element
   * @throws Error if neither ref nor index is given, if the ref/index is
   *   unknown, or if no strategy matches any element
   */
  async resolveLocator(
    page: Page,
    options: { ref?: string; index?: number },
  ): Promise<Locator> {
    const stored = this.lookup(options);
    const { selectors, fingerprint } = stored;

    const xpathLocator = page.locator(`xpath=${selectors.xpath}`);
    const xpathMatch = await this.pickMatching(xpathLocator, fingerprint);
    if (xpathMatch) {
      return xpathMatch;
    }

    const cssLocator = page.locator(selectors.cssPath);
    const cssMatch = await this.pickMatching(cssLocator, fingerprint);
    if (cssMatch) {
      return cssMatch;
    }

    let fingerprintLocator: Locator | null = null;
    if (selectors.fingerprint) {
      // An attribute-only selector ("[data-testid=...]") is scoped to the
      // stored tag; a selector that already starts with a tag is used as-is.
      const tagPrefix = fingerprint.tagName || "";
      const fingerprintSelector = selectors.fingerprint.startsWith("[")
        ? `${tagPrefix}${selectors.fingerprint}`
        : selectors.fingerprint;
      fingerprintLocator = page.locator(fingerprintSelector);
      const fingerprintMatch = await this.pickMatching(
        fingerprintLocator,
        fingerprint,
      );
      if (fingerprintMatch) {
        return fingerprintMatch;
      }
    }

    // Last resort: fall back to first match from the best available selector.
    if (await xpathLocator.count()) {
      return xpathLocator.first();
    }
    if (await cssLocator.count()) {
      return cssLocator.first();
    }
    if (fingerprintLocator && (await fingerprintLocator.count())) {
      return fingerprintLocator.first();
    }

    throw new Error(
      `Unable to resolve element for ref ${stored.ref}. Call getState() again to refresh element refs.`,
    );
  }

  /**
   * Get all stored refs
   */
  getAllRefs(): StoredElementRef[] {
    return Array.from(this.refMap.values());
  }

  /**
   * Find the stored entry addressed by `options`, or throw a descriptive error.
   */
  private lookup(options: { ref?: string; index?: number }): StoredElementRef {
    if (options.ref) {
      const byRef = this.refMap.get(options.ref);
      if (!byRef) {
        throw new Error(
          `Unknown ref: ${options.ref}. Call getState() first to snapshot elements.`,
        );
      }
      return byRef;
    }

    if (options.index !== undefined) {
      const byIndex = this.indexMap.get(options.index);
      if (!byIndex) {
        throw new Error(
          `Unknown index: ${options.index}. Call getState() first to snapshot elements.`,
        );
      }
      return byIndex;
    }

    throw new Error("Must provide either ref or index");
  }

  /**
   * Return the first element matched by `locator` whose tag and attributes
   * agree with `fingerprint`, or null when none does.
   */
  private async pickMatching(
    locator: Locator,
    fingerprint: StoredElementRef["fingerprint"],
  ): Promise<Locator | null> {
    const count = await locator.count();

    for (let i = 0; i < count; i++) {
      const candidate = locator.nth(i);
      // The callback is handed to locator.evaluate(), so it is kept
      // self-contained: it only uses its own two parameters.
      const matches = await candidate.evaluate((el, expected) => {
        if (
          expected.tagName &&
          el.tagName.toLowerCase() !== expected.tagName
        ) {
          return false;
        }
        const attributeNames = ["role", "type", "name", "placeholder"] as const;
        for (const attributeName of attributeNames) {
          const wanted = expected[attributeName];
          // Unset / empty fingerprint fields are not checked.
          if (wanted && el.getAttribute(attributeName) !== wanted) {
            return false;
          }
        }
        return true;
      }, fingerprint);

      if (matches) {
        return candidate;
      }
    }

    return null;
  }
}
package/src/state.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  import type { BrowserContext, Page } from "playwright";
2
+ import type { ElementRefStore, ElementSelectors } from "./ref-store";
2
3
  import type {
3
4
  BrowserState,
4
5
  GetStateOptions,
@@ -31,7 +32,7 @@ const INTERACTIVE_SELECTORS = [
31
32
  "[tabindex]",
32
33
  ].join(", ");
33
34
 
34
- interface ElementInfo {
35
+ interface RawElementInfo {
35
36
  tag: string;
36
37
  role: string;
37
38
  name: string;
@@ -40,23 +41,119 @@ interface ElementInfo {
40
41
  enabled: boolean;
41
42
  attributes: Record<string, string>;
42
43
  boundingBox: { x: number; y: number; width: number; height: number } | null;
43
- }
44
-
45
- interface ElementInfoWithRef extends ElementInfo {
46
- ref: string;
44
+ // Selector info for server-side storage
45
+ xpath: string;
46
+ cssPath: string;
47
+ fingerprint: string | null;
48
+ // For generating ref
49
+ refBase: string;
50
+ // Fingerprint data for validation
51
+ fingerprintData: {
52
+ tagName: string;
53
+ role?: string;
54
+ type?: string;
55
+ name?: string;
56
+ placeholder?: string;
57
+ };
47
58
  }
48
59
 
49
60
  /**
50
- * Extract interactive elements from the page using DOM queries
51
- * Assumes injectElementRefs has already been called
61
+ * Extract interactive elements from the page WITHOUT modifying the DOM
62
+ * Returns raw element info including selectors for server-side ref storage
52
63
  */
53
- async function extractInteractiveElements(
64
+ async function extractInteractiveElementsRaw(
54
65
  page: Page,
55
- ): Promise<InteractiveElement[]> {
56
- const elementInfos = await page.evaluate((selector) => {
57
- // Only get elements that match interactive selectors and have data-ref
58
- const elements = Array.from(document.querySelectorAll(selector));
59
- const results: ElementInfoWithRef[] = [];
66
+ ): Promise<RawElementInfo[]> {
67
+ return await page.evaluate((selector) => {
68
+ // Helper functions (must be inside evaluate)
69
+ const generateXPath = (element: Element): string => {
70
+ const parts: string[] = [];
71
+ let current: Element | null = element;
72
+
73
+ while (current && current.nodeType === Node.ELEMENT_NODE) {
74
+ let index = 1;
75
+ let sibling: Element | null = current.previousElementSibling;
76
+
77
+ while (sibling) {
78
+ if (sibling.tagName === current.tagName) {
79
+ index++;
80
+ }
81
+ sibling = sibling.previousElementSibling;
82
+ }
83
+
84
+ const tagName = current.tagName.toLowerCase();
85
+ parts.unshift(`${tagName}[${index}]`);
86
+ current = current.parentElement;
87
+ }
88
+
89
+ return `/${parts.join("/")}`;
90
+ };
91
+
92
+ const generateCssPath = (element: Element): string => {
93
+ const parts: string[] = [];
94
+ let current: Element | null = element;
95
+
96
+ while (current && current.nodeType === Node.ELEMENT_NODE) {
97
+ let selector = current.tagName.toLowerCase();
98
+
99
+ const id = current.getAttribute("id");
100
+ if (id) {
101
+ try {
102
+ if (document.querySelectorAll(`#${CSS.escape(id)}`).length === 1) {
103
+ parts.unshift(`#${CSS.escape(id)}`);
104
+ break;
105
+ }
106
+ } catch {
107
+ // Invalid ID, skip
108
+ }
109
+ }
110
+
111
+ const parent = current.parentElement;
112
+ if (parent) {
113
+ const siblings = Array.from(parent.children);
114
+ const sameTagSiblings = siblings.filter(
115
+ (s) => s.tagName === current!.tagName,
116
+ );
117
+ if (sameTagSiblings.length > 1) {
118
+ const index = sameTagSiblings.indexOf(current) + 1;
119
+ selector += `:nth-of-type(${index})`;
120
+ }
121
+ }
122
+
123
+ parts.unshift(selector);
124
+ current = current.parentElement;
125
+ }
126
+
127
+ return parts.join(" > ");
128
+ };
129
+
130
+ const generateFingerprint = (element: Element): string | null => {
131
+ const tag = element.tagName.toLowerCase();
132
+ const attrs: string[] = [];
133
+
134
+ const type = element.getAttribute("type");
135
+ const name = element.getAttribute("name");
136
+ const placeholder = element.getAttribute("placeholder");
137
+ const role = element.getAttribute("role");
138
+ const ariaLabel = element.getAttribute("aria-label");
139
+ const dataTestId = element.getAttribute("data-testid");
140
+
141
+ if (dataTestId) {
142
+ return `[data-testid="${CSS.escape(dataTestId)}"]`;
143
+ }
144
+
145
+ if (type) attrs.push(`[type="${CSS.escape(type)}"]`);
146
+ if (name) attrs.push(`[name="${CSS.escape(name)}"]`);
147
+ if (placeholder) attrs.push(`[placeholder="${CSS.escape(placeholder)}"]`);
148
+ if (role) attrs.push(`[role="${CSS.escape(role)}"]`);
149
+ if (ariaLabel) attrs.push(`[aria-label="${CSS.escape(ariaLabel)}"]`);
150
+
151
+ if (attrs.length === 0) {
152
+ return null;
153
+ }
154
+
155
+ return tag + attrs.join("");
156
+ };
60
157
 
61
158
  const normalizeText = (value?: string | null) =>
62
159
  value?.replace(/\s+/g, " ").trim() ?? "";
@@ -109,17 +206,52 @@ async function extractInteractiveElements(
109
206
  return "";
110
207
  };
111
208
 
209
+ const normalizeBase = (value: string) => {
210
+ const trimmed = value.trim().toLowerCase();
211
+ const normalized = trimmed.replace(/[^a-z0-9_-]+/g, "-");
212
+ return normalized.length > 0 ? normalized : "element";
213
+ };
214
+
215
+ const getElementBase = (el: HTMLElement) => {
216
+ const role = el.getAttribute("role");
217
+ if (role) {
218
+ return normalizeBase(role);
219
+ }
220
+ const tag = el.tagName.toLowerCase();
221
+ if (tag === "a") return "link";
222
+ if (tag === "button") return "button";
223
+ if (tag === "input") {
224
+ const type = (el as HTMLInputElement).type;
225
+ if (type === "checkbox") return "checkbox";
226
+ if (type === "radio") return "radio";
227
+ if (type === "submit" || type === "button") return "button";
228
+ return "input";
229
+ }
230
+ if (tag === "textarea") return "textarea";
231
+ if (tag === "select") return "select";
232
+ return normalizeBase(tag);
233
+ };
234
+
235
+ const elements = Array.from(document.querySelectorAll(selector));
236
+ const results: RawElementInfo[] = [];
237
+
112
238
  for (const el of elements) {
113
239
  const htmlEl = el as HTMLElement;
114
- const ref = htmlEl.getAttribute("data-ref");
115
- if (!ref) {
240
+
241
+ // Skip hidden elements
242
+ const style = window.getComputedStyle(htmlEl);
243
+ if (style.display === "none" || style.visibility === "hidden") {
116
244
  continue;
117
245
  }
118
246
 
119
- // Get bounding box
120
247
  const rect = htmlEl.getBoundingClientRect();
248
+ if (rect.width === 0 && rect.height === 0) {
249
+ const tag = htmlEl.tagName.toLowerCase();
250
+ if (!["input", "textarea", "select"].includes(tag)) {
251
+ continue;
252
+ }
253
+ }
121
254
 
122
- const style = window.getComputedStyle(htmlEl);
123
255
  const isVisible =
124
256
  style.display !== "none" &&
125
257
  style.visibility !== "hidden" &&
@@ -197,12 +329,16 @@ async function extractInteractiveElements(
197
329
  if (valueText) attributes.value = valueText;
198
330
  if (isChecked) attributes.checked = "true";
199
331
 
332
+ // Generate selectors
333
+ const xpath = generateXPath(htmlEl);
334
+ const cssPath = generateCssPath(htmlEl);
335
+ const fingerprint = generateFingerprint(htmlEl);
336
+
200
337
  results.push({
201
338
  tag: htmlEl.tagName.toLowerCase(),
202
339
  role,
203
340
  name: name || text.slice(0, 50),
204
341
  text,
205
- ref,
206
342
  visible: isVisible,
207
343
  enabled: !(htmlEl as HTMLInputElement).disabled,
208
344
  attributes,
@@ -212,24 +348,68 @@ async function extractInteractiveElements(
212
348
  width: rect.width,
213
349
  height: rect.height,
214
350
  },
351
+ xpath,
352
+ cssPath,
353
+ fingerprint,
354
+ refBase: getElementBase(htmlEl),
355
+ fingerprintData: {
356
+ tagName: htmlEl.tagName.toLowerCase(),
357
+ role: role || undefined,
358
+ type: htmlEl.getAttribute("type") || undefined,
359
+ name: fieldName || undefined,
360
+ placeholder: placeholder || undefined,
361
+ },
215
362
  });
216
363
  }
217
364
 
218
365
  return results;
219
366
  }, INTERACTIVE_SELECTORS);
367
+ }
220
368
 
221
- // Convert to InteractiveElement format, using ref from data-ref attribute
222
- return elementInfos.map((info, index) => ({
223
- index,
224
- role: info.role,
225
- name: info.name,
226
- text: info.text,
227
- ref: info.ref, // Use the actual ref from the DOM
228
- visible: info.visible,
229
- enabled: info.enabled,
230
- boundingBox: info.boundingBox === null ? undefined : info.boundingBox,
231
- attributes: info.attributes,
232
- }));
/**
 * Extract interactive elements and store refs in the provided store.
 *
 * The store is cleared and rebuilt on every call. Each element receives a ref
 * of the form `<refBase>_<n>`, where `n` counts elements sharing the same
 * base in extraction order, and an index equal to its position in the
 * returned array.
 *
 * @param page - Page to inspect
 * @param refStore - Store that receives the selectors for every element
 * @returns InteractiveElement array with assigned refs
 */
export async function extractInteractiveElements(
  page: Page,
  refStore: ElementRefStore,
): Promise<InteractiveElement[]> {
  const rawElements = await extractInteractiveElementsRaw(page);

  // Clear and rebuild the ref store
  refStore.clear();

  // Per-base counters. A Map is used because the base is derived from page
  // content (role attribute / tag name); with a plain object a base such as
  // "constructor" or "__proto__" would read an inherited member instead of
  // undefined and yield a garbage ref.
  const counters = new Map<string, number>();

  return rawElements.map((raw, index) => {
    // Generate unique ref
    const base = raw.refBase;
    const counter = counters.get(base) ?? 0;
    counters.set(base, counter + 1);
    const ref = `${base}_${counter}`;

    // Store selectors for later resolution
    const selectors: ElementSelectors = {
      xpath: raw.xpath,
      cssPath: raw.cssPath,
      fingerprint: raw.fingerprint ?? undefined,
    };

    refStore.set(ref, index, selectors, raw.fingerprintData);

    return {
      index,
      role: raw.role,
      name: raw.name,
      text: raw.text,
      ref,
      visible: raw.visible,
      enabled: raw.enabled,
      boundingBox: raw.boundingBox === null ? undefined : raw.boundingBox,
      attributes: raw.attributes,
    };
  });
}
234
414
 
235
415
  /**
@@ -400,10 +580,12 @@ export function formatStateText(state: BrowserState): string {
400
580
 
401
581
  /**
402
582
  * Get the current state of the browser/page
583
+ * Now requires a refStore to store element references server-side
403
584
  */
404
585
  export async function getState(
405
586
  page: Page,
406
587
  context: BrowserContext,
588
+ refStore: ElementRefStore,
407
589
  options: GetStateOptions = {},
408
590
  ): Promise<BrowserState> {
409
591
  const {
@@ -421,19 +603,24 @@ export async function getState(
421
603
  // Wait for page to be stable
422
604
  await page.waitForLoadState("domcontentloaded");
423
605
 
424
- // Inject refs first so extraction and targeting use same indices
425
- await injectElementRefs(page);
426
-
427
- // Extract state in parallel
428
- const [url, title, elements, accessibilityTree, scrollPosition, tabs] =
429
- await Promise.all([
430
- page.url(),
431
- page.title(),
432
- includeElements ? extractInteractiveElements(page) : [],
433
- includeTree ? buildAccessibilityTree(page, treeLimit) : "",
434
- getScrollPosition(page),
435
- getTabsInfo(context, page),
436
- ]);
606
+ // Extract state in parallel - NO DOM MODIFICATION
607
+ // Always rebuild refs even if elements aren't returned.
608
+ const [
609
+ url,
610
+ title,
611
+ elementsSnapshot,
612
+ accessibilityTree,
613
+ scrollPosition,
614
+ tabs,
615
+ ] = await Promise.all([
616
+ page.url(),
617
+ page.title(),
618
+ extractInteractiveElements(page, refStore),
619
+ includeTree ? buildAccessibilityTree(page, treeLimit) : "",
620
+ getScrollPosition(page),
621
+ getTabsInfo(context, page),
622
+ ]);
623
+ const elements = includeElements ? elementsSnapshot : [];
437
624
 
438
625
  // Optional screenshot
439
626
  let screenshot: string | undefined;
@@ -509,94 +696,11 @@ function sliceTree(
509
696
  }
510
697
 
511
698
  /**
512
- * Inject data-ref attributes into the page for element targeting
513
- * Returns the number of elements tagged
699
+ * @deprecated No longer injects refs into DOM - refs are now stored server-side
700
+ * This function is kept for backwards compatibility but does nothing
514
701
  */
515
- export async function injectElementRefs(page: Page): Promise<number> {
516
- return await page.evaluate((selector) => {
517
- const elements = Array.from(document.querySelectorAll(selector));
518
- const used = new Set<string>();
519
- const counters: Record<string, number> = {};
520
-
521
- const normalizeBase = (value: string) => {
522
- const trimmed = value.trim().toLowerCase();
523
- const normalized = trimmed.replace(/[^a-z0-9_-]+/g, "-");
524
- return normalized.length > 0 ? normalized : "element";
525
- };
526
-
527
- const getElementBase = (el: HTMLElement) => {
528
- const role = el.getAttribute("role");
529
- if (role) {
530
- return normalizeBase(role);
531
- }
532
- const tag = el.tagName.toLowerCase();
533
- if (tag === "a") return "link";
534
- if (tag === "button") return "button";
535
- if (tag === "input") {
536
- const type = (el as HTMLInputElement).type;
537
- if (type === "checkbox") return "checkbox";
538
- if (type === "radio") return "radio";
539
- if (type === "submit" || type === "button") return "button";
540
- return "input";
541
- }
542
- if (tag === "textarea") return "textarea";
543
- if (tag === "select") return "select";
544
- return normalizeBase(tag);
545
- };
546
-
547
- document.querySelectorAll("[data-ref]").forEach((el) => {
548
- const ref = el.getAttribute("data-ref");
549
- if (ref) {
550
- used.add(ref);
551
- const match = ref.match(/^([a-z0-9_-]+)_(\d+)$/i);
552
- if (match) {
553
- const base = match[1];
554
- const index = Number(match[2]);
555
- if (!Number.isNaN(index)) {
556
- counters[base] = Math.max(counters[base] ?? 0, index + 1);
557
- }
558
- }
559
- }
560
- });
561
-
562
- let index = 0;
563
-
564
- for (const el of elements) {
565
- const htmlEl = el as HTMLElement;
566
- let ref = htmlEl.getAttribute("data-ref");
567
-
568
- // Skip hidden elements unless they already have a stable ref.
569
- const style = window.getComputedStyle(htmlEl);
570
- if (!ref) {
571
- if (style.display === "none" || style.visibility === "hidden") {
572
- continue;
573
- }
574
-
575
- const rect = htmlEl.getBoundingClientRect();
576
- if (rect.width === 0 && rect.height === 0) {
577
- const tag = htmlEl.tagName.toLowerCase();
578
- if (!["input", "textarea", "select"].includes(tag)) {
579
- continue;
580
- }
581
- }
582
- }
583
-
584
- if (!ref) {
585
- const base = getElementBase(htmlEl);
586
- let next = counters[base] ?? 0;
587
- while (used.has(`${base}_${next}`)) {
588
- next++;
589
- }
590
- ref = `${base}_${next}`;
591
- counters[base] = next + 1;
592
- used.add(ref);
593
- htmlEl.setAttribute("data-ref", ref);
594
- }
595
-
596
- htmlEl.setAttribute("data-index", String(index));
597
- index++;
598
- }
599
-
600
- return used.size;
601
- }, INTERACTIVE_SELECTORS);
702
+ export async function injectElementRefs(_page: Page): Promise<number> {
703
+ // No-op: refs are now stored server-side in ElementRefStore
704
+ // This function is kept for API compatibility but should not be used
705
+ return 0;
602
706
  }
package/src/types.ts CHANGED
@@ -7,6 +7,8 @@ export interface BrowserConfig {
7
7
  executablePath?: string;
8
8
  /** Prefer system Chrome/Chromium over bundled Playwright (default: true) */
9
9
  useSystemChrome?: boolean;
10
+ /** Allow system Chrome in headless mode on macOS (default: false) */
11
+ allowSystemChromeHeadless?: boolean;
10
12
  /** Viewport width (default: 1280) */
11
13
  viewportWidth?: number;
12
14
  /** Viewport height (default: 720) */