playwright-genie 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -4,6 +4,9 @@ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
4
  var __getOwnPropNames = Object.getOwnPropertyNames;
5
5
  var __getProtoOf = Object.getPrototypeOf;
6
6
  var __hasOwnProp = Object.prototype.hasOwnProperty;
7
// Bundler helper: wraps a lazily-evaluated module body so it runs at most once.
// `fn` is an object whose first own property is the module body function;
// `res` caches whatever that body returned.
var __esm = (fn, res) => function __init() {
  if (fn) {
    const moduleBody = fn[__getOwnPropNames(fn)[0]];
    // Clear `fn` before the call so a second __init() never re-runs the body,
    // even when the body throws.
    fn = 0;
    res = moduleBody(0);
  }
  return res;
};
7
10
  var __export = (target, all) => {
8
11
  for (var name in all)
9
12
  __defProp(target, name, { get: all[name], enumerable: true });
@@ -26,128 +29,16 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
26
29
  ));
27
30
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
28
31
 
29
- // src/entry.js
30
- var entry_exports = {};
31
- __export(entry_exports, {
32
- SmartAction: () => SmartAction,
33
- chatCompletion: () => chatCompletion,
34
- clearAllCaches: () => clearAllCaches,
35
- clearCache: () => clearCache,
36
- createSmartLocator: () => createSmartLocator,
37
- findAllMatches: () => findAllMatches,
38
- findLocator: () => findLocator,
39
- getConfig: () => getConfig,
40
- getLocator: () => getLocator,
41
- getPageStructure: () => getPageStructure,
42
- resolveLocator: () => resolveLocator,
43
- resolveLocatorsBatch: () => resolveLocatorsBatch
44
- });
45
- module.exports = __toCommonJS(entry_exports);
46
-
47
- // src/llm/client.js
48
- var import_config = require("dotenv/config");
49
- var import_openai = __toESM(require("openai"), 1);
50
- var DEBUG = process.env.LLM_LOCATOR_DEBUG === "true";
51
- var LLM_API_KEY = process.env.LLM_API_KEY || process.env.ROUTELLM_API_KEY || process.env.OPENAI_API_KEY || process.env.ANTHROPIC_API_KEY;
52
- var LLM_BASE_URL = process.env.LLM_BASE_URL || process.env.ROUTELLM_BASE_URL || "https://api.openai.com/v1";
53
- var LLM_MODEL = process.env.LLM_MODEL || process.env.ROUTELLM_MODEL || "gpt-4o-mini";
54
- if (!LLM_API_KEY) {
55
- throw new Error(
56
- "No LLM API key found. Set one of: LLM_API_KEY, ROUTELLM_API_KEY, OPENAI_API_KEY, or ANTHROPIC_API_KEY"
57
- );
58
- }
59
- var client = new import_openai.default({
60
- baseURL: LLM_BASE_URL,
61
- apiKey: LLM_API_KEY
62
- });
63
- async function chatCompletion(messages, options = {}) {
64
- const {
65
- model = LLM_MODEL,
66
- temperature = 0,
67
- maxTokens = 1024
68
- } = options;
69
- if (DEBUG) {
70
- console.log(`[LLM Client] model=${model} base=${LLM_BASE_URL} msgs=${messages.length}`);
71
- }
72
- const completion = await client.chat.completions.create({
73
- model,
74
- temperature,
75
- max_tokens: maxTokens,
76
- messages
77
- });
78
- return completion.choices[0].message.content.trim();
79
- }
80
- function getConfig() {
81
- return {
82
- apiKey: LLM_API_KEY ? "***" + LLM_API_KEY.slice(-4) : null,
83
- baseUrl: LLM_BASE_URL,
84
- model: LLM_MODEL
85
- };
86
- }
87
-
88
- // src/llm/prompt.js
89
- var SYSTEM_PROMPT = `You are an expert at generating Playwright locators from page structure.
90
- You receive a (possibly trimmed) accessibility tree in YAML and special DOM elements.
91
-
92
- CRITICAL RULES:
93
- 1. ONLY use exact text/names that EXIST in the provided structure. NEVER invent or echo the user's query as locator text.
94
- 2. The user's query is a DESCRIPTION of the element, NOT the element's text. You must FIND the matching element in the tree.
95
- Example: Query "Admin Tab" \u2192 find the link/tab named "Admin" in the tree \u2192 getByRole('link', { name: /Admin/i })
96
- Example: Query "login button" \u2192 find button named "Login" in the tree \u2192 getByRole('button', { name: /Login/i })
97
- 3. YAML format: "- link \\"Sign in\\"" means a link element with accessible name "Sign in"
98
- "- textbox \\"Username\\"" means a textbox with accessible name "Username"
99
- 4. Prefer getByRole with name regex: { name: /text/i } for robustness.
100
- 5. Words like "tab", "button", "link", "field", "textbox", "input" in the query describe the ELEMENT TYPE, not the text content.
101
-
102
- ACTION-AWARE RULES:
103
- - If action is "fill", "type", "clear", "press": The target MUST be an editable element (textbox, searchbox, combobox, input). Use getByRole('textbox', ...) or getByPlaceholder or getByLabel. NEVER target a label or static text.
104
- - If action is "click", "dblclick", "tap", "focus", "hover": Target the interactive element (link, button, tab, menuitem, etc).
105
- - If action is "check", "uncheck": Target a checkbox or radio element.
106
- - If action is "select": Target a combobox or listbox element.
107
- - If action is "getText" or null: Target any matching element.
108
-
109
- PRIORITY: getByTestId > getByRole > getByLabel > getByPlaceholder > getByAltText > getByTitle > getByText > locator(CSS)
110
- For iframes: page.frameLocator('sel').getByRole(...)
111
- For positional: .first() / .nth(n) / .last()
112
-
113
- Return ONLY valid JSON.`;
114
- function buildMessages(payloadStr, query, action) {
115
- const actionHint = action ? `
116
- Action: "${action}" (the locator MUST target an element appropriate for this action)` : "";
117
- return [
118
- { role: "system", content: SYSTEM_PROMPT },
119
- {
120
- role: "user",
121
- content: `Page:
122
- ${payloadStr}
123
-
124
- Find element described as: "${query}"${actionHint}
125
-
126
- IMPORTANT: Match the query to an ACTUAL element in the tree above. Do NOT use the query text as locator text.
127
-
128
- JSON: { "strategy":"\u2026", "locatorString":"page.getBy\u2026()", "isInFrame":false, "frameSelector":null, "confidence":0.95, "reasoning":"\u2026" }`
129
- }
130
- ];
131
- }
132
- function buildBatchMessages(payloadStr, queries) {
133
- const queriesList = queries.map((q, i) => `${i + 1}. "${q}"`).join("\n");
134
- return [
135
- { role: "system", content: SYSTEM_PROMPT },
136
- {
137
- role: "user",
138
- content: `Page:
139
- ${payloadStr}
140
-
141
- Find locators for ALL (match each query to ACTUAL elements in the tree, do NOT echo query text):
142
- ${queriesList}
143
-
144
- Return JSON array:
145
- [{ "query":"\u2026", "strategy":"\u2026", "locatorString":"page.getBy\u2026()", "isInFrame":false, "frameSelector":null, "confidence":0.95, "reasoning":"\u2026" }]`
146
- }
147
- ];
148
- }
149
-
150
32
  // src/llm/parser.js
33
+ var parser_exports = {};
34
+ __export(parser_exports, {
35
+ applyChain: () => applyChain,
36
+ createPlaywrightLocator: () => createPlaywrightLocator,
37
+ evalLocator: () => evalLocator,
38
+ parseJsonResponse: () => parseJsonResponse,
39
+ validateLocator: () => validateLocator,
40
+ validateWithFallback: () => validateWithFallback
41
+ });
151
42
  function parseJsonResponse(raw) {
152
43
  let text = raw.trim();
153
44
  if (text.startsWith("```")) {
@@ -167,17 +58,39 @@ function createPlaywrightLocator(page, locatorString, isInFrame, frameSelector)
167
58
/**
 * Parse the inline options text of a locator call (for example
 * `{ name: /Login/i, exact: true }`) without evaluating it.
 *
 * Recognised keys:
 *   - `name`:  a regex literal, or a single- or double-quoted string
 *   - `exact`: the literal `true` or `false`
 *
 * @param {string} [str] Raw options text taken from the locator string.
 * @returns {{name?: (RegExp|string), exact?: boolean}} The recognised options;
 *   an empty object when `str` is empty or nothing is recognised.
 */
function parseInlineOptions(str) {
  if (!str) return {};
  const opts = {};
  const nameIdx = str.indexOf("name:");
  if (nameIdx !== -1) {
    const after = str.substring(nameIdx + 5).trim();
    const opener = after[0];
    if (opener === "/") {
      // Locate the slash that closes THIS literal: the first "/" that is
      // neither escaped nor inside a [...] character class. Searching from the
      // end of the text instead would latch onto slashes belonging to later
      // chained calls such as .filter({ hasText: /x/ }).
      let end = -1;
      let inClass = false;
      for (let i = 1; i < after.length; i++) {
        const ch = after[i];
        if (ch === "\\") {
          i++; // skip the escaped character
        } else if (ch === "[") {
          inClass = true;
        } else if (ch === "]") {
          inClass = false;
        } else if (ch === "/" && !inClass) {
          end = i;
          break;
        }
      }
      if (end > 1) {
        const body = after.substring(1, end);
        const flags = after.substring(end + 1).match(/^[gimsuy]*/)[0];
        try {
          opts.name = new RegExp(body, flags || void 0);
        } catch {
          // Invalid pattern or flags: fall back to matching the raw text.
          opts.name = body;
        }
      }
    } else if (opener === "'" || opener === '"') {
      const end = after.indexOf(opener, 1);
      if (end > 0) opts.name = after.substring(1, end);
    }
  }
  const exact = str.match(/exact:\s*(true|false)/);
  if (exact) opts.exact = exact[1] === "true";
  return opts;
}
178
85
/**
 * Turn the captured text argument of a getBy*() call into a value Playwright
 * accepts.
 *
 * @param {string} [quoted] Contents of a quoted string argument, if one matched.
 * @param {string} [regexBody] Body of a regex-literal argument, if one matched.
 * @param {string} [regexFlags] Flags of that regex literal.
 * @returns {string|RegExp|null} `quoted` when non-empty; otherwise a RegExp
 *   built from `regexBody` (or the raw body text when it does not compile);
 *   `null` when neither argument is present.
 */
function parseTextArg(quoted, regexBody, regexFlags) {
  if (quoted) {
    return quoted;
  }
  if (!regexBody) {
    return null;
  }
  let pattern;
  try {
    pattern = new RegExp(regexBody, regexFlags || void 0);
  } catch {
    // Not a compilable pattern: use the body as plain text instead.
    pattern = regexBody;
  }
  return pattern;
}
183
96
  function parseFilterOptions(str) {
@@ -192,7 +105,7 @@ function parseFilterOptions(str) {
192
105
  function evalLocator(context, code) {
193
106
  const patterns = [
194
107
  {
195
- regex: /getByRole\('([^']+)'(?:,\s*({[^}]+}))?\)(.*)/,
108
+ regex: /getByRole\('([^']+)'(?:,\s*(\{.*\}))?\)(.*)/,
196
109
  handler: (m) => {
197
110
  const opts = parseInlineOptions(m[2]);
198
111
  return applyChain(context.getByRole(m[1], opts), m[3]);
@@ -242,9 +155,21 @@ function evalLocator(context, code) {
242
155
  return applyChain(context.getByTitle(t, opts), m[5]);
243
156
  }
244
157
  },
158
+ {
159
+ regex: /locator\('xpath=([^']+)'\)(.*)/,
160
+ handler: (m) => applyChain(context.locator(`xpath=${m[1]}`), m[2])
161
+ },
162
+ {
163
+ regex: /locator\("xpath=([^"]+)"\)(.*)/,
164
+ handler: (m) => applyChain(context.locator(`xpath=${m[1]}`), m[2])
165
+ },
245
166
  {
246
167
  regex: /locator\('([^']+)'\)(.*)/,
247
168
  handler: (m) => applyChain(context.locator(m[1]), m[2])
169
+ },
170
+ {
171
+ regex: /locator\("([^"]+)"\)(.*)/,
172
+ handler: (m) => applyChain(context.locator(m[1]), m[2])
248
173
  }
249
174
  ];
250
175
  for (const p of patterns) {
@@ -277,6 +202,261 @@ async function validateLocator(locator, timeoutMs = 3e3) {
277
202
  return false;
278
203
  }
279
204
  }
205
/**
 * Build and validate the primary locator; when it does not validate, try each
 * fallback locator string in order and return the first one that does.
 *
 * @param {object} page Passed through to createPlaywrightLocator.
 * @param {string} primary Primary locator string.
 * @param {string[]|string|null|undefined} fallbacks Alternative locator
 *   strings. A single string is treated as a one-element list; any other
 *   non-array value and any non-string or empty entry is ignored.
 * @param {boolean} isInFrame Passed through to createPlaywrightLocator.
 * @param {string|null} frameSelector Passed through to createPlaywrightLocator.
 * @returns {Promise<{locator: *, locatorString: string}|null>} The first
 *   locator that validates together with its source string, or `null`.
 *   An error thrown while building the PRIMARY locator propagates to the caller.
 */
async function validateWithFallback(page, primary, fallbacks, isInFrame, frameSelector) {
  const primaryLocator = createPlaywrightLocator(page, primary, isInFrame, frameSelector);
  if (await validateLocator(primaryLocator)) {
    return { locator: primaryLocator, locatorString: primary };
  }
  // The fallback list originates from model output, so its shape is not
  // guaranteed. Iterating a bare string would try every character as a
  // locator, each paying the validation timeout.
  let candidates = [];
  if (Array.isArray(fallbacks)) candidates = fallbacks;
  else if (typeof fallbacks === "string") candidates = [fallbacks];
  for (const fb of candidates) {
    if (typeof fb !== "string" || fb === "") continue;
    try {
      const fbLocator = createPlaywrightLocator(page, fb, isInFrame, frameSelector);
      if (await validateLocator(fbLocator, 2e3)) {
        return { locator: fbLocator, locatorString: fb };
      }
    } catch {
      // Best effort: a fallback that cannot be built or validated is skipped
      // so the remaining candidates still get their turn.
    }
  }
  return null;
}
221
+ var init_parser = __esm({
222
+ "src/llm/parser.js"() {
223
+ }
224
+ });
225
+
226
+ // src/entry.js
227
+ var entry_exports = {};
228
+ __export(entry_exports, {
229
+ SmartAction: () => SmartAction,
230
+ chatCompletion: () => chatCompletion,
231
+ clearAllCaches: () => clearAllCaches,
232
+ clearCache: () => clearCache,
233
+ createSmartLocator: () => createSmartLocator,
234
+ findAllMatches: () => findAllMatches,
235
+ findLocator: () => findLocator,
236
+ getConfig: () => getConfig,
237
+ getLocator: () => getLocator,
238
+ getPageStructure: () => getPageStructure,
239
+ resolveLocator: () => resolveLocator,
240
+ resolveLocatorsBatch: () => resolveLocatorsBatch
241
+ });
242
+ module.exports = __toCommonJS(entry_exports);
243
+
244
+ // src/llm/client.js
245
+ var import_config = require("dotenv/config");
246
+ var import_openai = __toESM(require("openai"), 1);
247
+ var DEBUG = process.env.LLM_LOCATOR_DEBUG === "true";
248
+ var LLM_API_KEY = process.env.LLM_API_KEY || process.env.ROUTELLM_API_KEY || process.env.OPENAI_API_KEY || process.env.ANTHROPIC_API_KEY;
249
+ var LLM_BASE_URL = process.env.LLM_BASE_URL || process.env.ROUTELLM_BASE_URL || "https://api.openai.com/v1";
250
+ var LLM_MODEL = process.env.LLM_MODEL || process.env.ROUTELLM_MODEL || "gpt-4o-mini";
251
+ if (!LLM_API_KEY) {
252
+ throw new Error(
253
+ "No LLM API key found. Set one of: LLM_API_KEY, ROUTELLM_API_KEY, OPENAI_API_KEY, or ANTHROPIC_API_KEY"
254
+ );
255
+ }
256
+ var client = new import_openai.default({
257
+ baseURL: LLM_BASE_URL,
258
+ apiKey: LLM_API_KEY
259
+ });
260
/**
 * Send a chat-completion request through the module-level client and return
 * the first choice's text, trimmed.
 *
 * @param {Array<object>} messages Chat messages forwarded unchanged as `messages`.
 * @param {object} [options]
 * @param {string} [options.model=LLM_MODEL] Model identifier.
 * @param {number} [options.temperature=0] Sampling temperature.
 * @param {number} [options.maxTokens=256] Forwarded as `max_tokens`.
 * @returns {Promise<string>} Trimmed text content of the first choice.
 * @throws {Error} When the response has no choices or the first choice has no
 *   text content; errors from the client call itself propagate unchanged.
 */
async function chatCompletion(messages, options = {}) {
  const {
    model = LLM_MODEL,
    temperature = 0,
    maxTokens = 256
  } = options;
  if (DEBUG) {
    console.log(`[LLM Client] model=${model} base=${LLM_BASE_URL} msgs=${messages.length}`);
  }
  const params = {
    model,
    temperature,
    max_tokens: maxTokens,
    messages
  };
  // JSON mode is requested only for model names containing one of these markers.
  if (model.includes("gpt-") || model.includes("o1") || model.includes("o3") || model.includes("o4")) {
    params.response_format = { type: "json_object" };
  }
  const completion = await client.chat.completions.create(params);
  // Fail with a descriptive error instead of a TypeError from reading
  // properties of undefined/null when the reply carries no text.
  const content = completion?.choices?.[0]?.message?.content;
  if (typeof content !== "string") {
    throw new Error(`LLM response contained no text content (model=${model})`);
  }
  return content.trim();
}
281
/**
 * Report the active LLM settings with the API key masked.
 *
 * @returns {{apiKey: (string|null), baseUrl: string, model: string}}
 *   `apiKey` is "***" followed by the key's last four characters, or `null`
 *   when no key is set.
 */
function getConfig() {
  const maskedKey = LLM_API_KEY ? `***${LLM_API_KEY.slice(-4)}` : null;
  return { apiKey: maskedKey, baseUrl: LLM_BASE_URL, model: LLM_MODEL };
}
288
+
289
+ // src/llm/prompt.js
290
+ var SYSTEM_PROMPT = `You are an expert at generating Playwright locators from page structure.
291
+ You receive a (possibly trimmed) accessibility tree in YAML, special DOM elements, and interactive DOM elements.
292
+
293
+ CRITICAL RULES:
294
+ 1. ONLY use exact text/names that EXIST in the provided structure. NEVER invent or echo the user's query as locator text.
295
+ 2. The user's query is a DESCRIPTION of the element, NOT the element's text. You must FIND the matching element in the tree.
296
+ Example: Query "Admin Tab" \u2192 find the link/tab named "Admin" in the tree \u2192 getByRole('link', { name: /Admin/i })
297
+ Example: Query "login button" \u2192 find button named "Login" in the tree \u2192 getByRole('button', { name: /Login/i })
298
+ 3. YAML format: "- link \\"Sign in\\"" means a link element with accessible name "Sign in"
299
+ "- textbox \\"Username\\"" means a textbox with accessible name "Username"
300
+ 4. Prefer getByRole with name regex: { name: /text/i } for robustness.
301
+ 5. Words like "tab", "button", "link", "field", "textbox", "input" in the query describe the ELEMENT TYPE, not the text content.
302
+ 6. NEVER invent accessible names. If an element appears in the aria tree WITHOUT a quoted name
303
+ (e.g., "textbox", "button", "checkbox", "combobox", "link" with no "Name" after it),
304
+ it has NO accessible name. Do NOT guess a name from nearby text, labels, cells, or headings.
305
+ Instead, find that element in interactiveElements by matching tag/type/position, then use its
306
+ id, htmlName, testId, or selector to build a CSS locator:
307
+ - page.locator('#escapedId')
308
+ - page.locator('[name="htmlName"]')
309
+ - page.locator('[data-test="value"]')
310
+ - page.locator('select.className')
311
+ Only use getByRole('role', { name: /text/i }) when the aria tree EXPLICITLY shows a quoted name
312
+ like: button "Submit", link "Home", textbox "Email".
313
+
314
+ INTERACTIVE ELEMENTS:
315
+ - The interactiveElements array contains DOM metadata for ALL interactive elements including unnamed ones.
316
+ - Each entry has: tag, type, role, id, classes, name, testId, text, options (for selects), selector, parentContext.
317
+ - When an element has NO accessible name in the aria tree, use interactiveElements to find it by id, testId, classes, or parent context.
318
+ - For <select> elements: use the "selector" or "id" field to build page.locator('#id') or page.getByTestId('...').
319
+ - For unnamed comboboxes: look at interactiveElements for a select/combobox with matching options, then use its id/testId/classes.
320
+
321
+ ACTION-AWARE RULES:
322
+ - If action is "fill", "type", "clear", "press": The target MUST be an editable element (textbox, searchbox, combobox, input). Use getByRole('textbox', ...) or getByPlaceholder or getByLabel. NEVER target a label or static text.
323
+ - If action is "click", "dblclick", "tap", "focus", "hover": Target the interactive element (link, button, tab, menuitem, etc).
324
+ - If action is "check", "uncheck": Target a checkbox or radio element.
325
+ - If action is "select": Target a combobox, listbox, or <select> element. If the element is a native <select>, prefer page.locator('#id') or page.locator('select.className').
326
+ - If action is "getText" or null: Target any matching element.
327
+
328
+ LOCATOR PRIORITY: getByTestId > getByRole > getByLabel > getByPlaceholder > getByAltText > getByTitle > getByText > locator(CSS)
329
+ FALLBACK: When no accessible name exists, use locator('CSS selector') with id, class, or data attribute from interactiveElements.
330
+ - When using test IDs, use the EXACT attribute from the page data.
331
+ - If data shows [data-test="x"], use page.locator('[data-test="x"]') NOT getByTestId('x').
332
+ - Only use getByTestId() if the attribute is literally "data-testid".
333
+ For iframes: page.frameLocator('sel').getByRole(...)
334
+ For positional: .first() / .nth(n) / .last()
335
+
336
+ Return ONLY valid JSON.`;
337
+ var DOM_SYSTEM_PROMPT = `You are an expert at generating Playwright locators from raw DOM structure.
338
+ You receive a DOM tree with element metadata (tag, id, classes, xpath, css selectors, text, attributes).
339
+ There is NO accessibility tree available \u2014 you must build locators purely from DOM data.
340
+
341
+ CRITICAL RULES:
342
+ 1. The user's query is a DESCRIPTION of the element, NOT the element's text. Match the description to DOM nodes.
343
+ 2. ONLY use data that EXISTS in the provided DOM structure. NEVER invent IDs, classes, or text.
344
+ 3. Each DOM node may have: tag, id, classes, role, testId ({attr, value}), ariaLabel, title, name, placeholder, text, href, xpath, css, label, type, value, disabled, checked, depth, interactive, heading, landmark.
345
+
346
+ LOCATOR STRATEGY (in priority order):
347
+ 1. Test ID: If testId exists, use page.locator('[<attr>="<value>"]') with the EXACT attribute name.
348
+ Only use page.getByTestId('x') when attr is literally "data-testid".
349
+ 2. ID: page.locator('#<id>') \u2014 most stable, prefer when available.
350
+ 3. Role + accessible name: page.getByRole('<role>', { name: /<text>/i }) \u2014 when role and ariaLabel/label/text exist.
351
+ 4. Label: page.getByLabel(/<text>/i) \u2014 for form fields with associated labels.
352
+ 5. Placeholder: page.getByPlaceholder(/<text>/i) \u2014 for inputs with placeholder text.
353
+ 6. Name attribute: page.locator('[name="<name>"]') \u2014 for form elements.
354
+ 7. CSS selector: page.locator('<css>') \u2014 use the "css" field from DOM data when available.
355
+ 8. XPath: page.locator('xpath=<xpath>') \u2014 use as last resort, take from "xpath" field.
356
+ 9. Compound CSS: Build from tag + class + structural position when no unique identifier exists.
357
+ Example: page.locator('div.container button.submit')
358
+ Example: page.locator('form#login input[type="email"]')
359
+
360
+ COMBINING SELECTORS FOR UNIQUENESS:
361
+ - If a single attribute isn't unique, combine: page.locator('form#login input[name="email"]')
362
+ - Use parent context from "depth" and surrounding nodes to disambiguate.
363
+ - Use .first() / .nth(n) / .last() when multiple matches exist.
364
+ - Use .filter({ hasText: /text/i }) to narrow results.
365
+
366
+ ACTION-AWARE RULES:
367
+ - If action is "fill", "type", "clear", "press": Target MUST be input, textarea, select, or [contenteditable]. NEVER target labels, divs, or spans.
368
+ - If action is "click": Target the clickable element (button, a, [role="button"], etc).
369
+ - If action is "check", "uncheck": Target checkbox or radio input.
370
+ - If action is "select": Target the <select> element or [role="combobox"]/[role="listbox"].
371
+
372
+ FALLBACK LOCATORS:
373
+ - Always provide 1-2 fallback locators in case the primary doesn't match.
374
+ - Use different strategies for fallbacks (e.g., primary=CSS, fallback=XPath).
375
+
376
+ For iframes: page.frameLocator('sel').locator(...)
377
+ Return ONLY valid JSON.`;
378
/**
 * Build the chat messages for resolving a single element against the
 * accessibility-tree page structure.
 *
 * @param {string} pageStructure Serialized page structure embedded verbatim in the prompt.
 * @param {string} query Natural-language description of the target element.
 * @param {string} [action] Intended action; when truthy an ACTION line is appended.
 * @returns {Array<{role: string, content: string}>} A system message followed by a user message.
 */
function buildMessages(pageStructure, query, action) {
  let actionNote = "";
  if (action) {
    actionNote = `\nACTION: The user wants to "${action}" this element.`;
  }
  const userPrompt = `Page structure:
${pageStructure}

Find element: "${query}"${actionNote}

IMPORTANT: Find the element that matches the description "${query}". Do NOT use "${query}" as the locator text - find the ACTUAL element name/text from the page structure above.

Return JSON: {"locatorString":"page.getBy...(...)", "confidence": 0.0-1.0, "explanation":"...", "fallbackLocators":["page.locator(...)"]}`;
  const systemMessage = { role: "system", content: SYSTEM_PROMPT };
  const userMessage = { role: "user", content: userPrompt };
  return [systemMessage, userMessage];
}
396
/**
 * Build the chat messages for resolving a single element from raw DOM data,
 * using the DOM-specific system prompt.
 *
 * @param {string} pageStructure Serialized DOM structure embedded verbatim in the prompt.
 * @param {string} query Natural-language description of the target element.
 * @param {string} [action] Intended action; when truthy an ACTION line is appended.
 * @returns {Array<{role: string, content: string}>} A system message followed by a user message.
 */
function buildDomMessages(pageStructure, query, action) {
  let actionNote = "";
  if (action) {
    actionNote = `\nACTION: The user wants to "${action}" this element.`;
  }
  const userPrompt = `DOM structure:
${pageStructure}

Find element: "${query}"${actionNote}

INSTRUCTIONS:
- Search the domTree and interactiveElements for the element matching "${query}".
- Use the element's id, testId, css, xpath, name, classes, role, text, label, or placeholder to build a Playwright locator.
- Prefer stable selectors (id, testId, name) over positional ones (xpath, nth).
- Provide fallback locators using alternative strategies.

Return JSON: {"locatorString":"page.locator(...)", "confidence": 0.0-1.0, "explanation":"...", "fallbackLocators":["page.locator('xpath=...')"]}`;
  const systemMessage = { role: "system", content: DOM_SYSTEM_PROMPT };
  const userMessage = { role: "user", content: userPrompt };
  return [systemMessage, userMessage];
}
418
/**
 * Build the chat messages for resolving several elements in one request
 * against the accessibility-tree page structure.
 *
 * @param {string} pageStructure Serialized page structure embedded verbatim in the prompt.
 * @param {Array<string|{query: string, action?: string}>} queries Element
 *   descriptions; each entry is rendered as a numbered line with its action,
 *   or "none" when no action is given.
 * @returns {Array<{role: string, content: string}>} A system message followed by a user message.
 */
function buildBatchMessages(pageStructure, queries) {
  const numbered = [];
  queries.forEach((q, i) => {
    numbered.push(`${i + 1}. "${q.query || q}" (action: ${q.action || "none"})`);
  });
  const queryList = numbered.join("\n");
  const userPrompt = `Page structure:
${pageStructure}

Find ALL of these elements:
${queryList}

IMPORTANT: For each query, find the ACTUAL element name/text from the page structure. Do NOT echo the query as locator text.

Return JSON array: [{"query":"...","locatorString":"page.getBy...(...)", "confidence": 0.0-1.0, "explanation":"...", "fallbackLocators":["..."]}]`;
  const systemMessage = { role: "system", content: SYSTEM_PROMPT };
  const userMessage = { role: "user", content: userPrompt };
  return [systemMessage, userMessage];
}
436
/**
 * Build the chat messages for resolving several elements in one request from
 * raw DOM data, using the DOM-specific system prompt.
 *
 * @param {string} pageStructure Serialized DOM structure embedded verbatim in the prompt.
 * @param {Array<string|{query: string, action?: string}>} queries Element
 *   descriptions; each entry is rendered as a numbered line with its action,
 *   or "none" when no action is given.
 * @returns {Array<{role: string, content: string}>} A system message followed by a user message.
 */
function buildDomBatchMessages(pageStructure, queries) {
  const numbered = [];
  queries.forEach((q, i) => {
    numbered.push(`${i + 1}. "${q.query || q}" (action: ${q.action || "none"})`);
  });
  const queryList = numbered.join("\n");
  const userPrompt = `DOM structure:
${pageStructure}

Find ALL of these elements:
${queryList}

INSTRUCTIONS:
- Search the domTree and interactiveElements for each element.
- Use id, testId, css, xpath, name, classes, role, text, label, or placeholder to build Playwright locators.
- Prefer stable selectors over positional ones.

Return JSON array: [{"query":"...","locatorString":"page.locator(...)", "confidence": 0.0-1.0, "explanation":"...", "fallbackLocators":["..."]}]`;
  const systemMessage = { role: "system", content: DOM_SYSTEM_PROMPT };
  const userMessage = { role: "user", content: userPrompt };
  return [systemMessage, userMessage];
}
457
+
458
+ // src/resolver.js
459
+ init_parser();
280
460
 
281
461
  // src/cache/disk-cache.js
282
462
  var import_fs = __toESM(require("fs"), 1);
@@ -402,6 +582,8 @@ function clearMemoryCache() {
402
582
 
403
583
  // src/page-structure.js
404
584
  var DEBUG3 = process.env.LLM_LOCATOR_DEBUG === "true";
585
+ var MAX_DOM_NODES = 300;
586
+ var MAX_INTERACTIVE = 100;
405
587
  async function getPageStructure(page, forceRefresh = false) {
406
588
  const url = page.url();
407
589
  if (!forceRefresh && isStructureCacheValid(url)) {
@@ -428,7 +610,9 @@ async function getPageStructure(page, forceRefresh = false) {
428
610
  async function getFrameStructure(pageOrFrame) {
429
611
  let ariaTree = null;
430
612
  let specialElements = [];
431
- const [ariaResult, domResult] = await Promise.allSettled([
613
+ let interactiveElements = [];
614
+ let domTree = [];
615
+ const [ariaResult, domResult, interactiveResult, domTreeResult] = await Promise.allSettled([
432
616
  pageOrFrame.locator("body").ariaSnapshot({ timeout: 5e3 }),
433
617
  pageOrFrame.evaluate(() => {
434
618
  const els = [];
@@ -445,7 +629,220 @@ async function getFrameStructure(pageOrFrame) {
445
629
  if (e.testId || e.placeholder || e.alt || e.ariaLabel) els.push(e);
446
630
  }
447
631
  return els;
448
- })
632
+ }),
633
+ pageOrFrame.evaluate((maxInteractive) => {
634
+ const els = [];
635
+ const selectors = 'select, input, textarea, button, a, [role="combobox"], [role="listbox"], [role="menu"], [role="tab"], [role="checkbox"], [role="radio"], [role="slider"], [role="switch"], [contenteditable="true"]';
636
+ for (const el of document.querySelectorAll(selectors)) {
637
+ if (els.length >= maxInteractive) break;
638
+ const ariaLabel = el.getAttribute("aria-label") || el.getAttribute("placeholder") || null;
639
+ const htmlName = el.getAttribute("name") || null;
640
+ const id = el.id || null;
641
+ const classes = el.className ? el.className.toString().split(/\s+/).filter((c) => c && c.length < 40).slice(0, 3).join(" ") : null;
642
+ const tag = el.tagName.toLowerCase();
643
+ const type = el.getAttribute("type") || null;
644
+ const role = el.getAttribute("role") || null;
645
+ const testIdAttr = el.getAttribute("data-testid") ? "data-testid" : el.getAttribute("data-test") ? "data-test" : el.getAttribute("data-test-id") ? "data-test-id" : el.getAttribute("data-qa") ? "data-qa" : el.getAttribute("data-cy") ? "data-cy" : null;
646
+ const testId = testIdAttr ? el.getAttribute(testIdAttr) : null;
647
+ const text = el.textContent?.trim().substring(0, 50) || null;
648
+ const labelEl = id ? document.querySelector(`label[for="${id}"]`) : null;
649
+ const labelText = labelEl ? labelEl.textContent.trim().substring(0, 50) : null;
650
+ let options = null;
651
+ if (tag === "select") {
652
+ options = [...el.options].map((o) => ({ value: o.value, text: o.text, selected: o.selected })).slice(0, 10);
653
+ }
654
+ const parent = el.closest("[class], [id], [data-testid], [data-test], [data-cy], [data-qa]");
655
+ let parentContext = null;
656
+ if (parent && parent !== el) {
657
+ const parentTestAttr = parent.getAttribute("data-testid") ? "data-testid" : parent.getAttribute("data-test") ? "data-test" : parent.getAttribute("data-qa") ? "data-qa" : parent.getAttribute("data-cy") ? "data-cy" : null;
658
+ parentContext = parent.id ? `#${parent.id}` : parentTestAttr ? `[${parentTestAttr}="${parent.getAttribute(parentTestAttr)}"]` : parent.className ? `.${parent.className.toString().split(/\s+/)[0]}` : null;
659
+ }
660
+ const selector = testId ? `[${testIdAttr}="${testId}"]` : id ? `[id="${id}"]` : htmlName ? `[name="${htmlName}"]` : classes ? `.${classes.split(" ")[0]}` : null;
661
+ els.push({
662
+ tag,
663
+ type,
664
+ role,
665
+ id,
666
+ classes,
667
+ ariaLabel,
668
+ htmlName,
669
+ testId,
670
+ labelText,
671
+ text: tag === "select" ? null : text,
672
+ options,
673
+ selector,
674
+ parentContext
675
+ });
676
+ }
677
+ return els;
678
+ }, MAX_INTERACTIVE),
679
+ pageOrFrame.evaluate((maxNodes) => {
680
+ function getXPath(el) {
681
+ if (el.id) return `//*[@id="${el.id}"]`;
682
+ const parts = [];
683
+ let current = el;
684
+ while (current && current.nodeType === 1) {
685
+ let idx = 0;
686
+ let sibling = current.previousSibling;
687
+ while (sibling) {
688
+ if (sibling.nodeType === 1 && sibling.tagName === current.tagName) idx++;
689
+ sibling = sibling.previousSibling;
690
+ }
691
+ const tag = current.tagName.toLowerCase();
692
+ parts.unshift(idx > 0 || current.nextElementSibling?.tagName === current.tagName ? `${tag}[${idx + 1}]` : tag);
693
+ current = current.parentElement;
694
+ }
695
+ return "/" + parts.join("/");
696
+ }
697
+ function getUniqueSelector(el) {
698
+ if (el.id) return `#${CSS.escape(el.id)}`;
699
+ const testAttrs = ["data-testid", "data-test-id", "data-test", "data-qa", "data-cy"];
700
+ for (const attr of testAttrs) {
701
+ const val = el.getAttribute(attr);
702
+ if (val) return `[${attr}="${CSS.escape(val)}"]`;
703
+ }
704
+ const tag = el.tagName.toLowerCase();
705
+ const name = el.getAttribute("name");
706
+ if (name) return `${tag}[name="${CSS.escape(name)}"]`;
707
+ const cls = el.className?.toString().trim();
708
+ if (cls) {
709
+ const sel = tag + "." + cls.split(/\s+/).map((c) => CSS.escape(c)).join(".");
710
+ if (document.querySelectorAll(sel).length === 1) return sel;
711
+ }
712
+ return null;
713
+ }
714
+ function isVisible(el) {
715
+ if (!el.offsetParent && el.tagName !== "BODY" && el.tagName !== "HTML" && getComputedStyle(el).position !== "fixed" && getComputedStyle(el).position !== "sticky") return false;
716
+ const style = getComputedStyle(el);
717
+ return style.display !== "none" && style.visibility !== "hidden" && parseFloat(style.opacity) > 0;
718
+ }
719
+ const interactiveTags = /* @__PURE__ */ new Set([
720
+ "a",
721
+ "button",
722
+ "input",
723
+ "textarea",
724
+ "select",
725
+ "details",
726
+ "summary",
727
+ "label"
728
+ ]);
729
+ const interactiveRoles = /* @__PURE__ */ new Set([
730
+ "button",
731
+ "link",
732
+ "tab",
733
+ "menuitem",
734
+ "menuitemcheckbox",
735
+ "menuitemradio",
736
+ "option",
737
+ "checkbox",
738
+ "radio",
739
+ "switch",
740
+ "slider",
741
+ "spinbutton",
742
+ "combobox",
743
+ "listbox",
744
+ "searchbox",
745
+ "textbox",
746
+ "treeitem"
747
+ ]);
748
+ const landmarkRoles = /* @__PURE__ */ new Set([
749
+ "banner",
750
+ "navigation",
751
+ "main",
752
+ "complementary",
753
+ "contentinfo",
754
+ "form",
755
+ "region",
756
+ "search",
757
+ "dialog",
758
+ "alertdialog",
759
+ "tablist",
760
+ "toolbar",
761
+ "menu",
762
+ "menubar",
763
+ "tree",
764
+ "grid",
765
+ "table",
766
+ "list"
767
+ ]);
768
+ const structuralSel = "h1, h2, h3, h4, h5, h6, nav, main, header, footer, section, form, table, thead, tbody, tr, th, td, ul, ol, li, dl, dt, dd, fieldset, legend, [role]";
769
+ const nodes = [];
770
+ const allEls = document.querySelectorAll("body *");
771
+ for (const el of allEls) {
772
+ if (nodes.length >= maxNodes) break;
773
+ const tag = el.tagName.toLowerCase();
774
+ if (["script", "style", "noscript", "br", "hr", "wbr", "meta", "link"].includes(tag)) continue;
775
+ if (!isVisible(el)) continue;
776
+ const role = el.getAttribute("role") || null;
777
+ const isInteractive = interactiveTags.has(tag) || role && interactiveRoles.has(role) || el.getAttribute("contenteditable") === "true" || el.getAttribute("tabindex") !== null || el.getAttribute("onclick") !== null;
778
+ const isLandmark = landmarkRoles.has(role) || ["nav", "main", "header", "footer", "section", "form", "table"].includes(tag);
779
+ const isHeading = /^h[1-6]$/.test(tag);
780
+ const isStructural = el.matches(structuralSel);
781
+ if (!isInteractive && !isLandmark && !isHeading && !isStructural) continue;
782
+ const id = el.id || null;
783
+ const classes = el.className ? el.className.toString().split(/\s+/).filter((c) => c && c.length < 50).slice(0, 5).join(" ") : null;
784
+ const text = (el.textContent?.trim() || "").substring(0, 80) || null;
785
+ const directText = (() => {
786
+ let t = "";
787
+ for (const child of el.childNodes) {
788
+ if (child.nodeType === 3) t += child.textContent;
789
+ }
790
+ return t.trim().substring(0, 80) || null;
791
+ })();
792
+ const ariaLabel = el.getAttribute("aria-label") || null;
793
+ const title = el.getAttribute("title") || null;
794
+ const type = el.getAttribute("type") || null;
795
+ const href = tag === "a" ? (el.getAttribute("href") || "").substring(0, 100) : null;
796
+ const placeholder = el.getAttribute("placeholder") || null;
797
+ const htmlName = el.getAttribute("name") || null;
798
+ const value = tag === "input" || tag === "textarea" ? (el.value || "").substring(0, 50) || null : null;
799
+ const disabled = el.disabled || el.getAttribute("aria-disabled") === "true" || null;
800
+ const checked = el.checked !== void 0 ? el.checked : null;
801
+ const testIdAttr = el.getAttribute("data-testid") ? "data-testid" : el.getAttribute("data-test") ? "data-test" : el.getAttribute("data-test-id") ? "data-test-id" : el.getAttribute("data-qa") ? "data-qa" : el.getAttribute("data-cy") ? "data-cy" : null;
802
+ const testId = testIdAttr ? { attr: testIdAttr, value: el.getAttribute(testIdAttr) } : null;
803
+ const xpath = getXPath(el);
804
+ const cssSelector = getUniqueSelector(el);
805
+ const depth = (() => {
806
+ let d = 0;
807
+ let p = el.parentElement;
808
+ while (p && p !== document.body) {
809
+ d++;
810
+ p = p.parentElement;
811
+ }
812
+ return d;
813
+ })();
814
+ const node = { tag, depth };
815
+ if (role) node.role = role;
816
+ if (id) node.id = id;
817
+ if (classes) node.classes = classes;
818
+ if (testId) node.testId = testId;
819
+ if (ariaLabel) node.ariaLabel = ariaLabel;
820
+ if (title) node.title = title;
821
+ if (type) node.type = type;
822
+ if (htmlName) node.name = htmlName;
823
+ if (placeholder) node.placeholder = placeholder;
824
+ if (href) node.href = href;
825
+ if (directText) node.text = directText;
826
+ else if (text && text.length <= 80) node.text = text;
827
+ if (value) node.value = value;
828
+ if (disabled) node.disabled = true;
829
+ if (checked !== null && checked !== void 0) node.checked = checked;
830
+ if (isInteractive) node.interactive = true;
831
+ if (isHeading) node.heading = true;
832
+ if (isLandmark) node.landmark = true;
833
+ node.xpath = xpath;
834
+ if (cssSelector) node.css = cssSelector;
835
+ const labelEl = id ? document.querySelector(`label[for="${id}"]`) : null;
836
+ if (labelEl) node.label = labelEl.textContent.trim().substring(0, 50);
837
+ let selectOpts = null;
838
+ if (tag === "select") {
839
+ selectOpts = [...el.options].map((o) => ({ value: o.value, text: o.text, selected: o.selected })).slice(0, 15);
840
+ if (selectOpts.length) node.options = selectOpts;
841
+ }
842
+ nodes.push(node);
843
+ }
844
+ return nodes;
845
+ }, MAX_DOM_NODES)
449
846
  ]);
450
847
  if (ariaResult.status === "fulfilled") ariaTree = ariaResult.value;
451
848
  else {
@@ -458,7 +855,19 @@ async function getFrameStructure(pageOrFrame) {
458
855
  }
459
856
  }
460
857
  if (domResult.status === "fulfilled") specialElements = domResult.value;
461
- return { ariaTree, specialElements };
858
+ if (interactiveResult.status === "fulfilled") interactiveElements = interactiveResult.value;
859
+ if (domTreeResult.status === "fulfilled") domTree = domTreeResult.value;
860
+ const ariaQuality = evaluateAriaQuality(ariaTree, interactiveElements.length);
861
+ return { ariaTree, specialElements, interactiveElements, domTree, ariaQuality };
862
+ }
863
/**
 * Grade how much useful information an ARIA snapshot carries.
 *
 * @param {*} ariaTree - ARIA snapshot text; anything that is not a non-empty
 *   string is graded "none".
 * @param {number} interactiveCount - Number of interactive elements found on
 *   the page (the caller passes `interactiveElements.length`).
 * @returns {"none"|"sparse"|"good"} "none" when the snapshot is missing or has
 *   fewer than 5 non-blank lines; "sparse" when it is small or names few
 *   controls relative to `interactiveCount`; otherwise "good".
 */
function evaluateAriaQuality(ariaTree, interactiveCount) {
  const isUsableText = typeof ariaTree === "string" && ariaTree.length > 0;
  if (!isUsableText) return "none";

  // Count only lines that contain something other than whitespace.
  let nonBlankLines = 0;
  for (const line of ariaTree.split("\n")) {
    if (line.trim()) nonBlankLines += 1;
  }
  if (nonBlankLines < 5) return "none";

  // Interactive roles that are followed by a quoted accessible name.
  const namedMatches = ariaTree.match(/(button|link|textbox|combobox|checkbox|tab|radio|switch)\s+"[^"]+"/g);
  const namedInteractive = namedMatches === null ? 0 : namedMatches.length;

  const fewNamedControls = namedInteractive < 3 && interactiveCount > 5;
  const shortSnapshot = nonBlankLines < 20 && interactiveCount > 10;
  return fewNamedControls || shortSnapshot ? "sparse" : "good";
}
463
872
  function simplifyAriaTree(node, depth = 0) {
464
873
  if (!node || depth > 8) return null;
@@ -512,30 +921,123 @@ function trimSnapshotForQuery(ariaYaml, query, maxLines = 150) {
512
921
  }
513
922
  return result.join("\n");
514
923
  }
924
/**
 * Reduce a flat DOM node list to the `maxNodes` entries most relevant to a
 * natural-language query, keeping the survivors in their original order.
 *
 * Scoring: +5 interactive, +3 heading, +2 landmark, and +15 for every query
 * token found in the node's text-like fields (text, ariaLabel, label,
 * placeholder, title, id, name, testId.value).
 *
 * @param {Array<object>|null|undefined} domTree - Node descriptors; returned
 *   unchanged when empty, missing, or already within `maxNodes`.
 * @param {string} query - Free-text description of the target element. A
 *   null/undefined query is treated as empty (structural scoring only).
 * @param {number} [maxNodes=80] - Maximum number of nodes to keep.
 * @returns {Array<object>|null|undefined} The filtered node list.
 */
function filterDomTreeForQuery(domTree, query, maxNodes = 80) {
  if (!domTree || !domTree.length) return domTree;
  if (domTree.length <= maxNodes) return domTree;

  // Unicode-aware tokenization: keep letters, combining marks and digits of
  // any script so non-English queries still produce tokens. For ASCII input
  // this yields exactly the same tokens as the former /[^a-z0-9\s]/ filter.
  const queryTokens = String(query ?? "")
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}\s]/gu, "")
    .split(/\s+/)
    .filter(Boolean);

  const scored = domTree.map((node, idx) => {
    let score = 0;
    if (node.interactive) score += 5;
    if (node.heading) score += 3;
    if (node.landmark) score += 2;
    const searchable = [
      node.text,
      node.ariaLabel,
      node.label,
      node.placeholder,
      node.title,
      node.id,
      node.name,
      node.testId?.value
    ].filter(Boolean).join(" ").toLowerCase();
    for (const token of queryTokens) {
      if (searchable.includes(token)) score += 15;
    }
    return { node, idx, score };
  });

  // Highest score first (the sort is stable, so ties keep document order),
  // then restore document order among the nodes that made the cut.
  scored.sort((a, b) => b.score - a.score);
  return scored
    .slice(0, maxNodes)
    .sort((a, b) => a.idx - b.idx)
    .map((entry) => entry.node);
}
515
942
 
516
943
  // src/resolver.js
517
944
  var DEBUG4 = process.env.LLM_LOCATOR_DEBUG === "true";
945
// Maximum length (in characters of the serialized JSON string) of the page
// payload, keyed by the main frame's ARIA quality grade. The limit grows as
// the ARIA grade gets worse: "good" < "sparse" < "none".
var PAYLOAD_LIMITS = {
  good: 15000,
  sparse: 25000,
  none: 30000
};
518
950
/**
 * Build the serialized page payload, picking the payload flavour from the
 * main frame's ARIA quality grade.
 *
 * @param {object} pageStructure - Page structure; `mainFrame.ariaQuality`
 *   selects the builder and defaults to "good" when falsy.
 * @param {string} query - Description of the element being located.
 * @returns {{payloadStr: string, mode: string}} Result of the chosen builder:
 *   DOM-only for "none", hybrid for "sparse", ARIA-based for anything else.
 */
function buildPayload(pageStructure, query) {
  const quality = pageStructure.mainFrame.ariaQuality || "good";
  // Unknown grades fall back to the "good" size limit.
  const maxPayload = PAYLOAD_LIMITS[quality] || PAYLOAD_LIMITS.good;
  switch (quality) {
    case "none":
      return buildDomOnlyPayload(pageStructure, query, maxPayload);
    case "sparse":
      return buildHybridPayload(pageStructure, query, maxPayload);
    default:
      return buildAriaPayload(pageStructure, query, maxPayload);
  }
}
961
/**
 * Build the ARIA-based payload: the query-trimmed ARIA snapshot plus capped
 * lists of special and interactive elements and per-frame summaries.
 *
 * @param {object} pageStructure - Page structure with `mainFrame` and `frames`.
 * @param {string} query - Description of the element being located.
 * @param {number} maxPayload - Maximum length of the serialized payload.
 * @returns {{payloadStr: string, mode: "aria"}} Serialized payload. When the
 *   JSON exceeds `maxPayload` it is cut at that length and suffixed with
 *   "...(truncated)", so it may no longer be valid JSON.
 */
function buildAriaPayload(pageStructure, query, maxPayload) {
  const { mainFrame } = pageStructure;
  const trimmedAria = trimSnapshotForQuery(mainFrame.ariaTree, query);
  // Guard both lists the same way: a missing specialElements array used to
  // throw on `.length`, unlike in the DOM-only builder.
  const specialElements = mainFrame.specialElements || [];
  const interactiveElements = mainFrame.interactiveElements || [];
  const payload = {
    mode: "aria",
    ariaTree: trimmedAria,
    specialElements: specialElements.slice(0, 30),
    interactiveElements: interactiveElements.slice(0, 50),
    frames: buildFramesPayload(pageStructure, query)
  };
  let payloadStr = JSON.stringify(payload);
  if (payloadStr.length > maxPayload) {
    payloadStr = payloadStr.substring(0, maxPayload) + "...(truncated)";
  }
  return { payloadStr, mode: "aria" };
}
976
/**
 * Build the hybrid payload: the trimmed ARIA snapshot together with a
 * query-filtered DOM node list, interactive elements and frame summaries.
 *
 * @param {object} pageStructure - Page structure with `mainFrame` and `frames`.
 * @param {string} query - Description of the element being located.
 * @param {number} maxPayload - Maximum length of the serialized payload.
 * @returns {{payloadStr: string, mode: "hybrid"}} Serialized payload; cut at
 *   `maxPayload` characters with a "...(truncated)" suffix if it is still too
 *   long after the DOM list has been reduced.
 */
function buildHybridPayload(pageStructure, query, maxPayload) {
  const main = pageStructure.mainFrame;
  const fullDom = main.domTree || [];
  const payload = {
    mode: "hybrid",
    ariaTree: trimSnapshotForQuery(main.ariaTree, query),
    interactiveElements: (main.interactiveElements || []).slice(0, 60),
    domTree: filterDomTreeForQuery(fullDom, query, 120),
    frames: buildFramesPayload(pageStructure, query)
  };

  let serialized = JSON.stringify(payload);
  if (serialized.length > maxPayload) {
    // First remedy: halve the DOM node budget and serialize again.
    payload.domTree = filterDomTreeForQuery(fullDom, query, 60);
    serialized = JSON.stringify(payload);
    // Last resort: hard cut at the limit.
    if (serialized.length > maxPayload) {
      serialized = serialized.substring(0, maxPayload) + "...(truncated)";
    }
  }
  return { payloadStr: serialized, mode: "hybrid" };
}
995
/**
 * Build the DOM-only payload, which carries no ARIA snapshot: a
 * query-filtered DOM node list plus special elements, interactive elements
 * and frame summaries.
 *
 * @param {object} pageStructure - Page structure with `mainFrame` and `frames`.
 * @param {string} query - Description of the element being located.
 * @param {number} maxPayload - Maximum length of the serialized payload.
 * @returns {{payloadStr: string, mode: "dom"}} Serialized payload; cut at
 *   `maxPayload` characters with a "...(truncated)" suffix if it is still too
 *   long after the DOM and interactive lists have been reduced.
 */
function buildDomOnlyPayload(pageStructure, query, maxPayload) {
  const main = pageStructure.mainFrame;
  const interactive = main.interactiveElements || [];
  const fullDom = main.domTree || [];
  const payload = {
    mode: "dom",
    domTree: filterDomTreeForQuery(fullDom, query, 150),
    specialElements: (main.specialElements || []).slice(0, 30),
    interactiveElements: interactive.slice(0, 80),
    frames: buildFramesPayload(pageStructure, query)
  };

  let serialized = JSON.stringify(payload);
  if (serialized.length > maxPayload) {
    // First remedy: tighten both the DOM and interactive-element budgets.
    payload.domTree = filterDomTreeForQuery(fullDom, query, 80);
    payload.interactiveElements = interactive.slice(0, 50);
    serialized = JSON.stringify(payload);
    // Last resort: hard cut at the limit.
    if (serialized.length > maxPayload) {
      serialized = serialized.substring(0, maxPayload) + "...(truncated)";
    }
  }
  return { payloadStr: serialized, mode: "dom" };
}
1015
/**
 * Summarize each child frame for inclusion in a payload.
 *
 * @param {object} pageStructure - Page structure; `frames` is a list of
 *   `{ selector, content }` entries.
 * @param {string} query - Description of the element being located; used to
 *   trim each frame's ARIA snapshot and DOM list.
 * @returns {Array<object>|undefined} One summary per frame holding its
 *   selector and whichever of ariaTree (50-line trim), specialElements
 *   (first 10), interactiveElements (first 20) and domTree (30 nodes) the
 *   frame provides; `undefined` when there are no frames.
 */
function buildFramesPayload(pageStructure, query) {
  const frames = pageStructure.frames;
  if (!frames || frames.length === 0) return void 0;

  return frames.map(({ selector, content }) => {
    const summary = { selector };
    // A frame without captured content contributes only its selector.
    if (!content) return summary;

    const { ariaTree, specialElements, interactiveElements, domTree } = content;
    if (ariaTree) summary.ariaTree = trimSnapshotForQuery(ariaTree, query, 50);
    if (specialElements) summary.specialElements = specialElements.slice(0, 10);
    if (interactiveElements) summary.interactiveElements = interactiveElements.slice(0, 20);
    if (domTree) summary.domTree = filterDomTreeForQuery(domTree, query, 30);
    return summary;
  });
}
533
1034
/**
 * Ask the LLM to resolve a query against the current page structure.
 *
 * @param {object} pageStructure - Page structure passed to `buildPayload`.
 * @param {string} query - Description of the element being located.
 * @param {object} [options] - Forwarded to `chatCompletion`. `action`
 *   (default null) is passed to the message builder; `maxTokens`, when
 *   supplied, overrides the per-mode default (256 for "aria", 512 otherwise).
 * @returns {Promise<*>} The model reply as parsed by `parseJsonResponse`.
 */
async function queryLLM(pageStructure, query, options = {}) {
  const { action = null } = options;
  const { payloadStr, mode } = buildPayload(pageStructure, query);
  if (DEBUG4) console.log(`[Resolver] mode=${mode} payload=${payloadStr.length} chars`);

  // "dom" and "hybrid" payloads share the DOM-aware prompt.
  const usesDomPrompt = mode === "dom" || mode === "hybrid";
  const messages = usesDomPrompt
    ? buildDomMessages(payloadStr, query, action)
    : buildMessages(payloadStr, query, action);

  // Honour an explicit caller budget instead of silently overwriting it.
  const maxTokens = options.maxTokens ?? (mode === "aria" ? 256 : 512);
  const raw = await chatCompletion(messages, { ...options, maxTokens });
  return parseJsonResponse(raw);
}
541
1043
  async function resolveLocator(page, query, options = {}) {
@@ -545,7 +1047,7 @@ async function resolveLocator(page, query, options = {}) {
545
1047
  const memHit = getMemEntry(url, query, action);
546
1048
  if (memHit) {
547
1049
  if (shouldDebug) console.log(`[Memory Cache Hit] "${query}" \u2192 ${memHit.locatorString}`);
548
- return memHit;
1050
+ return { ...memHit, source: "memory" };
549
1051
  }
550
1052
  const diskHit = getDiskEntry(url, query, action);
551
1053
  if (diskHit) {
@@ -603,6 +1105,17 @@ async function getLocator(page, query, options = {}) {
603
1105
  bumpHitCount(url, query, action);
604
1106
  return { locator, result };
605
1107
  }
1108
+ if (result.fallbackLocators && result.fallbackLocators.length > 0) {
1109
+ if (shouldDebug) console.log(`[Fallback] Primary "${result.locatorString}" not found, trying ${result.fallbackLocators.length} fallbacks...`);
1110
+ const fbResult = await validateWithFallback(page, result.locatorString, result.fallbackLocators, result.isInFrame, result.frameSelector);
1111
+ if (fbResult && fbResult.locatorString !== result.locatorString) {
1112
+ if (shouldDebug) console.log(`[Fallback] Using fallback: ${fbResult.locatorString}`);
1113
+ const updatedResult = { ...result, locatorString: fbResult.locatorString, strategy: "fallback" };
1114
+ setMemEntry(url, query, action, updatedResult);
1115
+ setDiskEntry(url, query, action, updatedResult);
1116
+ return { locator: fbResult.locator, result: updatedResult };
1117
+ }
1118
+ }
606
1119
  if (shouldDebug) console.log(`[Stale Cache] "${query}" \u2192 ${result.locatorString} no longer valid, re-querying LLM...`);
607
1120
  invalidateDiskEntry(url, query, action);
608
1121
  deleteMemEntry(url, query, action);
@@ -612,6 +1125,20 @@ async function getLocator(page, query, options = {}) {
612
1125
  const freshLocator = createPlaywrightLocator(page, freshResult.locatorString, freshResult.isInFrame, freshResult.frameSelector);
613
1126
  return { locator: freshLocator, result: freshResult };
614
1127
  }
1128
+ if (result.source === "llm" && result.fallbackLocators && result.fallbackLocators.length > 0) {
1129
+ const valid = await validateLocator(locator, 2e3);
1130
+ if (!valid) {
1131
+ if (shouldDebug) console.log(`[Fallback] Primary "${result.locatorString}" not found, trying ${result.fallbackLocators.length} fallbacks...`);
1132
+ const fbResult = await validateWithFallback(page, result.locatorString, result.fallbackLocators, result.isInFrame, result.frameSelector);
1133
+ if (fbResult && fbResult.locatorString !== result.locatorString) {
1134
+ if (shouldDebug) console.log(`[Fallback] Using fallback: ${fbResult.locatorString}`);
1135
+ const updatedResult = { ...result, locatorString: fbResult.locatorString, strategy: "fallback" };
1136
+ setMemEntry(url, query, action, updatedResult);
1137
+ setDiskEntry(url, query, action, updatedResult);
1138
+ return { locator: fbResult.locator, result: updatedResult };
1139
+ }
1140
+ }
1141
+ }
615
1142
  return { locator, result };
616
1143
  }
617
1144
  async function resolveLocatorsBatch(page, queries, options = {}) {
@@ -647,11 +1174,11 @@ async function resolveLocatorsBatch(page, queries, options = {}) {
647
1174
  if (shouldDebug) console.log(`[Batch] Resolving ${uncached.length} queries in 1 call`);
648
1175
  const t0 = Date.now();
649
1176
  const pageStructure = await getPageStructure(page);
650
- let payloadStr = buildPayload(pageStructure, uncached.join(" "));
651
- if (payloadStr.length > 2e4) payloadStr = payloadStr.substring(0, 2e4) + "\n...(truncated)";
1177
+ const { payloadStr, mode } = buildPayload(pageStructure, uncached.join(" "));
652
1178
  try {
653
- const messages = buildBatchMessages(payloadStr, uncached);
654
- const raw = await chatCompletion(messages, options);
1179
+ const messages = mode === "dom" || mode === "hybrid" ? buildDomBatchMessages(payloadStr, uncached) : buildBatchMessages(payloadStr, uncached);
1180
+ const maxTokens = mode === "aria" ? 256 : 512;
1181
+ const raw = await chatCompletion(messages, { ...options, maxTokens });
655
1182
  const parsed = parseJsonResponse(raw);
656
1183
  if (shouldDebug) console.log(`[Batch] ${parsed.length} results in ${Date.now() - t0}ms`);
657
1184
  for (let i = 0; i < uncached.length && i < parsed.length; i++) {
@@ -701,66 +1228,84 @@ var SmartAction = class _SmartAction {
701
1228
  get query() {
702
1229
  return this.#query;
703
1230
  }
1231
+ async #safeAction(action, ...args) {
1232
+ try {
1233
+ return await this.#locator[action](...args);
1234
+ } catch (err) {
1235
+ if (err.message && err.message.includes("strict mode violation")) {
1236
+ if (DEBUG5) console.log(`[SmartAction] Strict mode on "${this.#query}", retrying with .first()`);
1237
+ return await this.#locator.first()[action](...args);
1238
+ }
1239
+ throw err;
1240
+ }
1241
+ }
704
1242
  async click(options) {
705
- return this.#locator.click(options);
1243
+ return this.#safeAction("click", options);
706
1244
  }
707
1245
  async dblclick(options) {
708
- return this.#locator.dblclick(options);
1246
+ return this.#safeAction("dblclick", options);
709
1247
  }
710
1248
  async tap(options) {
711
- return this.#locator.tap(options);
1249
+ return this.#safeAction("tap", options);
712
1250
  }
713
1251
  async hover(options) {
714
- return this.#locator.hover(options);
1252
+ return this.#safeAction("hover", options);
715
1253
  }
716
1254
  async focus() {
717
- return this.#locator.focus();
1255
+ return this.#safeAction("focus");
718
1256
  }
719
1257
  async blur() {
720
- return this.#locator.blur();
1258
+ return this.#safeAction("blur");
721
1259
  }
722
1260
  async fill(value, options) {
723
- return this.#locator.fill(value, options);
1261
+ return this.#safeAction("fill", value, options);
724
1262
  }
725
1263
  async type(text, options) {
726
- return this.#locator.type(text, options);
1264
+ return this.#safeAction("type", text, options);
727
1265
  }
728
1266
  async press(key, options) {
729
- return this.#locator.press(key, options);
1267
+ return this.#safeAction("press", key, options);
730
1268
  }
731
1269
  async pressSequentially(text, options) {
732
- return this.#locator.pressSequentially(text, options);
1270
+ return this.#safeAction("pressSequentially", text, options);
733
1271
  }
734
1272
  async clear(options) {
735
- return this.#locator.clear(options);
1273
+ return this.#safeAction("clear", options);
736
1274
  }
737
1275
  async setInputFiles(files, options) {
738
- return this.#locator.setInputFiles(files, options);
1276
+ return this.#safeAction("setInputFiles", files, options);
739
1277
  }
740
1278
  async selectOption(values, options) {
741
- return this.#locator.selectOption(values, options);
1279
+ return this.#safeAction("selectOption", values, options);
742
1280
  }
743
1281
  async selectText(options) {
744
- return this.#locator.selectText(options);
1282
+ return this.#safeAction("selectText", options);
745
1283
  }
746
1284
  async check(options) {
747
- return this.#locator.check(options);
1285
+ return this.#safeAction("check", options);
748
1286
  }
749
1287
  async uncheck(options) {
750
- return this.#locator.uncheck(options);
1288
+ return this.#safeAction("uncheck", options);
751
1289
  }
752
1290
  async setChecked(checked, options) {
753
- return this.#locator.setChecked(checked, options);
1291
+ return this.#safeAction("setChecked", checked, options);
754
1292
  }
755
1293
  async scrollIntoViewIfNeeded(options) {
756
- return this.#locator.scrollIntoViewIfNeeded(options);
1294
+ return this.#safeAction("scrollIntoViewIfNeeded", options);
757
1295
  }
758
1296
  async screenshot(options) {
759
- return this.#locator.screenshot(options);
1297
+ return this.#safeAction("screenshot", options);
760
1298
  }
761
1299
  async dragTo(target, options) {
762
1300
  const dest = target instanceof _SmartAction ? target.rawLocator : target;
763
- return this.#locator.dragTo(dest, options);
1301
+ try {
1302
+ return await this.#locator.dragTo(dest, options);
1303
+ } catch (err) {
1304
+ if (err.message && err.message.includes("strict mode violation")) {
1305
+ return await this.#locator.first().dragTo(dest, options);
1306
+ }
1307
+ throw err;
1308
+ }
764
1309
  }
765
1310
  async waitFor(options) {
766
1311
  return this.#locator.waitFor(options);
@@ -796,22 +1341,22 @@ var SmartAction = class _SmartAction {
796
1341
  return this.#locator.isEditable();
797
1342
  }
798
1343
  async textContent() {
799
- return this.#locator.textContent();
1344
+ return this.#safeAction("textContent");
800
1345
  }
801
1346
  async innerText() {
802
- return this.#locator.innerText();
1347
+ return this.#safeAction("innerText");
803
1348
  }
804
1349
  async innerHTML() {
805
- return this.#locator.innerHTML();
1350
+ return this.#safeAction("innerHTML");
806
1351
  }
807
1352
  async inputValue(options) {
808
- return this.#locator.inputValue(options);
1353
+ return this.#safeAction("inputValue", options);
809
1354
  }
810
1355
  async getAttribute(name) {
811
- return this.#locator.getAttribute(name);
1356
+ return this.#safeAction("getAttribute", name);
812
1357
  }
813
1358
  async boundingBox() {
814
- return this.#locator.boundingBox();
1359
+ return this.#safeAction("boundingBox");
815
1360
  }
816
1361
  async count() {
817
1362
  return this.#locator.count();
@@ -826,13 +1371,13 @@ var SmartAction = class _SmartAction {
826
1371
  return this.#locator.all();
827
1372
  }
828
1373
  async evaluate(pageFunction, arg) {
829
- return this.#locator.evaluate(pageFunction, arg);
1374
+ return this.#safeAction("evaluate", pageFunction, arg);
830
1375
  }
831
1376
  async evaluateAll(pageFunction, arg) {
832
1377
  return this.#locator.evaluateAll(pageFunction, arg);
833
1378
  }
834
1379
  async evaluateHandle(pageFunction, arg) {
835
- return this.#locator.evaluateHandle(pageFunction, arg);
1380
+ return this.#safeAction("evaluateHandle", pageFunction, arg);
836
1381
  }
837
1382
  first() {
838
1383
  return this.#locator.first();
@@ -878,14 +1423,15 @@ var SmartAction = class _SmartAction {
878
1423
  }
879
1424
  }
880
1425
  async getClasses() {
881
- const cls = await this.#locator.getAttribute("class");
1426
+ const cls = await this.getAttribute("class");
882
1427
  return cls ? cls.split(/\s+/).filter(Boolean) : [];
883
1428
  }
884
1429
  async hasClass(className) {
885
1430
  return (await this.getClasses()).includes(className);
886
1431
  }
887
1432
  async getCssProperty(property) {
888
- return this.#locator.evaluate(
1433
+ return this.#safeAction(
1434
+ "evaluate",
889
1435
  (el, prop) => window.getComputedStyle(el).getPropertyValue(prop),
890
1436
  property
891
1437
  );
@@ -904,12 +1450,21 @@ function clearAllCaches() {
904
1450
  clearDiskCache();
905
1451
  }
906
1452
  function createSmartLocator(page, options = {}) {
907
- const { verbose = false } = options;
1453
+ const { verbose = false, actionTimeout = 1e4 } = options;
908
1454
  async function resolve(query, action) {
909
1455
  const { locator, result } = await getLocator(page, query, { ...options, action });
910
1456
  if (verbose) console.log(`[Smart Locator] ${action || "locate"}("${query}") \u2192 ${result.locatorString} (${result.source})`);
911
1457
  return new SmartAction(locator, result, query);
912
1458
  }
1459
+ function withTimeout(promise, ms, label) {
1460
+ if (!ms) return promise;
1461
+ return Promise.race([
1462
+ promise,
1463
+ new Promise(
1464
+ (_, reject) => setTimeout(() => reject(new Error(`Timeout: "${label}" exceeded ${ms}ms`)), ms)
1465
+ )
1466
+ ]);
1467
+ }
913
1468
  return {
914
1469
  async prefetch(...queries) {
915
1470
  const t0 = Date.now();
@@ -922,93 +1477,93 @@ function createSmartLocator(page, options = {}) {
922
1477
  },
923
1478
  async click(query, clickOptions) {
924
1479
  const el = await resolve(query, "click");
925
- await el.click(clickOptions);
1480
+ await withTimeout(el.click(clickOptions), actionTimeout, `click("${query}")`);
926
1481
  return el;
927
1482
  },
928
1483
  async dblclick(query, clickOptions) {
929
1484
  const el = await resolve(query, "click");
930
- await el.dblclick(clickOptions);
1485
+ await withTimeout(el.dblclick(clickOptions), actionTimeout, `dblclick("${query}")`);
931
1486
  return el;
932
1487
  },
933
1488
  async fill(query, value, fillOptions) {
934
1489
  const el = await resolve(query, "fill");
935
- await el.fill(value, fillOptions);
1490
+ await withTimeout(el.fill(value, fillOptions), actionTimeout, `fill("${query}")`);
936
1491
  return el;
937
1492
  },
938
1493
  async type(query, text, typeOptions) {
939
1494
  const el = await resolve(query, "fill");
940
- await el.type(text, typeOptions);
1495
+ await withTimeout(el.type(text, typeOptions), actionTimeout, `type("${query}")`);
941
1496
  return el;
942
1497
  },
943
1498
  async pressSequentially(query, text, pressOptions) {
944
1499
  const el = await resolve(query, "fill");
945
- await el.pressSequentially(text, pressOptions);
1500
+ await withTimeout(el.pressSequentially(text, pressOptions), actionTimeout, `pressSequentially("${query}")`);
946
1501
  return el;
947
1502
  },
948
1503
  async clear(query) {
949
1504
  const el = await resolve(query, "fill");
950
- await el.clear();
1505
+ await withTimeout(el.clear(), actionTimeout, `clear("${query}")`);
951
1506
  return el;
952
1507
  },
953
1508
  async press(query, key, pressOptions) {
954
1509
  const el = await resolve(query, "fill");
955
- await el.press(key, pressOptions);
1510
+ await withTimeout(el.press(key, pressOptions), actionTimeout, `press("${query}")`);
956
1511
  return el;
957
1512
  },
958
1513
  async check(query, checkOptions) {
959
1514
  const el = await resolve(query, "check");
960
- await el.check(checkOptions);
1515
+ await withTimeout(el.check(checkOptions), actionTimeout, `check("${query}")`);
961
1516
  return el;
962
1517
  },
963
1518
  async uncheck(query, uncheckOptions) {
964
1519
  const el = await resolve(query, "uncheck");
965
- await el.uncheck(uncheckOptions);
1520
+ await withTimeout(el.uncheck(uncheckOptions), actionTimeout, `uncheck("${query}")`);
966
1521
  return el;
967
1522
  },
968
1523
  async setChecked(query, checked, checkOptions) {
969
1524
  const el = await resolve(query, checked ? "check" : "uncheck");
970
- await el.setChecked(checked, checkOptions);
1525
+ await withTimeout(el.setChecked(checked, checkOptions), actionTimeout, `setChecked("${query}")`);
971
1526
  return el;
972
1527
  },
973
1528
  async select(query, values, selectOptions) {
974
1529
  const el = await resolve(query, "select");
975
- await el.selectOption(values, selectOptions);
1530
+ await withTimeout(el.selectOption(values, selectOptions), actionTimeout, `select("${query}")`);
976
1531
  return el;
977
1532
  },
978
1533
  async hover(query, hoverOptions) {
979
1534
  const el = await resolve(query, "hover");
980
- await el.hover(hoverOptions);
1535
+ await withTimeout(el.hover(hoverOptions), actionTimeout, `hover("${query}")`);
981
1536
  return el;
982
1537
  },
983
1538
  async focus(query) {
984
1539
  const el = await resolve(query, "click");
985
- await el.focus();
1540
+ await withTimeout(el.focus(), actionTimeout, `focus("${query}")`);
986
1541
  return el;
987
1542
  },
988
1543
  async tap(query, tapOptions) {
989
1544
  const el = await resolve(query, "click");
990
- await el.tap(tapOptions);
1545
+ await withTimeout(el.tap(tapOptions), actionTimeout, `tap("${query}")`);
991
1546
  return el;
992
1547
  },
993
1548
  async setInputFiles(query, files, fileOptions) {
994
1549
  const el = await resolve(query, "fill");
995
- await el.setInputFiles(files, fileOptions);
1550
+ await withTimeout(el.setInputFiles(files, fileOptions), actionTimeout, `setInputFiles("${query}")`);
996
1551
  return el;
997
1552
  },
998
1553
  async dragTo(srcQuery, destQuery, dragOptions) {
999
1554
  const src = await resolve(srcQuery, "click");
1000
1555
  const dest = await resolve(destQuery, "click");
1001
- await src.dragTo(dest, dragOptions);
1556
+ await withTimeout(src.dragTo(dest, dragOptions), actionTimeout, `dragTo("${srcQuery}")`);
1002
1557
  return { source: src, target: dest };
1003
1558
  },
1004
1559
  async selectText(query) {
1005
1560
  const el = await resolve(query, "click");
1006
- await el.selectText();
1561
+ await withTimeout(el.selectText(), actionTimeout, `selectText("${query}")`);
1007
1562
  return el;
1008
1563
  },
1009
1564
  async scrollIntoView(query) {
1010
1565
  const el = await resolve(query, null);
1011
- await el.scrollIntoViewIfNeeded();
1566
+ await withTimeout(el.scrollIntoViewIfNeeded(), actionTimeout, `scrollIntoView("${query}")`);
1012
1567
  return el;
1013
1568
  },
1014
1569
  async screenshot(query, screenshotOptions) {
@@ -1095,8 +1650,11 @@ function createSmartLocator(page, options = {}) {
1095
1650
  },
1096
1651
  async exists(query) {
1097
1652
  try {
1098
- const el = await resolve(query, null);
1099
- return await el.exists();
1653
+ const result = await resolveLocator(page, query, { ...options, action: null });
1654
+ if (!result.found || result.confidence < 0.7) return false;
1655
+ const { createPlaywrightLocator: createPlaywrightLocator2 } = await Promise.resolve().then(() => (init_parser(), parser_exports));
1656
+ const locator = createPlaywrightLocator2(page, result.locatorString, result.isInFrame, result.frameSelector);
1657
+ return await locator.count() > 0;
1100
1658
  } catch {
1101
1659
  return false;
1102
1660
  }