@cutleryapp/agent 1.0.14 → 1.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/mcp-executor.js +123 -13
  2. package/package.json +1 -1
@@ -76,7 +76,7 @@ class TestExecutor {
76
76
  }
77
77
  }
78
78
  else if (lower.includes("click")) {
79
- const labelMatch = raw.match(/click\s+(?:on\s+)?(?:the\s+)?"?([^"]+?)"?\s*(?:button|link|tab)?$/i);
79
+ const labelMatch = raw.match(/click\s+(?:on\s+)?(?:the\s+)?"?([^"]+?)"?(?:\s+(?:button|link|tab))?$/i);
80
80
  let label = labelMatch?.[1]?.trim();
81
81
  if (label) {
82
82
  // Split "Add to cart under Sauce Labs Bike Light product" into target + scope
@@ -207,6 +207,8 @@ async function tryClick(page, nameRe, label) {
207
207
  () => page.getByRole('link', { name: nameRe }).first().click({ timeout: FAST }),
208
208
  () => page.getByText(nameRe).first().click({ timeout: FAST }),
209
209
  () => page.locator(`[value="${label}"], [aria-label="${label}"], [title="${label}"]`).first().click({ timeout: FAST }),
210
+ // data-* attributes (common in test automation)
211
+ () => page.locator(`[data-test*="${label}" i], [data-testid*="${label}" i], [id*="${label}" i]`).first().click({ timeout: FAST }),
210
212
  ];
211
213
  for (const fn of strategies) {
212
214
  try {
@@ -215,27 +217,131 @@ async function tryClick(page, nameRe, label) {
215
217
  }
216
218
  catch { /* try next */ }
217
219
  }
218
- return false;
220
+ // AI vision fallback
221
+ return await aiClickFallback(page, label);
222
+ }
223
+ /** Use OpenAI vision to identify the element and generate a selector, then click it */
224
+ async function aiClickFallback(page, description) {
225
+ const openaiKey = process.env.OPENAI_API_KEY;
226
+ if (!openaiKey)
227
+ return false;
228
+ try {
229
+ const { default: OpenAI } = await import('openai');
230
+ const openai = new OpenAI({ apiKey: openaiKey });
231
+ const screenshotBuffer = await page.screenshot({ type: 'png' });
232
+ const base64 = screenshotBuffer.toString('base64');
233
+ const response = await openai.chat.completions.create({
234
+ model: 'gpt-4o',
235
+ max_tokens: 100,
236
+ messages: [{
237
+ role: 'user',
238
+ content: [
239
+ {
240
+ type: 'text',
241
+ text: `I need to click on: "${description}"\n\nLook at this screenshot and return a single CSS selector that identifies the element to click.\nRules:\n- Return ONLY the CSS selector, nothing else\n- No markdown, no explanation\n- If not found, return: NOT_FOUND\n- Use data-testid, id, aria-label, or text-based selectors\n- Prefer: [data-test="..."], #id, [aria-label="..."], button:has-text("...")`
242
+ },
243
+ { type: 'image_url', image_url: { url: `data:image/png;base64,${base64}` } }
244
+ ]
245
+ }]
246
+ });
247
+ const selector = response.choices[0]?.message?.content?.trim() || '';
248
+ if (!selector || selector === 'NOT_FOUND')
249
+ return false;
250
+ const clean = selector.replace(/```[a-z]*\n?/gi, '').replace(/```/g, '').trim();
251
+ await page.locator(clean).first().click({ timeout: 5000 });
252
+ return true;
253
+ }
254
+ catch {
255
+ return false;
256
+ }
257
+ }
258
+ /** Use OpenAI vision to identify a field and fill it */
259
+ async function aiFillFallback(page, label, value) {
260
+ const openaiKey = process.env.OPENAI_API_KEY;
261
+ if (!openaiKey)
262
+ return false;
263
+ try {
264
+ const { default: OpenAI } = await import('openai');
265
+ const openai = new OpenAI({ apiKey: openaiKey });
266
+ const screenshotBuffer = await page.screenshot({ type: 'png' });
267
+ const base64 = screenshotBuffer.toString('base64');
268
+ const response = await openai.chat.completions.create({
269
+ model: 'gpt-4o',
270
+ max_tokens: 100,
271
+ messages: [{
272
+ role: 'user',
273
+ content: [
274
+ {
275
+ type: 'text',
276
+ text: `I need to fill the "${label}" input field with the value "${value}".\n\nLook at this screenshot and return a single CSS selector for the input field.\nRules:\n- Return ONLY the CSS selector, nothing else\n- No markdown, no explanation\n- If not found, return: NOT_FOUND\n- Prefer: input[name="..."], input[id="..."], input[placeholder="..."], [aria-label="..."]`
277
+ },
278
+ { type: 'image_url', image_url: { url: `data:image/png;base64,${base64}` } }
279
+ ]
280
+ }]
281
+ });
282
+ const selector = response.choices[0]?.message?.content?.trim() || '';
283
+ if (!selector || selector === 'NOT_FOUND')
284
+ return false;
285
+ const clean = selector.replace(/```[a-z]*\n?/gi, '').replace(/```/g, '').trim();
286
+ await page.locator(clean).first().fill(value);
287
+ return true;
288
+ }
289
+ catch {
290
+ return false;
291
+ }
219
292
  }
220
293
  async function tryClickScoped(page, nameRe, target, scope) {
221
294
  const FAST = 3000;
222
295
  // Strip trailing generic nouns that won't appear verbatim on the page
223
296
  const cleanScope = scope.replace(/\s+(?:product|item|section|card|row|container|element|button|link|area|panel|block)$/i, '').trim();
224
- const strategies = [
225
- () => page.locator(`:has-text("${cleanScope}")`).last().getByRole('button', { name: nameRe }).first().click({ timeout: FAST }),
226
- () => page.locator(`:has-text("${cleanScope}")`).last().getByRole('link', { name: nameRe }).first().click({ timeout: FAST }),
227
- () => page.locator(`:has-text("${cleanScope}")`).last().getByText(nameRe).first().click({ timeout: FAST }),
228
- // Fallback: ignore scope and click anywhere
229
- () => page.getByRole('button', { name: nameRe }).first().click({ timeout: FAST }),
230
- () => page.getByText(nameRe).first().click({ timeout: FAST }),
297
+ // Use card/item container selectors — these are tight enough to contain the button
298
+ // but not so deep that they exclude it. Filter by scope text, then click target within.
299
+ const containerSelectors = [
300
+ '[class*="item"]',
301
+ '[class*="card"]',
302
+ '[class*="product"]',
303
+ 'li',
304
+ 'article',
305
+ 'tr',
306
+ '[role="listitem"]',
307
+ '[role="row"]',
231
308
  ];
232
- for (const fn of strategies) {
309
+ for (const containerSel of containerSelectors) {
233
310
  try {
234
- await fn();
235
- return true;
311
+ const container = page.locator(containerSel).filter({ hasText: cleanScope });
312
+ const count = await container.count();
313
+ if (count === 0)
314
+ continue;
315
+ // Try button first, then any clickable element
316
+ try {
317
+ await container.first().getByRole('button', { name: nameRe }).first().click({ timeout: FAST });
318
+ return true;
319
+ }
320
+ catch { /* try next */ }
321
+ try {
322
+ await container.first().getByRole('link', { name: nameRe }).first().click({ timeout: FAST });
323
+ return true;
324
+ }
325
+ catch { /* try next */ }
326
+ try {
327
+ await container.first().getByText(nameRe).first().click({ timeout: FAST });
328
+ return true;
329
+ }
330
+ catch { /* try next */ }
236
331
  }
237
- catch { /* try next */ }
332
+ catch { /* try next container */ }
333
+ }
334
+ // Fallback: ignore scope
335
+ try {
336
+ await page.getByRole('button', { name: nameRe }).first().click({ timeout: FAST });
337
+ return true;
338
+ }
339
+ catch { }
340
+ try {
341
+ await page.getByText(nameRe).first().click({ timeout: FAST });
342
+ return true;
238
343
  }
344
+ catch { }
239
345
  return false;
240
346
  }
241
347
  async function tryFill(page, label, value) {
@@ -291,6 +397,10 @@ async function tryFill(page, label, value) {
291
397
  errors.push(e?.message?.split("\n")[0] || String(e));
292
398
  }
293
399
  }
400
+ // AI vision fallback
401
+ const aiSuccess = await aiFillFallback(page, label, value);
402
+ if (aiSuccess)
403
+ return;
294
404
  throw new Error(`Could not find input field: "${label}". Tried ${strategies.length} strategies.`);
295
405
  }
296
406
  /** Token-aware variant generation matching executor.ts/labelVariants. */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cutleryapp/agent",
3
- "version": "1.0.14",
3
+ "version": "1.0.16",
4
4
  "description": "Local agent that connects your machine to the Cutlery QA platform and runs UI tests via Playwright",
5
5
  "main": "dist/cli.js",
6
6
  "bin": {