@cutleryapp/agent 1.0.15 → 1.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -76,7 +76,7 @@ class TestExecutor {
76
76
  }
77
77
  }
78
78
  else if (lower.includes("click")) {
79
- const labelMatch = raw.match(/click\s+(?:on\s+)?(?:the\s+)?"?([^"]+?)"?\s*(?:button|link|tab)?$/i);
79
+ const labelMatch = raw.match(/click\s+(?:on\s+)?(?:the\s+)?"?([^"]+?)"?(?:\s+(?:button|link|tab))?$/i);
80
80
  let label = labelMatch?.[1]?.trim();
81
81
  if (label) {
82
82
  // Split "Add to cart under Sauce Labs Bike Light product" into target + scope
@@ -207,6 +207,8 @@ async function tryClick(page, nameRe, label) {
207
207
  () => page.getByRole('link', { name: nameRe }).first().click({ timeout: FAST }),
208
208
  () => page.getByText(nameRe).first().click({ timeout: FAST }),
209
209
  () => page.locator(`[value="${label}"], [aria-label="${label}"], [title="${label}"]`).first().click({ timeout: FAST }),
210
+ // data-* attributes (common in test automation)
211
+ () => page.locator(`[data-test*="${label}" i], [data-testid*="${label}" i], [id*="${label}" i]`).first().click({ timeout: FAST }),
210
212
  ];
211
213
  for (const fn of strategies) {
212
214
  try {
@@ -215,7 +217,78 @@ async function tryClick(page, nameRe, label) {
215
217
  }
216
218
  catch { /* try next */ }
217
219
  }
218
- return false;
220
+ // AI vision fallback
221
+ return await aiClickFallback(page, label);
222
+ }
223
/** Upper bound (ms) for acting on a selector proposed by the vision model. */
const AI_ACTION_TIMEOUT_MS = 5000;

/**
 * Normalise the raw model reply into a usable CSS selector.
 *
 * The model is told to answer with a bare selector, but it may still wrap the
 * answer in markdown fences or backticks. The wrapper is removed first and the
 * NOT_FOUND sentinel is checked afterwards, so a fenced "NOT_FOUND" is treated
 * as "not found" instead of being used as a selector.
 *
 * @param {string | null | undefined} raw - Text content of the model reply.
 * @returns {string | null} The selector, or null when the reply is empty or NOT_FOUND.
 */
function aiCleanSelector(raw) {
  let text = (raw ?? '').trim();
  // Multi-line fence: ```lang\n<selector>\n```. The language tag is only
  // stripped when it is followed by a newline, so a single-line
  // ```button:has-text("x")``` does not lose its leading "button".
  const fenced = text.match(/^```[a-z]*[ \t]*\r?\n([\s\S]*?)\r?\n?```$/i);
  if (fenced) {
    text = fenced[1];
  } else {
    // Inline code span or single-line fence: drop the surrounding backticks only.
    text = text.replace(/^`+|`+$/g, '');
  }
  text = text.trim();
  return text === '' || text === 'NOT_FOUND' ? null : text;
}

/**
 * Send a screenshot of the page plus instructions to OpenAI vision and return
 * the CSS selector it proposes.
 *
 * Errors (missing `openai` package, API failure, screenshot failure) propagate
 * to the caller; both public fallbacks catch them.
 *
 * @param {object} page - Page object; `screenshot()` is called on it.
 * @param {string} instructions - Prompt text describing the element to locate.
 * @returns {Promise<string | null>} Selector, or null when OPENAI_API_KEY is
 *   unset or the model reports NOT_FOUND / an empty answer.
 */
async function aiLocateSelector(page, instructions) {
  const openaiKey = process.env.OPENAI_API_KEY;
  if (!openaiKey) {
    return null;
  }
  // Imported lazily so the dependency is only loaded when the fallback runs.
  const { default: OpenAI } = await import('openai');
  const openai = new OpenAI({ apiKey: openaiKey });
  const screenshotBuffer = await page.screenshot({ type: 'png' });
  const base64 = screenshotBuffer.toString('base64');
  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    max_tokens: 100,
    messages: [{
      role: 'user',
      content: [
        { type: 'text', text: instructions },
        { type: 'image_url', image_url: { url: `data:image/png;base64,${base64}` } },
      ],
    }],
  });
  return aiCleanSelector(response.choices?.[0]?.message?.content);
}

/**
 * Use OpenAI vision to identify the element and generate a selector, then click it.
 *
 * Best-effort: any failure is reported as `false`, never thrown.
 *
 * @param {object} page - Page object to screenshot and click on.
 * @param {string} description - Human description of the element to click.
 * @returns {Promise<boolean>} true when an element was clicked, false otherwise.
 */
async function aiClickFallback(page, description) {
  const instructions = [
    `I need to click on: "${description}"`,
    '',
    'Look at this screenshot and return a single CSS selector that identifies the element to click.',
    'Rules:',
    '- Return ONLY the CSS selector, nothing else',
    '- No markdown, no explanation',
    '- If not found, return: NOT_FOUND',
    '- Use data-testid, id, aria-label, or text-based selectors',
    '- Prefer: [data-test="..."], #id, [aria-label="..."], button:has-text("...")',
  ].join('\n');
  try {
    const selector = await aiLocateSelector(page, instructions);
    if (!selector) {
      return false;
    }
    await page.locator(selector).first().click({ timeout: AI_ACTION_TIMEOUT_MS });
    return true;
  }
  catch {
    // Deliberate best-effort: this is the last strategy, so failure means "not clicked".
    return false;
  }
}

/**
 * Use OpenAI vision to identify a field and fill it.
 *
 * Best-effort: any failure is reported as `false`, never thrown. The value is
 * only passed to the local fill call; it is not included in the prompt because
 * it may be a credential and is not needed to locate the field.
 *
 * @param {object} page - Page object to screenshot and fill on.
 * @param {string} label - Human label of the input field.
 * @param {string} value - Text to type into the field.
 * @returns {Promise<boolean>} true when the field was filled, false otherwise.
 */
async function aiFillFallback(page, label, value) {
  const instructions = [
    `I need to fill the "${label}" input field.`,
    '',
    'Look at this screenshot and return a single CSS selector for the input field.',
    'Rules:',
    '- Return ONLY the CSS selector, nothing else',
    '- No markdown, no explanation',
    '- If not found, return: NOT_FOUND',
    '- Prefer: input[name="..."], input[id="..."], input[placeholder="..."], [aria-label="..."]',
  ].join('\n');
  try {
    const selector = await aiLocateSelector(page, instructions);
    if (!selector) {
      return false;
    }
    // Bounded like the click path so a wrong selector fails fast instead of
    // waiting for Playwright's default action timeout.
    await page.locator(selector).first().fill(value, { timeout: AI_ACTION_TIMEOUT_MS });
    return true;
  }
  catch {
    // Deliberate best-effort: the caller raises its own "could not find input" error.
    return false;
  }
}
220
293
  async function tryClickScoped(page, nameRe, target, scope) {
221
294
  const FAST = 3000;
@@ -324,6 +397,10 @@ async function tryFill(page, label, value) {
324
397
  errors.push(e?.message?.split("\n")[0] || String(e));
325
398
  }
326
399
  }
400
+ // AI vision fallback
401
+ const aiSuccess = await aiFillFallback(page, label, value);
402
+ if (aiSuccess)
403
+ return;
327
404
  throw new Error(`Could not find input field: "${label}". Tried ${strategies.length} strategies.`);
328
405
  }
329
406
  /** Token-aware variant generation matching executor.ts/labelVariants. */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cutleryapp/agent",
3
- "version": "1.0.15",
3
+ "version": "1.0.16",
4
4
  "description": "Local agent that connects your machine to the Cutlery QA platform and runs UI tests via Playwright",
5
5
  "main": "dist/cli.js",
6
6
  "bin": {