@cutleryapp/agent 1.0.21 → 1.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/mcp-executor.js +137 -13
  2. package/package.json +1 -1
@@ -293,14 +293,77 @@ Set "done": true with empty "actions" array when the goal is fully accomplished.
293
293
  * and returns a SEQUENCE of actions to accomplish it — then executes them one by one.
294
294
  * After each action it re-screenshots so the AI can verify progress and adapt.
295
295
  */
296
/**
 * Extract the visible interactive elements of the current page as text, so the
 * AI can build selectors from attributes that really exist instead of guessing.
 *
 * Each element becomes one line of ` | `-separated `key="value"` pairs, e.g.
 * `input | name="email" | type="email" | placeholder="Email"`.
 *
 * @param {object} page - Page object; only `page.evaluate(fn)` is used.
 * @returns {Promise<string>} Up to 60 newline-separated element descriptors,
 *   `'(no elements found)'` when nothing matched, or
 *   `'(could not extract elements)'` when evaluation failed (best-effort: never throws).
 */
async function extractDomElements(page) {
    try {
        const elements = await page.evaluate(() => {
            // This callback reads `document`/`window`, so everything it needs is
            // declared inside it rather than captured from the outer scope.
            const MAX_ELEMENTS = 60;
            const MAX_TEXT_LENGTH = 40;
            const MAX_HREF_LENGTH = 50;
            const selectors = [
                'a', 'button', 'input', 'select', 'textarea',
                '[role="button"]', '[role="link"]', '[role="tab"]',
                '[role="menuitem"]', '[role="option"]', '[onclick]',
                '[data-test]', '[data-testid]', '[data-cy]',
            ];
            const testAttributes = ['data-test', 'data-testid', 'data-cy'];
            const seen = new Set();
            const result = [];
            for (const el of document.querySelectorAll(selectors.join(','))) {
                if (result.length >= MAX_ELEMENTS)
                    break;
                // Skip elements the user cannot see or interact with.
                const rect = el.getBoundingClientRect();
                if (rect.width === 0 || rect.height === 0)
                    continue;
                const style = window.getComputedStyle(el);
                if (style.display === 'none' || style.visibility === 'hidden')
                    continue;
                const parts = [el.tagName.toLowerCase()];
                if (el.id)
                    parts.push(`id="${el.id}"`);
                for (const attr of ['name', 'type', 'aria-label']) {
                    const attrValue = el.getAttribute(attr);
                    if (attrValue)
                        parts.push(`${attr}="${attrValue}"`);
                }
                // Report the test hook under its real attribute name so a selector
                // built from this line (e.g. [data-testid="x"]) actually matches.
                const testAttribute = testAttributes.find((attr) => el.getAttribute(attr));
                if (testAttribute)
                    parts.push(`${testAttribute}="${el.getAttribute(testAttribute)}"`);
                const placeholder = el.getAttribute('placeholder');
                if (placeholder)
                    parts.push(`placeholder="${placeholder}"`);
                const href = el.getAttribute('href');
                if (href && href !== '#')
                    parts.push(`href="${href.slice(0, MAX_HREF_LENGTH)}"`);
                const text = (el.textContent || '').trim().slice(0, MAX_TEXT_LENGTH);
                if (text)
                    parts.push(`text="${text}"`);
                // Dedupe on the full descriptor: keying on tag/id/text alone would
                // merge every id-less, text-less control (all inputs) into one line.
                const descriptor = parts.join(' | ');
                if (seen.has(descriptor))
                    continue;
                seen.add(descriptor);
                result.push(descriptor);
            }
            return result.join('\n');
        });
        return elements || '(no elements found)';
    }
    catch {
        // Deliberate best-effort: the element list is only a hint for the prompt.
        return '(could not extract elements)';
    }
}
296
356
  async function aiStepFallback(page, stepText) {
297
357
  const openaiKey = process.env.OPENAI_API_KEY;
298
358
  if (!openaiKey)
299
359
  throw new Error(`No OPENAI_API_KEY — cannot use AI fallback for: "${stepText}"`);
300
360
  const { default: OpenAI } = await import('openai');
301
361
  const openai = new OpenAI({ apiKey: openaiKey });
302
- const MAX_ROUNDS = 6; // prevent infinite loops
362
+ const MAX_ROUNDS = 6;
363
+ let consecutiveFailures = 0;
303
364
  for (let round = 0; round < MAX_ROUNDS; round++) {
365
+ // Extract real DOM elements so AI uses accurate selectors
366
+ const domElements = await extractDomElements(page);
304
367
  const screenshotBuffer = await page.screenshot({ type: 'png' });
305
368
  const base64 = screenshotBuffer.toString('base64');
306
369
  const response = await openai.chat.completions.create({
@@ -309,16 +372,20 @@ async function aiStepFallback(page, stepText) {
309
372
  messages: [{
310
373
  role: 'user',
311
374
  content: [
312
- { type: 'text', text: buildAgentPrompt(stepText, round) },
375
+ {
376
+ type: 'text',
377
+ text: buildAgentPrompt(stepText, round) + `
378
+
379
+ ## REAL DOM ELEMENTS ON THIS PAGE
380
+ Use these actual elements — prefer id, data-test, name, aria-label over guessing:
381
+ ${domElements}`
382
+ },
313
383
  { type: 'image_url', image_url: { url: `data:image/png;base64,${base64}` } }
314
384
  ]
315
385
  }]
316
386
  });
317
387
  const raw = (response.choices[0]?.message?.content || '')
318
- .trim()
319
- .replace(/```json\n?/gi, '')
320
- .replace(/```/g, '')
321
- .trim();
388
+ .trim().replace(/```json\n?/gi, '').replace(/```/g, '').trim();
322
389
  if (!raw)
323
390
  throw new Error(`AI returned empty response for: "${stepText}"`);
324
391
  const plan = JSON.parse(raw);
@@ -327,16 +394,19 @@ async function aiStepFallback(page, stepText) {
327
394
  console.log(` ✅ AI agent completed: "${stepText}"`);
328
395
  return;
329
396
  }
330
- // Execute each action in this round
397
+ let anySucceeded = false;
331
398
  for (const act of plan.actions) {
332
399
  console.log(` 🤖 Executing: ${JSON.stringify(act)}`);
333
400
  try {
334
401
  if (act.action === 'click') {
335
- await page.locator(act.selector).first().click({ timeout: 10000 });
336
- await page.waitForTimeout(500); // brief settle
402
+ // Try multiple selector variants derived from the DOM
403
+ const tried = await tryAIClick(page, act.selector);
404
+ if (!tried)
405
+ throw new Error(`Could not find element: ${act.selector}`);
406
+ await page.waitForTimeout(500);
337
407
  }
338
408
  else if (act.action === 'fill') {
339
- await page.locator(act.selector).first().fill(act.value || '');
409
+ await tryAIFill(page, act.selector, act.value || '');
340
410
  }
341
411
  else if (act.action === 'select') {
342
412
  await page.locator(act.selector).first().selectOption({ label: act.value });
@@ -354,23 +424,77 @@ async function aiStepFallback(page, stepText) {
354
424
  }
355
425
  else if (act.action === 'scroll') {
356
426
  if (act.selector) {
357
- await page.locator(act.selector).first().scrollIntoViewIfNeeded();
427
+ await page.locator(act.selector).first().scrollIntoViewIfNeeded().catch(() => { });
358
428
  }
359
429
  else {
360
430
  await page.evaluate(() => window.scrollBy(0, 400));
361
431
  }
362
432
  }
433
+ anySucceeded = true;
434
+ consecutiveFailures = 0;
363
435
  }
364
436
  catch (e) {
365
- console.log(` ⚠️ Action failed (${e.message}), continuing to next round...`);
437
+ console.log(` ⚠️ Action failed (${e.message.split('\n')[0]})`);
438
+ consecutiveFailures++;
439
+ }
440
+ }
441
+ // If 2 consecutive rounds had zero successes, bail early
442
+ if (!anySucceeded) {
443
+ if (consecutiveFailures >= plan.actions.length * 2) {
444
+ throw new Error(`AI agent stuck — no actions succeeded after multiple rounds for: "${stepText}"`);
366
445
  }
367
446
  }
368
- // Wait for page to settle before next round
369
447
  await page.waitForLoadState('domcontentloaded').catch(() => { });
370
448
  await page.waitForTimeout(300);
371
449
  }
372
450
  throw new Error(`AI agent could not complete "${stepText}" within ${MAX_ROUNDS} rounds`);
373
451
  }
452
/**
 * Try to click an element using the AI-suggested selector, falling back to
 * text-based lookups when the selector carries a `:has-text("…")` / `:text("…")` hint.
 *
 * Strategies are attempted in order, each with its own 5 s timeout; the first
 * one that clicks wins.
 *
 * @param {object} page - Page object exposing `locator`, `getByRole` and `getByText`.
 * @param {string} selector - Selector proposed by the AI.
 * @returns {Promise<boolean>} `true` once a strategy clicked, `false` if none did
 *   (or if `selector` is not a non-empty string).
 */
async function tryAIClick(page, selector) {
    const TIMEOUT = 5000;
    // The AI may omit the selector entirely; report "not found" instead of crashing.
    if (typeof selector !== 'string' || selector === '')
        return false;
    const textMatch = selector.match(/:has-text\("([^"]+)"\)|:text\("([^"]+)"\)/);
    const text = textMatch ? (textMatch[1] || textMatch[2]) : null;
    const strategies = [
        () => page.locator(selector).first().click({ timeout: TIMEOUT }),
    ];
    if (text) {
        // Escape regex metacharacters so labels such as "Save (draft)" or "C++"
        // are matched literally rather than being interpreted as a pattern.
        const textPattern = new RegExp(text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'i');
        const classFragment = text.toLowerCase().replace(/\s+/g, '_');
        strategies.push(
            () => page.getByRole('button', { name: textPattern }).first().click({ timeout: TIMEOUT }),
            () => page.getByRole('link', { name: textPattern }).first().click({ timeout: TIMEOUT }),
            () => page.getByText(textPattern).first().click({ timeout: TIMEOUT }),
            () => page.locator(`[class*="${classFragment}"]`).first().click({ timeout: TIMEOUT }),
        );
    }
    for (const attempt of strategies) {
        try {
            await attempt();
            return true;
        }
        catch {
            // Expected while probing: this strategy did not match, try the next one.
        }
    }
    return false;
}
476
/**
 * Try to fill an input using the AI-suggested selector, falling back to
 * placeholder and label lookups derived from the selector.
 *
 * The fallback label is taken from `[placeholder="…"]`, then `[name="…"]`,
 * then a `#id` fragment found in the selector.
 *
 * @param {object} page - Page object exposing `locator`, `getByPlaceholder` and `getByLabel`.
 * @param {string} selector - Selector proposed by the AI.
 * @param {string} value - Text to enter.
 * @returns {Promise<void>} Resolves once a strategy filled the element.
 * @throws {Error} `Could not fill element: <selector>` when every strategy fails.
 */
async function tryAIFill(page, selector, value) {
    const TIMEOUT = 5000;
    const strategies = [];
    if (typeof selector === 'string' && selector !== '') {
        const placeholderMatch = selector.match(/\[placeholder="([^"]+)"\]/);
        const nameMatch = selector.match(/\[name="([^"]+)"\]|#([\w-]+)/);
        const label = placeholderMatch?.[1] || nameMatch?.[1] || nameMatch?.[2] || '';
        // Use the action's own timeout option instead of racing an uncleared timer:
        // a timed-out fill is then actually abandoned, and no 5 s timer is left
        // running after a successful fill.
        strategies.push(() => page.locator(selector).first().fill(value, { timeout: TIMEOUT }));
        if (label) {
            // Escape regex metacharacters so hints like "Amount ($)" match literally.
            const labelPattern = new RegExp(label.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'i');
            strategies.push(
                () => page.getByPlaceholder(labelPattern).first().fill(value, { timeout: TIMEOUT }),
                () => page.getByLabel(labelPattern).first().fill(value, { timeout: TIMEOUT }),
            );
        }
    }
    for (const attempt of strategies) {
        try {
            await attempt();
            return;
        }
        catch {
            // Expected while probing: this strategy did not match, try the next one.
        }
    }
    throw new Error(`Could not fill element: ${selector}`);
}
374
498
  /** Use OpenAI vision to identify the element and generate a selector, then click it */
375
499
  async function aiClickFallback(page, description) {
376
500
  const openaiKey = process.env.OPENAI_API_KEY;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cutleryapp/agent",
3
- "version": "1.0.21",
3
+ "version": "1.0.22",
4
4
  "description": "Local agent that connects your machine to the Cutlery QA platform and runs UI tests via Playwright",
5
5
  "main": "dist/cli.js",
6
6
  "bin": {