@cutleryapp/agent 1.0.21 → 1.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/mcp-executor.js +220 -19
  2. package/package.json +1 -1
@@ -147,17 +147,24 @@ class TestExecutor {
147
147
  catch { /* fall to AI */ }
148
148
  }
149
149
  }
150
- // 7. AI fallback — for anything not handled or ambiguous
150
+ // 7. AI — single-shot for deterministic steps, full loop for intent steps
151
151
  if (!handled) {
152
- console.log(` 🤖 MCP could not handle step, using AI: "${raw}"`);
153
- await aiStepFallback(page, raw);
152
+ const isDeterministic = /^(click|fill|enter|type|verify|check|assert|select|choose|wait|hover|scroll)/i.test(lower.trim());
153
+ if (isDeterministic) {
154
+ console.log(` 🤖 Quick AI selector lookup for: "${raw}"`);
155
+ await aiSingleShot(page, raw);
156
+ }
157
+ else {
158
+ console.log(` 🤖 AI intent loop for: "${raw}"`);
159
+ await aiStepFallback(page, raw);
160
+ }
154
161
  }
155
162
  }
156
163
  catch (err) {
157
- // MCP execution failed — let AI try to recover
158
- console.log(` ⚠️ MCP step failed (${err.message}), trying AI...`);
164
+ // MCP execution failed — single-shot AI recovery, no loop
165
+ console.log(` ⚠️ MCP step failed (${err.message.split('\n')[0]}), trying AI...`);
159
166
  try {
160
- await aiStepFallback(page, raw);
167
+ await aiSingleShot(page, raw);
161
168
  }
162
169
  catch (aiErr) {
163
170
  stepError = err.message;
@@ -293,14 +300,147 @@ Set "done": true with empty "actions" array when the goal is fully accomplished.
293
300
  * and returns a SEQUENCE of actions to accomplish it — then executes them one by one.
294
301
  * After each action it re-screenshots so the AI can verify progress and adapt.
295
302
  */
/**
 * Single-shot AI step: one DOM extract + one screenshot → one model call → one action.
 * There is no retry loop; any failure surfaces as a thrown Error for the caller to handle.
 *
 * @param {object} page - Playwright page the step runs against.
 * @param {string} stepText - The natural-language test step.
 * @returns {Promise<void>} Resolves once the single action has been carried out.
 * @throws {Error} When OPENAI_API_KEY is unset, the model reply is empty / not valid JSON /
 *   names an unsupported action, a required field is missing, or the action itself fails.
 */
async function aiSingleShot(page, stepText) {
    const openaiKey = process.env.OPENAI_API_KEY;
    if (!openaiKey) {
        throw new Error(`No OPENAI_API_KEY for: "${stepText}"`);
    }
    const { default: OpenAI } = await import('openai');
    const openai = new OpenAI({ apiKey: openaiKey });
    const domElements = await extractDomElements(page);
    const screenshotBuffer = await page.screenshot({ type: 'png' });
    const base64 = screenshotBuffer.toString('base64');
    // Assembled line by line so the prompt text is independent of source indentation.
    const prompt = [
        'You are a Playwright selector expert. Given this test step and the current page, return a single JSON action.',
        '',
        `Step: "${stepText}"`,
        '',
        '## REAL PAGE ELEMENTS (use these — do NOT guess selectors):',
        domElements,
        '',
        'Return ONLY valid JSON, one of:',
        '{"action":"click","selector":"EXACT_SELECTOR"}',
        '{"action":"fill","selector":"EXACT_SELECTOR","value":"VALUE"}',
        '{"action":"verify","text":"TEXT_TO_CHECK","not":false}',
        '{"action":"select","selector":"EXACT_SELECTOR","value":"OPTION"}',
        '{"action":"wait","ms":1000}',
        '',
        'Rules:',
        '- Pick selector from the DOM list above using id, name, data-test, aria-label, class exactly as shown',
        '- For "icon" steps: find element whose class/id/data-test contains the icon keyword',
        '- For verify: check if text appears in page body',
    ].join('\n');
    const response = await openai.chat.completions.create({
        model: 'gpt-4o',
        max_tokens: 300,
        messages: [{
                role: 'user',
                content: [
                    { type: 'text', text: prompt },
                    { type: 'image_url', image_url: { url: `data:image/png;base64,${base64}` } },
                ],
            }],
    });
    const act = parseSingleShotAction(response.choices[0]?.message?.content || '', stepText);
    console.log(` 🤖 AI action: ${JSON.stringify(act)}`);
    await runSingleShotAction(page, act);
}

/** Turn the raw model reply into an action object, or throw a descriptive Error. */
function parseSingleShotAction(content, stepText) {
    const cleaned = content.trim().replace(/```json\n?/gi, '').replace(/```/g, '').trim();
    if (!cleaned || cleaned === 'NOT_FOUND') {
        throw new Error(`AI could not find element for: "${stepText}"`);
    }
    // Tolerate prose around the JSON by parsing only the outermost {...} span when present.
    const start = cleaned.indexOf('{');
    const end = cleaned.lastIndexOf('}');
    const candidate = start !== -1 && end > start ? cleaned.slice(start, end + 1) : cleaned;
    let act;
    try {
        act = JSON.parse(candidate);
    }
    catch (err) {
        throw new Error(`AI returned invalid JSON for: "${stepText}" (${cleaned.slice(0, 80)})`, { cause: err });
    }
    if (act === null || typeof act !== 'object' || typeof act.action !== 'string') {
        throw new Error(`AI returned no usable action for: "${stepText}"`);
    }
    return act;
}

/** Execute one parsed action against the page; unsupported actions are an error, not a silent pass. */
async function runSingleShotAction(page, act) {
    const requireSelector = () => {
        if (typeof act.selector !== 'string' || act.selector.trim() === '') {
            throw new Error(`AI ${act.action} action is missing a selector`);
        }
        return act.selector;
    };
    switch (act.action) {
        case 'click': {
            const selector = requireSelector();
            const ok = await tryAIClick(page, selector);
            if (!ok) {
                throw new Error(`AI click failed: ${selector}`);
            }
            return;
        }
        case 'fill':
            await tryAIFill(page, requireSelector(), act.value || '');
            return;
        case 'verify': {
            // Without this guard, includes(undefined) would search for the word "undefined".
            if (typeof act.text !== 'string' || act.text === '') {
                throw new Error('AI verify action is missing text');
            }
            const content = await page.textContent('body') || '';
            const found = content.includes(act.text);
            if (act.not && found) {
                throw new Error(`Text "${act.text}" should NOT be visible`);
            }
            if (!act.not && !found) {
                throw new Error(`Expected text not found: "${act.text}"`);
            }
            return;
        }
        case 'select':
            await page.locator(requireSelector()).first().selectOption({ label: act.value });
            return;
        case 'wait': {
            const ms = Number(act.ms);
            await page.waitForTimeout(Number.isFinite(ms) && ms > 0 ? ms : 1000);
            return;
        }
        default:
            // Previously an unknown action fell through every branch and the step "passed".
            throw new Error(`AI returned unsupported action: ${act.action}`);
    }
}
/**
 * Build a plain-text inventory of the visible interactive elements on the page,
 * one element per line, for inclusion in an AI prompt.
 *
 * Each line is `tag | attr="value" | ...` and lists only the attributes the element
 * actually carries (id, name, type, aria-label, data-test / data-testid / data-cy,
 * placeholder, up to three classes, href, truncated text). At most 60 lines are returned.
 *
 * @param {object} page - Object exposing `evaluate(fn)` (a Playwright page).
 * @returns {Promise<string>} The inventory, `'(no elements found)'` when it is empty,
 *   or `'(could not extract elements)'` when evaluation throws (best-effort; never rejects).
 */
async function extractDomElements(page) {
    try {
        // This callback runs inside the page, so it must not reference anything
        // from the enclosing scope.
        const listing = await page.evaluate(() => {
            const MAX_ELEMENTS = 60;
            const MAX_CLASSES = 3;
            const MAX_TEXT = 40;
            const MAX_HREF = 50;
            const TEST_ATTRS = ['data-test', 'data-testid', 'data-cy'];
            const query = [
                'a', 'button', 'input', 'select', 'textarea',
                '[role="button"]', '[role="link"]', '[role="tab"]',
                '[role="menuitem"]', '[role="option"]', '[onclick]',
                '[data-test]', '[data-testid]', '[data-cy]',
            ].join(',');
            const seen = new Set();
            const lines = [];
            for (const el of document.querySelectorAll(query)) {
                if (lines.length >= MAX_ELEMENTS) {
                    break;
                }
                const rect = el.getBoundingClientRect();
                if (rect.width === 0 || rect.height === 0) {
                    continue;
                }
                const style = window.getComputedStyle(el);
                if (style.display === 'none' || style.visibility === 'hidden') {
                    continue;
                }
                const parts = [el.tagName.toLowerCase()];
                const addAttr = (label, value) => {
                    if (value) {
                        parts.push(`${label}="${value}"`);
                    }
                };
                addAttr('id', el.id);
                addAttr('name', el.getAttribute('name'));
                addAttr('type', el.getAttribute('type'));
                addAttr('aria-label', el.getAttribute('aria-label'));
                // Report the attribute under its real name so a selector built from it matches.
                const testAttr = TEST_ATTRS.find((attr) => el.getAttribute(attr));
                if (testAttr) {
                    addAttr(testAttr, el.getAttribute(testAttr));
                }
                addAttr('placeholder', el.getAttribute('placeholder'));
                addAttr('class', Array.from(el.classList).slice(0, MAX_CLASSES).join(' '));
                const href = el.getAttribute('href') || '';
                if (href !== '#') {
                    addAttr('href', href.slice(0, MAX_HREF));
                }
                // Collapse whitespace so each element stays on a single line.
                addAttr('text', (el.textContent || '').replace(/\s+/g, ' ').trim().slice(0, MAX_TEXT));
                // De-duplicate on the full description: keying on tag+id+text alone
                // dropped every id-less, text-less element (e.g. most inputs) after the first.
                const line = parts.join(' | ');
                if (seen.has(line)) {
                    continue;
                }
                seen.add(line);
                lines.push(line);
            }
            return lines.join('\n');
        });
        return listing || '(no elements found)';
    }
    catch (err) {
        // Best-effort: the caller still gets a usable placeholder, but the cause is visible in logs.
        console.log(` ⚠️ DOM extraction failed (${String(err?.message ?? err).split('\n')[0]})`);
        return '(could not extract elements)';
    }
}
296
433
  async function aiStepFallback(page, stepText) {
297
434
  const openaiKey = process.env.OPENAI_API_KEY;
298
435
  if (!openaiKey)
299
436
  throw new Error(`No OPENAI_API_KEY — cannot use AI fallback for: "${stepText}"`);
300
437
  const { default: OpenAI } = await import('openai');
301
438
  const openai = new OpenAI({ apiKey: openaiKey });
302
- const MAX_ROUNDS = 6; // prevent infinite loops
439
+ const MAX_ROUNDS = 6;
440
+ let consecutiveFailures = 0;
303
441
  for (let round = 0; round < MAX_ROUNDS; round++) {
442
+ // Extract real DOM elements so AI uses accurate selectors
443
+ const domElements = await extractDomElements(page);
304
444
  const screenshotBuffer = await page.screenshot({ type: 'png' });
305
445
  const base64 = screenshotBuffer.toString('base64');
306
446
  const response = await openai.chat.completions.create({
@@ -309,16 +449,20 @@ async function aiStepFallback(page, stepText) {
309
449
  messages: [{
310
450
  role: 'user',
311
451
  content: [
312
- { type: 'text', text: buildAgentPrompt(stepText, round) },
452
+ {
453
+ type: 'text',
454
+ text: buildAgentPrompt(stepText, round) + `
455
+
456
+ ## REAL DOM ELEMENTS ON THIS PAGE
457
+ Use these actual elements — prefer id, data-test, name, aria-label over guessing:
458
+ ${domElements}`
459
+ },
313
460
  { type: 'image_url', image_url: { url: `data:image/png;base64,${base64}` } }
314
461
  ]
315
462
  }]
316
463
  });
317
464
  const raw = (response.choices[0]?.message?.content || '')
318
- .trim()
319
- .replace(/```json\n?/gi, '')
320
- .replace(/```/g, '')
321
- .trim();
465
+ .trim().replace(/```json\n?/gi, '').replace(/```/g, '').trim();
322
466
  if (!raw)
323
467
  throw new Error(`AI returned empty response for: "${stepText}"`);
324
468
  const plan = JSON.parse(raw);
@@ -327,16 +471,19 @@ async function aiStepFallback(page, stepText) {
327
471
  console.log(` ✅ AI agent completed: "${stepText}"`);
328
472
  return;
329
473
  }
330
- // Execute each action in this round
474
+ let anySucceeded = false;
331
475
  for (const act of plan.actions) {
332
476
  console.log(` 🤖 Executing: ${JSON.stringify(act)}`);
333
477
  try {
334
478
  if (act.action === 'click') {
335
- await page.locator(act.selector).first().click({ timeout: 10000 });
336
- await page.waitForTimeout(500); // brief settle
479
+ // Try multiple selector variants derived from the DOM
480
+ const tried = await tryAIClick(page, act.selector);
481
+ if (!tried)
482
+ throw new Error(`Could not find element: ${act.selector}`);
483
+ await page.waitForTimeout(500);
337
484
  }
338
485
  else if (act.action === 'fill') {
339
- await page.locator(act.selector).first().fill(act.value || '');
486
+ await tryAIFill(page, act.selector, act.value || '');
340
487
  }
341
488
  else if (act.action === 'select') {
342
489
  await page.locator(act.selector).first().selectOption({ label: act.value });
@@ -354,23 +501,77 @@ async function aiStepFallback(page, stepText) {
354
501
  }
355
502
  else if (act.action === 'scroll') {
356
503
  if (act.selector) {
357
- await page.locator(act.selector).first().scrollIntoViewIfNeeded();
504
+ await page.locator(act.selector).first().scrollIntoViewIfNeeded().catch(() => { });
358
505
  }
359
506
  else {
360
507
  await page.evaluate(() => window.scrollBy(0, 400));
361
508
  }
362
509
  }
510
+ anySucceeded = true;
511
+ consecutiveFailures = 0;
363
512
  }
364
513
  catch (e) {
365
- console.log(` ⚠️ Action failed (${e.message}), continuing to next round...`);
514
+ console.log(` ⚠️ Action failed (${e.message.split('\n')[0]})`);
515
+ consecutiveFailures++;
516
+ }
517
+ }
518
+ // If 2 consecutive rounds had zero successes, bail early
519
+ if (!anySucceeded) {
520
+ if (consecutiveFailures >= plan.actions.length * 2) {
521
+ throw new Error(`AI agent stuck — no actions succeeded after multiple rounds for: "${stepText}"`);
366
522
  }
367
523
  }
368
- // Wait for page to settle before next round
369
524
  await page.waitForLoadState('domcontentloaded').catch(() => { });
370
525
  await page.waitForTimeout(300);
371
526
  }
372
527
  throw new Error(`AI agent could not complete "${stepText}" within ${MAX_ROUNDS} rounds`);
373
528
  }
/**
 * Click the element the AI suggested, falling back to text-based locators.
 *
 * The AI selector is tried first. If it contains `:has-text("…")` or `:text("…")`,
 * that text is then tried as a button name, a link name, any text on the page, and
 * finally as a substring of a class name (lower-cased, whitespace → `_`).
 *
 * @param {object} page - Playwright page (uses locator, getByRole, getByText).
 * @param {string} selector - Selector proposed by the AI.
 * @returns {Promise<boolean>} true as soon as one strategy clicks; false when every
 *   strategy fails or the selector is missing/empty.
 */
async function tryAIClick(page, selector) {
    const TIMEOUT = 5000;
    // The model may omit the selector; report "not found" instead of crashing on .match().
    if (typeof selector !== 'string' || selector.trim() === '') {
        return false;
    }
    const textMatch = selector.match(/:has-text\("([^"]+)"\)|:text\("([^"]+)"\)/);
    const text = textMatch ? (textMatch[1] || textMatch[2]) : null;
    const strategies = [
        () => page.locator(selector).first().click({ timeout: TIMEOUT }),
    ];
    if (text) {
        // Escape regex metacharacters: visible text such as "Save (draft)" or "Price ("
        // must be matched literally, not compiled as a pattern (which can mis-match or throw).
        const pattern = new RegExp(text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'i');
        // Backslashes are doubled so the value stays a valid CSS attribute string.
        const classToken = text.toLowerCase().replace(/\s+/g, '_').replace(/\\/g, '\\\\');
        strategies.push(
            () => page.getByRole('button', { name: pattern }).first().click({ timeout: TIMEOUT }),
            () => page.getByRole('link', { name: pattern }).first().click({ timeout: TIMEOUT }),
            () => page.getByText(pattern).first().click({ timeout: TIMEOUT }),
            () => page.locator(`[class*="${classToken}"]`).first().click({ timeout: TIMEOUT }),
        );
    }
    for (const attempt of strategies) {
        try {
            await attempt();
            return true;
        }
        catch { /* fall through to the next strategy */ }
    }
    return false;
}
/**
 * Fill the input the AI suggested, falling back to placeholder/label lookups.
 *
 * The AI selector is tried first. If it contains `[placeholder="…"]`, `[name="…"]`
 * or `#id`, that value is then tried (case-insensitively, as literal text) as a
 * placeholder and as a label. Each attempt is bounded by Playwright's own `timeout`
 * option, so a timed-out attempt is cancelled rather than left running in the background.
 *
 * @param {object} page - Playwright page (uses locator, getByPlaceholder, getByLabel).
 * @param {string} selector - Selector proposed by the AI.
 * @param {string} value - Text to enter.
 * @returns {Promise<void>} Resolves after the first strategy that succeeds.
 * @throws {Error} `Could not fill element: <selector>` when no strategy works; the last
 *   underlying failure is attached as `cause`.
 */
async function tryAIFill(page, selector, value) {
    const TIMEOUT = 5000;
    if (typeof selector !== 'string' || selector.trim() === '') {
        throw new Error(`Could not fill element: ${selector}`);
    }
    const placeholderMatch = selector.match(/\[placeholder="([^"]+)"\]/);
    const nameMatch = selector.match(/\[name="([^"]+)"\]|#([\w-]+)/);
    const label = placeholderMatch?.[1] || nameMatch?.[1] || nameMatch?.[2] || '';
    const strategies = [
        () => page.locator(selector).first().fill(value, { timeout: TIMEOUT }),
    ];
    if (label) {
        // Attribute values like "user[email]" contain regex metacharacters; match them literally.
        const pattern = new RegExp(label.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'i');
        strategies.push(
            () => page.getByPlaceholder(pattern).first().fill(value, { timeout: TIMEOUT }),
            () => page.getByLabel(pattern).first().fill(value, { timeout: TIMEOUT }),
        );
    }
    let lastError;
    for (const attempt of strategies) {
        try {
            await attempt();
            return;
        }
        catch (err) {
            lastError = err;
        }
    }
    throw new Error(`Could not fill element: ${selector}`, { cause: lastError });
}
374
575
  /** Use OpenAI vision to identify the element and generate a selector, then click it */
375
576
  async function aiClickFallback(page, description) {
376
577
  const openaiKey = process.env.OPENAI_API_KEY;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cutleryapp/agent",
3
- "version": "1.0.21",
3
+ "version": "1.0.23",
4
4
  "description": "Local agent that connects your machine to the Cutlery QA platform and runs UI tests via Playwright",
5
5
  "main": "dist/cli.js",
6
6
  "bin": {