@cutleryapp/agent 1.0.20 → 1.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/mcp-executor.js +225 -21
  2. package/package.json +1 -1
@@ -60,29 +60,109 @@ class TestExecutor {
60
60
  });
61
61
  let stepError;
62
62
  try {
63
- // Navigate is handled directly — URL extraction doesn't need vision
63
+ let handled = false;
64
+ // 1. Navigate — direct URL goto, no selector needed
64
65
  if (lower.includes("navigate to") || lower.includes("go to")) {
65
66
  const urlMatch = raw.match(/(?:navigate\s+to|go\s+to)\s+(https?:\/\/\S+|\/\S*|\S+\.\S+)/i);
66
67
  if (urlMatch) {
67
68
  let url = urlMatch[1].trim();
68
- if (url.startsWith("/") && this.options.baseUrl) {
69
+ if (url.startsWith("/") && this.options.baseUrl)
69
70
  url = this.options.baseUrl.replace(/\/$/, "") + url;
70
- }
71
71
  await page.goto(url, { waitUntil: "domcontentloaded", timeout: 30000 });
72
+ handled = true;
72
73
  }
73
74
  else if (this.options.baseUrl) {
74
75
  await page.goto(this.options.baseUrl, { waitUntil: "domcontentloaded", timeout: 30000 });
76
+ handled = true;
75
77
  }
76
78
  }
77
- else {
78
- // ALL other steps: AI reads the screen and performs the action
79
- console.log(` 🤖 AI executing: "${raw}"`);
79
+ // 2. Click — smart selector strategies via MCP/Playwright
80
+ if (!handled && lower.includes("click")) {
81
+ const labelMatch = raw.match(/click\s+(?:on\s+)?(?:the\s+)?"?([^"]+?)"?(?:\s+(?:button|link|tab))?$/i);
82
+ const label = labelMatch?.[1]?.trim();
83
+ if (label) {
84
+ const scopeMatch = label.match(/^(.+?)\s+(?:under|inside|within|in the|in)\s+(.+)$/i);
85
+ const target = scopeMatch ? scopeMatch[1].trim() : label;
86
+ const scope = scopeMatch ? scopeMatch[2].trim() : null;
87
+ const nameRe = new RegExp(escapeRegex(target), 'i');
88
+ const clicked = scope
89
+ ? await tryClickScoped(page, nameRe, target, scope)
90
+ : await tryClick(page, nameRe, target);
91
+ if (clicked)
92
+ handled = true;
93
+ }
94
+ }
95
+ // 3. Fill — smart selector strategies via MCP/Playwright
96
+ if (!handled && (lower.includes("fill") || lower.includes("type") || lower.includes("enter"))) {
97
+ const match = raw.match(/(?:enter|fill|type)\s+"([^"]+)"\s+(?:in|into)\s+(?:the\s+)?"?([^"]+?)"?\s*(?:field|input|box|area)?\s*$/i) ||
98
+ raw.match(/(?:enter|fill|type)\s+(\S+)\s+(?:in|into)\s+(?:the\s+)?(.+?)\s*(?:field|input|box|area)?\s*$/i);
99
+ if (match) {
100
+ const value = match[1].trim();
101
+ const fieldLabel = match[2].trim();
102
+ const looksLikeCss = (s) => /[#.\[\]:>]/.test(s);
103
+ if (looksLikeCss(fieldLabel)) {
104
+ await page.waitForSelector(fieldLabel, { state: "visible", timeout: 5000 });
105
+ await page.fill(fieldLabel, value);
106
+ }
107
+ else {
108
+ await tryFill(page, fieldLabel, value);
109
+ }
110
+ handled = true;
111
+ }
112
+ }
113
+ // 4. Verify — check page text
114
+ if (!handled && (lower.includes("verify") || lower.includes("assert") || lower.includes("check") || lower.includes("should"))) {
115
+ const isNegative = /not\s+(?:displayed|visible|present)/i.test(raw);
116
+ const textMatch = raw.match(/"([^"]+)"/) ||
117
+ raw.match(/(?:verify|check|assert)\s+(?:i\s+see\s+(?:text\s+)?|text\s+)?(.+?)(?:\s+is\s+(?:not\s+)?(?:displayed|visible|present))?$/i);
118
+ if (textMatch) {
119
+ const expected = textMatch[1].trim();
120
+ if (isNegative) {
121
+ const content = await page.textContent('body') || '';
122
+ if (content.includes(expected))
123
+ throw new Error(`Text "${expected}" should NOT be visible`);
124
+ }
125
+ else {
126
+ await page.waitForFunction((t) => document.body.innerText.includes(t), expected, { timeout: 10000 });
127
+ }
128
+ handled = true;
129
+ }
130
+ }
131
+ // 5. Wait — simple timeout
132
+ if (!handled && lower.includes("wait")) {
133
+ const ms = raw.match(/wait\s+(\d+)\s*(?:second|ms|millisecond)/i);
134
+ if (ms) {
135
+ await page.waitForTimeout(parseInt(ms[1]) * (raw.toLowerCase().includes('ms') ? 1 : 1000));
136
+ handled = true;
137
+ }
138
+ }
139
+ // 6. Select — dropdown
140
+ if (!handled && (lower.includes("select") || lower.includes("choose"))) {
141
+ const selMatch = raw.match(/select\s+"?([^"]+?)"?\s+(?:from|in)\s+"?([^"]+?)"?\s*(?:dropdown|select|field)?$/i);
142
+ if (selMatch) {
143
+ try {
144
+ await page.selectOption(selMatch[2].trim(), { label: selMatch[1].trim() });
145
+ handled = true;
146
+ }
147
+ catch { /* fall to AI */ }
148
+ }
149
+ }
150
+ // 7. AI fallback — for anything not handled or ambiguous
151
+ if (!handled) {
152
+ console.log(` 🤖 MCP could not handle step, using AI: "${raw}"`);
80
153
  await aiStepFallback(page, raw);
81
154
  }
82
155
  }
83
156
  catch (err) {
84
- stepError = err.message;
85
- result.success = false;
157
+ // MCP execution failed — let AI try to recover
158
+ console.log(` ⚠️ MCP step failed (${err.message}), trying AI...`);
159
+ try {
160
+ await aiStepFallback(page, raw);
161
+ }
162
+ catch (aiErr) {
163
+ stepError = err.message;
164
+ result.success = false;
165
+ }
86
166
  }
87
167
  // Screenshot after each step
88
168
  let screenshotB64 = "";
@@ -213,14 +293,77 @@ Set "done": true with empty "actions" array when the goal is fully accomplished.
213
293
  * and returns a SEQUENCE of actions to accomplish it — then executes them one by one.
214
294
  * After each action it re-screenshots so the AI can verify progress and adapt.
215
295
  */
/**
 * Summarise the visible interactive elements of the current page as text, so
 * the AI prompt can reference selectors that really exist in the DOM.
 *
 * Each element becomes one line of ` | `-separated parts: the tag name followed
 * by whichever of id, name, type, aria-label, test attribute, placeholder, href
 * and text are present. At most 60 lines are returned.
 *
 * @param {object} page - Object exposing `evaluate(fn)`, which runs `fn` in the
 *   page context (a Playwright page in the caller shown in this file).
 * @returns {Promise<string>} Newline-joined element lines, `(no elements found)`
 *   when nothing qualifies, or `(could not extract elements)` when evaluation
 *   throws (best-effort: extraction failures never abort the step).
 */
async function extractDomElements(page) {
  try {
    const elements = await page.evaluate(() => {
      // This callback executes inside the page, so everything it needs must be
      // declared here rather than captured from the enclosing scope.
      const MAX_ELEMENTS = 60;
      const MAX_TEXT_LENGTH = 40;
      const MAX_HREF_LENGTH = 50;
      const selectors = [
        'a', 'button', 'input', 'select', 'textarea',
        '[role="button"]', '[role="link"]', '[role="tab"]',
        '[role="menuitem"]', '[role="option"]', '[onclick]',
        '[data-test]', '[data-testid]', '[data-cy]',
      ];
      const testAttributes = ['data-test', 'data-testid', 'data-cy'];
      const seen = new Set();
      const result = [];
      for (const el of document.querySelectorAll(selectors.join(','))) {
        // Skip elements that take up no space or are hidden via CSS.
        const rect = el.getBoundingClientRect();
        if (rect.width === 0 || rect.height === 0) {
          continue;
        }
        const style = window.getComputedStyle(el);
        if (style.display === 'none' || style.visibility === 'hidden') {
          continue;
        }
        const attr = (attrName) => el.getAttribute(attrName) || '';
        const parts = [el.tagName.toLowerCase()];
        if (el.id) {
          parts.push(`id="${el.id}"`);
        }
        for (const attrName of ['name', 'type', 'aria-label']) {
          const attrValue = attr(attrName);
          if (attrValue) {
            parts.push(`${attrName}="${attrValue}"`);
          }
        }
        // Report the test hook under its real attribute name so a selector
        // built from this line (e.g. [data-testid="x"]) matches the DOM.
        const testAttribute = testAttributes.find((attrName) => attr(attrName));
        if (testAttribute) {
          parts.push(`${testAttribute}="${attr(testAttribute)}"`);
        }
        const placeholder = attr('placeholder');
        if (placeholder) {
          parts.push(`placeholder="${placeholder}"`);
        }
        const href = attr('href');
        if (href && href !== '#') {
          parts.push(`href="${href.slice(0, MAX_HREF_LENGTH)}"`);
        }
        // Collapse whitespace so multi-line text cannot break the
        // one-element-per-line output format.
        const text = (el.textContent || '').replace(/\s+/g, ' ').trim().slice(0, MAX_TEXT_LENGTH);
        if (text) {
          parts.push(`text="${text}"`);
        }
        // De-duplicate on the full description: keying on tag/id/text alone
        // would merge distinct inputs that differ only by name or placeholder.
        const line = parts.join(' | ');
        if (seen.has(line)) {
          continue;
        }
        seen.add(line);
        result.push(line);
        if (result.length >= MAX_ELEMENTS) {
          break;
        }
      }
      return result.join('\n');
    });
    return elements || '(no elements found)';
  }
  catch {
    // Deliberate best effort: the caller still has the screenshot to work from.
    return '(could not extract elements)';
  }
}
216
356
  async function aiStepFallback(page, stepText) {
217
357
  const openaiKey = process.env.OPENAI_API_KEY;
218
358
  if (!openaiKey)
219
359
  throw new Error(`No OPENAI_API_KEY — cannot use AI fallback for: "${stepText}"`);
220
360
  const { default: OpenAI } = await import('openai');
221
361
  const openai = new OpenAI({ apiKey: openaiKey });
222
- const MAX_ROUNDS = 6; // prevent infinite loops
362
+ const MAX_ROUNDS = 6;
363
+ let consecutiveFailures = 0;
223
364
  for (let round = 0; round < MAX_ROUNDS; round++) {
365
+ // Extract real DOM elements so AI uses accurate selectors
366
+ const domElements = await extractDomElements(page);
224
367
  const screenshotBuffer = await page.screenshot({ type: 'png' });
225
368
  const base64 = screenshotBuffer.toString('base64');
226
369
  const response = await openai.chat.completions.create({
@@ -229,16 +372,20 @@ async function aiStepFallback(page, stepText) {
229
372
  messages: [{
230
373
  role: 'user',
231
374
  content: [
232
- { type: 'text', text: buildAgentPrompt(stepText, round) },
375
+ {
376
+ type: 'text',
377
+ text: buildAgentPrompt(stepText, round) + `
378
+
379
+ ## REAL DOM ELEMENTS ON THIS PAGE
380
+ Use these actual elements — prefer id, data-test, name, aria-label over guessing:
381
+ ${domElements}`
382
+ },
233
383
  { type: 'image_url', image_url: { url: `data:image/png;base64,${base64}` } }
234
384
  ]
235
385
  }]
236
386
  });
237
387
  const raw = (response.choices[0]?.message?.content || '')
238
- .trim()
239
- .replace(/```json\n?/gi, '')
240
- .replace(/```/g, '')
241
- .trim();
388
+ .trim().replace(/```json\n?/gi, '').replace(/```/g, '').trim();
242
389
  if (!raw)
243
390
  throw new Error(`AI returned empty response for: "${stepText}"`);
244
391
  const plan = JSON.parse(raw);
@@ -247,16 +394,19 @@ async function aiStepFallback(page, stepText) {
247
394
  console.log(` ✅ AI agent completed: "${stepText}"`);
248
395
  return;
249
396
  }
250
- // Execute each action in this round
397
+ let anySucceeded = false;
251
398
  for (const act of plan.actions) {
252
399
  console.log(` 🤖 Executing: ${JSON.stringify(act)}`);
253
400
  try {
254
401
  if (act.action === 'click') {
255
- await page.locator(act.selector).first().click({ timeout: 10000 });
256
- await page.waitForTimeout(500); // brief settle
402
+ // Try multiple selector variants derived from the DOM
403
+ const tried = await tryAIClick(page, act.selector);
404
+ if (!tried)
405
+ throw new Error(`Could not find element: ${act.selector}`);
406
+ await page.waitForTimeout(500);
257
407
  }
258
408
  else if (act.action === 'fill') {
259
- await page.locator(act.selector).first().fill(act.value || '');
409
+ await tryAIFill(page, act.selector, act.value || '');
260
410
  }
261
411
  else if (act.action === 'select') {
262
412
  await page.locator(act.selector).first().selectOption({ label: act.value });
@@ -274,23 +424,77 @@ async function aiStepFallback(page, stepText) {
274
424
  }
275
425
  else if (act.action === 'scroll') {
276
426
  if (act.selector) {
277
- await page.locator(act.selector).first().scrollIntoViewIfNeeded();
427
+ await page.locator(act.selector).first().scrollIntoViewIfNeeded().catch(() => { });
278
428
  }
279
429
  else {
280
430
  await page.evaluate(() => window.scrollBy(0, 400));
281
431
  }
282
432
  }
433
+ anySucceeded = true;
434
+ consecutiveFailures = 0;
283
435
  }
284
436
  catch (e) {
285
- console.log(` ⚠️ Action failed (${e.message}), continuing to next round...`);
437
+ console.log(` ⚠️ Action failed (${e.message.split('\n')[0]})`);
438
+ consecutiveFailures++;
439
+ }
440
+ }
441
+ // If 2 consecutive rounds had zero successes, bail early
442
+ if (!anySucceeded) {
443
+ if (consecutiveFailures >= plan.actions.length * 2) {
444
+ throw new Error(`AI agent stuck — no actions succeeded after multiple rounds for: "${stepText}"`);
286
445
  }
287
446
  }
288
- // Wait for page to settle before next round
289
447
  await page.waitForLoadState('domcontentloaded').catch(() => { });
290
448
  await page.waitForTimeout(300);
291
449
  }
292
450
  throw new Error(`AI agent could not complete "${stepText}" within ${MAX_ROUNDS} rounds`);
293
451
  }
/**
 * Click the element the AI suggested, falling back to text-based lookups when
 * the suggested selector itself does not resolve.
 *
 * Strategies are tried in order and the first successful click wins:
 *   1. the selector exactly as given;
 *   2. if the selector embeds `:has-text("…")` or `:text("…")`, that text is
 *      matched (case-insensitively, as a literal) against button role, link
 *      role, any text, and finally a class-name fragment (spaces → `_`).
 *
 * @param {object} page - Page object providing `locator`, `getByRole` and `getByText`.
 * @param {string} selector - Selector proposed by the AI.
 * @returns {Promise<boolean>} `true` once a click succeeds, `false` if every
 *   strategy fails or `selector` is not a non-empty string.
 */
async function tryAIClick(page, selector) {
  const TIMEOUT = 5000;
  // The AI may omit the selector; report "not found" instead of a TypeError.
  if (typeof selector !== 'string' || selector === '') {
    return false;
  }
  const textMatch = selector.match(/:has-text\("([^"]+)"\)|:text\("([^"]+)"\)/);
  const text = textMatch ? (textMatch[1] || textMatch[2]) : null;
  const strategies = [
    () => page.locator(selector).first().click({ timeout: TIMEOUT }),
  ];
  if (text) {
    // Escape regex metacharacters so labels such as "Total (USD" or "Save?"
    // are matched literally rather than throwing or matching the wrong name.
    const nameRe = new RegExp(text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'i');
    const classFragment = text.toLowerCase().replace(/\s+/g, '_');
    strategies.push(
      () => page.getByRole('button', { name: nameRe }).first().click({ timeout: TIMEOUT }),
      () => page.getByRole('link', { name: nameRe }).first().click({ timeout: TIMEOUT }),
      () => page.getByText(nameRe).first().click({ timeout: TIMEOUT }),
      () => page.locator(`[class*="${classFragment}"]`).first().click({ timeout: TIMEOUT }),
    );
  }
  for (const attempt of strategies) {
    try {
      await attempt();
      return true;
    }
    catch { /* this strategy did not match — try the next one */ }
  }
  return false;
}
/**
 * Fill the input the AI suggested, falling back to placeholder/label lookups
 * when the suggested selector itself does not resolve.
 *
 * Strategies are tried in order and the first successful fill wins:
 *   1. the selector exactly as given;
 *   2. if the selector contains `[placeholder="…"]`, `[name="…"]` or `#id`,
 *      that value is matched (case-insensitively, as a literal) first against
 *      placeholders and then against labels.
 *
 * @param {object} page - Page object providing `locator`, `getByPlaceholder` and `getByLabel`.
 * @param {string} selector - Selector proposed by the AI.
 * @param {string} value - Text to enter.
 * @returns {Promise<void>} Resolves once a fill succeeds.
 * @throws {Error} `Could not fill element: <selector>` when every strategy fails.
 */
async function tryAIFill(page, selector, value) {
  const TIMEOUT = 5000;
  if (typeof selector !== 'string' || selector === '') {
    throw new Error(`Could not fill element: ${selector}`);
  }
  const placeholderMatch = selector.match(/\[placeholder="([^"]+)"\]/);
  const nameMatch = selector.match(/\[name="([^"]+)"\]|#([\w-]+)/);
  const label = placeholderMatch?.[1] || nameMatch?.[1] || nameMatch?.[2] || '';
  // Hand the timeout to fill() itself. Racing against a bare setTimeout left a
  // live 5 s timer behind on success and never cancelled the losing fill(),
  // which could still type into the page after the next strategy had started.
  const fillOptions = { timeout: TIMEOUT };
  const strategies = [
    () => page.locator(selector).first().fill(value, fillOptions),
  ];
  if (label) {
    // Escape regex metacharacters so the hint is matched literally.
    const labelRe = new RegExp(label.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'i');
    strategies.push(
      () => page.getByPlaceholder(labelRe).first().fill(value, fillOptions),
      () => page.getByLabel(labelRe).first().fill(value, fillOptions),
    );
  }
  for (const attempt of strategies) {
    try {
      await attempt();
      return;
    }
    catch { /* this strategy did not match — try the next one */ }
  }
  throw new Error(`Could not fill element: ${selector}`);
}
294
498
  /** Use OpenAI vision to identify the element and generate a selector, then click it */
295
499
  async function aiClickFallback(page, description) {
296
500
  const openaiKey = process.env.OPENAI_API_KEY;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cutleryapp/agent",
3
- "version": "1.0.20",
3
+ "version": "1.0.22",
4
4
  "description": "Local agent that connects your machine to the Cutlery QA platform and runs UI tests via Playwright",
5
5
  "main": "dist/cli.js",
6
6
  "bin": {