@cutleryapp/agent 1.0.21 → 1.0.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/mcp-executor.js +137 -13
- package/package.json +1 -1
package/dist/mcp-executor.js
CHANGED
|
@@ -293,14 +293,77 @@ Set "done": true with empty "actions" array when the goal is fully accomplished.
|
|
|
293
293
|
* and returns a SEQUENCE of actions to accomplish it — then executes them one by one.
|
|
294
294
|
* After each action it re-screenshots so the AI can verify progress and adapt.
|
|
295
295
|
*/
|
|
296
|
+
/**
 * Extract real interactive elements from the DOM for AI selector accuracy.
 *
 * Evaluates a callback through `page.evaluate` that collects visible
 * interactive elements and describes each one on a single line of the form
 * `tag | attr="value" | ... | text="..."`.
 *
 * @param {object} page - Object exposing an async `evaluate(fn)` method
 *   (used here as a Playwright-style page).
 * @returns {Promise<string>} Up to 60 newline-separated element descriptors;
 *   '(no elements found)' when nothing qualified; or
 *   '(could not extract elements)' when evaluation threw.
 */
async function extractDomElements(page) {
    try {
        const elements = await page.evaluate(() => {
            // Everything used by this callback is declared inside it: the callback
            // is handed to page.evaluate and must not rely on outer-scope bindings.
            const MAX_ELEMENTS = 60;
            const MAX_TEXT_LENGTH = 40;
            const MAX_HREF_LENGTH = 50;
            const TEST_ATTRIBUTES = ['data-test', 'data-testid', 'data-cy'];
            const selectors = [
                'a', 'button', 'input', 'select', 'textarea',
                '[role="button"]', '[role="link"]', '[role="tab"]',
                '[role="menuitem"]', '[role="option"]', '[onclick]',
                '[data-test]', '[data-testid]', '[data-cy]',
            ];
            const seen = new Set();
            const result = [];
            for (const el of document.querySelectorAll(selectors.join(','))) {
                // Skip elements that take up no space or are hidden via CSS.
                const rect = el.getBoundingClientRect();
                if (rect.width === 0 || rect.height === 0) {
                    continue;
                }
                const style = window.getComputedStyle(el);
                if (style.display === 'none' || style.visibility === 'hidden') {
                    continue;
                }
                const attr = (attrName) => el.getAttribute(attrName) || '';
                // Collapse whitespace runs so each element stays on one output line.
                const text = (el.textContent || '').replace(/\s+/g, ' ').trim().slice(0, MAX_TEXT_LENGTH);
                const href = attr('href');
                // Report the test attribute under its real name so a selector built
                // from the descriptor actually matches the element.
                const testAttribute = TEST_ATTRIBUTES.find((attrName) => attr(attrName) !== '');
                const parts = [el.tagName.toLowerCase()];
                if (el.id) {
                    parts.push(`id="${el.id}"`);
                }
                for (const attrName of ['name', 'type', 'aria-label']) {
                    const attrValue = attr(attrName);
                    if (attrValue) {
                        parts.push(`${attrName}="${attrValue}"`);
                    }
                }
                if (testAttribute) {
                    parts.push(`${testAttribute}="${attr(testAttribute)}"`);
                }
                const placeholder = attr('placeholder');
                if (placeholder) {
                    parts.push(`placeholder="${placeholder}"`);
                }
                if (href && href !== '#') {
                    parts.push(`href="${href.slice(0, MAX_HREF_LENGTH)}"`);
                }
                if (text) {
                    parts.push(`text="${text}"`);
                }
                // De-duplicate on the full descriptor. Keying on tag+id+text alone
                // would merge every id-less, text-less control (e.g. all inputs of
                // a form) into a single entry.
                const line = parts.join(' | ');
                if (seen.has(line)) {
                    continue;
                }
                seen.add(line);
                result.push(line);
                if (result.length >= MAX_ELEMENTS) {
                    break;
                }
            }
            return result.join('\n');
        });
        return elements || '(no elements found)';
    }
    catch {
        // Best effort: the element list is only a hint for the AI prompt, so a
        // failed extraction must not abort the calling step.
        return '(could not extract elements)';
    }
}
|
|
296
356
|
async function aiStepFallback(page, stepText) {
|
|
297
357
|
const openaiKey = process.env.OPENAI_API_KEY;
|
|
298
358
|
if (!openaiKey)
|
|
299
359
|
throw new Error(`No OPENAI_API_KEY — cannot use AI fallback for: "${stepText}"`);
|
|
300
360
|
const { default: OpenAI } = await import('openai');
|
|
301
361
|
const openai = new OpenAI({ apiKey: openaiKey });
|
|
302
|
-
const MAX_ROUNDS = 6;
|
|
362
|
+
const MAX_ROUNDS = 6;
|
|
363
|
+
let consecutiveFailures = 0;
|
|
303
364
|
for (let round = 0; round < MAX_ROUNDS; round++) {
|
|
365
|
+
// Extract real DOM elements so AI uses accurate selectors
|
|
366
|
+
const domElements = await extractDomElements(page);
|
|
304
367
|
const screenshotBuffer = await page.screenshot({ type: 'png' });
|
|
305
368
|
const base64 = screenshotBuffer.toString('base64');
|
|
306
369
|
const response = await openai.chat.completions.create({
|
|
@@ -309,16 +372,20 @@ async function aiStepFallback(page, stepText) {
|
|
|
309
372
|
messages: [{
|
|
310
373
|
role: 'user',
|
|
311
374
|
content: [
|
|
312
|
-
{
|
|
375
|
+
{
|
|
376
|
+
type: 'text',
|
|
377
|
+
text: buildAgentPrompt(stepText, round) + `
|
|
378
|
+
|
|
379
|
+
## REAL DOM ELEMENTS ON THIS PAGE
|
|
380
|
+
Use these actual elements — prefer id, data-test, name, aria-label over guessing:
|
|
381
|
+
${domElements}`
|
|
382
|
+
},
|
|
313
383
|
{ type: 'image_url', image_url: { url: `data:image/png;base64,${base64}` } }
|
|
314
384
|
]
|
|
315
385
|
}]
|
|
316
386
|
});
|
|
317
387
|
const raw = (response.choices[0]?.message?.content || '')
|
|
318
|
-
.trim()
|
|
319
|
-
.replace(/```json\n?/gi, '')
|
|
320
|
-
.replace(/```/g, '')
|
|
321
|
-
.trim();
|
|
388
|
+
.trim().replace(/```json\n?/gi, '').replace(/```/g, '').trim();
|
|
322
389
|
if (!raw)
|
|
323
390
|
throw new Error(`AI returned empty response for: "${stepText}"`);
|
|
324
391
|
const plan = JSON.parse(raw);
|
|
@@ -327,16 +394,19 @@ async function aiStepFallback(page, stepText) {
|
|
|
327
394
|
console.log(` ✅ AI agent completed: "${stepText}"`);
|
|
328
395
|
return;
|
|
329
396
|
}
|
|
330
|
-
|
|
397
|
+
let anySucceeded = false;
|
|
331
398
|
for (const act of plan.actions) {
|
|
332
399
|
console.log(` 🤖 Executing: ${JSON.stringify(act)}`);
|
|
333
400
|
try {
|
|
334
401
|
if (act.action === 'click') {
|
|
335
|
-
|
|
336
|
-
await page.
|
|
402
|
+
// Try multiple selector variants derived from the DOM
|
|
403
|
+
const tried = await tryAIClick(page, act.selector);
|
|
404
|
+
if (!tried)
|
|
405
|
+
throw new Error(`Could not find element: ${act.selector}`);
|
|
406
|
+
await page.waitForTimeout(500);
|
|
337
407
|
}
|
|
338
408
|
else if (act.action === 'fill') {
|
|
339
|
-
await page
|
|
409
|
+
await tryAIFill(page, act.selector, act.value || '');
|
|
340
410
|
}
|
|
341
411
|
else if (act.action === 'select') {
|
|
342
412
|
await page.locator(act.selector).first().selectOption({ label: act.value });
|
|
@@ -354,23 +424,77 @@ async function aiStepFallback(page, stepText) {
|
|
|
354
424
|
}
|
|
355
425
|
else if (act.action === 'scroll') {
|
|
356
426
|
if (act.selector) {
|
|
357
|
-
await page.locator(act.selector).first().scrollIntoViewIfNeeded();
|
|
427
|
+
await page.locator(act.selector).first().scrollIntoViewIfNeeded().catch(() => { });
|
|
358
428
|
}
|
|
359
429
|
else {
|
|
360
430
|
await page.evaluate(() => window.scrollBy(0, 400));
|
|
361
431
|
}
|
|
362
432
|
}
|
|
433
|
+
anySucceeded = true;
|
|
434
|
+
consecutiveFailures = 0;
|
|
363
435
|
}
|
|
364
436
|
catch (e) {
|
|
365
|
-
console.log(` ⚠️ Action failed (${e.message})
|
|
437
|
+
console.log(` ⚠️ Action failed (${e.message.split('\n')[0]})`);
|
|
438
|
+
consecutiveFailures++;
|
|
439
|
+
}
|
|
440
|
+
}
|
|
441
|
+
// If 2 consecutive rounds had zero successes, bail early
|
|
442
|
+
if (!anySucceeded) {
|
|
443
|
+
if (consecutiveFailures >= plan.actions.length * 2) {
|
|
444
|
+
throw new Error(`AI agent stuck — no actions succeeded after multiple rounds for: "${stepText}"`);
|
|
366
445
|
}
|
|
367
446
|
}
|
|
368
|
-
// Wait for page to settle before next round
|
|
369
447
|
await page.waitForLoadState('domcontentloaded').catch(() => { });
|
|
370
448
|
await page.waitForTimeout(300);
|
|
371
449
|
}
|
|
372
450
|
throw new Error(`AI agent could not complete "${stepText}" within ${MAX_ROUNDS} rounds`);
|
|
373
451
|
}
|
|
452
|
+
/**
 * Try clicking with multiple selector strategies derived from the AI suggestion.
 *
 * The AI-provided selector is tried first. When it embeds a text filter
 * (`:has-text("...")` or `:text("...")`, single or double quoted), that text is
 * also tried as a button name, a link name, visible text, and finally as a
 * snake_cased class-name fragment.
 *
 * @param {object} page - Playwright-style page exposing `locator`, `getByRole`
 *   and `getByText`.
 * @param {string} selector - Selector suggested by the AI.
 * @returns {Promise<boolean>} true as soon as one strategy clicks successfully;
 *   false when every strategy failed or `selector` is not a non-empty string.
 */
async function tryAIClick(page, selector) {
    const TIMEOUT = 5000;
    // The selector comes from model output; a missing one cannot match anything.
    if (typeof selector !== 'string' || selector === '') {
        return false;
    }
    const escapeRegExp = (raw) => raw.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const textMatch = selector.match(/:(?:has-text|text)\((?:"([^"]+)"|'([^']+)')\)/);
    const text = textMatch ? (textMatch[1] || textMatch[2]) : null;
    const clickOptions = { timeout: TIMEOUT };
    const strategies = [
        () => page.locator(selector).first().click(clickOptions),
    ];
    if (text) {
        // Match the text literally: labels such as "Save (draft)" or "C++" would
        // otherwise be interpreted as regex syntax and mis-match or throw.
        const pattern = new RegExp(escapeRegExp(text), 'i');
        // Escape quote/backslash so the fragment cannot break the attribute selector.
        const classFragment = text.toLowerCase().replace(/\s+/g, '_').replace(/["\\]/g, '\\$&');
        strategies.push(
            () => page.getByRole('button', { name: pattern }).first().click(clickOptions),
            () => page.getByRole('link', { name: pattern }).first().click(clickOptions),
            () => page.getByText(pattern).first().click(clickOptions),
            () => page.locator(`[class*="${classFragment}"]`).first().click(clickOptions),
        );
    }
    for (const attempt of strategies) {
        try {
            await attempt();
            return true;
        }
        catch { /* this strategy did not match — fall through to the next one */ }
    }
    return false;
}
|
|
476
|
+
/**
 * Try filling with multiple selector strategies.
 *
 * The AI-provided selector is tried first. When it contains a
 * `[placeholder="..."]`, `[name="..."]` or `#id` part, that value is also tried
 * as a placeholder match and then as a label match.
 *
 * @param {object} page - Playwright-style page exposing `locator`,
 *   `getByPlaceholder` and `getByLabel`.
 * @param {string} selector - Selector suggested by the AI.
 * @param {string} value - Text to enter into the field.
 * @returns {Promise<void>} Resolves once one strategy has filled the field.
 * @throws {Error} `Could not fill element: <selector>` when no strategy succeeded.
 */
async function tryAIFill(page, selector, value) {
    const TIMEOUT = 5000;
    // The selector comes from model output; a missing one cannot match anything.
    if (typeof selector !== 'string' || selector === '') {
        throw new Error(`Could not fill element: ${selector}`);
    }
    const escapeRegExp = (raw) => raw.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const placeholderMatch = selector.match(/\[placeholder="([^"]+)"\]/);
    const nameMatch = selector.match(/\[name="([^"]+)"\]|#([\w-]+)/);
    const label = placeholderMatch?.[1] || nameMatch?.[1] || nameMatch?.[2] || '';
    // Let the fill call enforce the timeout itself (as tryAIClick does for click).
    // Racing against a setTimeout leaves a live timer behind and abandons the
    // fill instead of stopping it, so it could still land after a later strategy.
    const fillOptions = { timeout: TIMEOUT };
    const strategies = [
        () => page.locator(selector).first().fill(value, fillOptions),
    ];
    if (label) {
        // Match the label literally rather than as regex syntax.
        const pattern = new RegExp(escapeRegExp(label), 'i');
        strategies.push(
            () => page.getByPlaceholder(pattern).first().fill(value, fillOptions),
            () => page.getByLabel(pattern).first().fill(value, fillOptions),
        );
    }
    for (const attempt of strategies) {
        try {
            await attempt();
            return;
        }
        catch { /* this strategy did not match — fall through to the next one */ }
    }
    throw new Error(`Could not fill element: ${selector}`);
}
|
|
374
498
|
/** Use OpenAI vision to identify the element and generate a selector, then click it */
|
|
375
499
|
async function aiClickFallback(page, description) {
|
|
376
500
|
const openaiKey = process.env.OPENAI_API_KEY;
|