@cutleryapp/agent 1.0.24 → 1.0.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/mcp-executor.js +158 -31
- package/package.json +1 -1
package/dist/mcp-executor.js
CHANGED
|
@@ -58,9 +58,18 @@ class TestExecutor {
|
|
|
58
58
|
total: steps.length,
|
|
59
59
|
message: raw,
|
|
60
60
|
});
|
|
61
|
+
const stepAttachment = (testCase.step_attachments || {})[String(i)] || null;
|
|
62
|
+
console.log(` đ Step ${i} attachment: ${stepAttachment ? `YES (${stepAttachment.length} chars)` : 'none'}`);
|
|
61
63
|
let stepError;
|
|
62
64
|
try {
|
|
63
|
-
|
|
65
|
+
// When a reference image is attached, skip MCP strategies entirely and go
|
|
66
|
+
// straight to the AI multi-field loop so it can scan the form and fill everything.
|
|
67
|
+
if (stepAttachment) {
|
|
68
|
+
console.log(` đ Reference image attached â using AI form-fill loop`);
|
|
69
|
+
await aiStepFallback(page, raw, stepAttachment);
|
|
70
|
+
// fall through to screenshot/result logging below
|
|
71
|
+
}
|
|
72
|
+
let handled = !!stepAttachment; // skip MCP strategies when attachment present
|
|
64
73
|
// 1. Navigate — direct URL goto, no selector needed
|
|
65
74
|
if (lower.includes("navigate to") || lower.includes("go to")) {
|
|
66
75
|
const urlMatch = raw.match(/(?:navigate\s+to|go\s+to)\s+(https?:\/\/\S+|\/\S*|\S+\.\S+)/i);
|
|
@@ -92,6 +101,39 @@ class TestExecutor {
|
|
|
92
101
|
handled = true;
|
|
93
102
|
}
|
|
94
103
|
}
|
|
104
|
+
// 3a. Multi-field fill: "Fill firstname, lastname" → fill each with inferred value
|
|
105
|
+
if (!handled && /^(?:fill|type)\s+/i.test(raw) && !/\s+(?:in|into|with)\s+/i.test(raw)) {
|
|
106
|
+
const fieldsPart = raw.replace(/^(?:fill|type)\s+/i, "").trim();
|
|
107
|
+
const fields = fieldsPart.split(/,\s*/).map((f) => f.trim().replace(/^["']|["']$/g, "")).filter(Boolean);
|
|
108
|
+
if (fields.length > 1) {
|
|
109
|
+
const valueMap = {
|
|
110
|
+
firstname: "John", first: "John", fname: "John",
|
|
111
|
+
lastname: "Smith", last: "Smith", lname: "Smith", surname: "Smith",
|
|
112
|
+
name: "John Smith", fullname: "John Smith",
|
|
113
|
+
email: "john.smith@example.com", emailaddress: "john.smith@example.com",
|
|
114
|
+
phone: "9876543210", mobile: "9876543210", phonenumber: "9876543210", mobilenumber: "9876543210",
|
|
115
|
+
address: "123 Test Street", currentaddress: "123 Test Street", streetaddress: "123 Test Street",
|
|
116
|
+
city: "New York", state: "New York",
|
|
117
|
+
zip: "10001", zipcode: "10001", postalcode: "10001",
|
|
118
|
+
dob: "01/01/1990", dateofbirth: "01/01/1990", birthdate: "01/01/1990",
|
|
119
|
+
age: "30", username: "john.smith",
|
|
120
|
+
password: "Test@1234", company: "Acme Corp",
|
|
121
|
+
subject: "Mathematics", subjects: "Mathematics",
|
|
122
|
+
message: "This is a test message.", comment: "Test comment.",
|
|
123
|
+
description: "Test description.",
|
|
124
|
+
};
|
|
125
|
+
for (const field of fields) {
|
|
126
|
+
const key = field.toLowerCase().replace(/[\s_-]+/g, "");
|
|
127
|
+
const value = valueMap[key] || "Test Value";
|
|
128
|
+
console.log(` â¨ī¸ Multi-fill: "${field}" â "${value}"`);
|
|
129
|
+
try {
|
|
130
|
+
await tryFill(page, field, value);
|
|
131
|
+
}
|
|
132
|
+
catch { /* ignore individual failures */ }
|
|
133
|
+
}
|
|
134
|
+
handled = true;
|
|
135
|
+
}
|
|
136
|
+
}
|
|
95
137
|
// 3. Fill — smart selector strategies via MCP/Playwright
|
|
96
138
|
if (!handled && (lower.includes("fill") || lower.includes("type") || lower.includes("enter"))) {
|
|
97
139
|
const match = raw.match(/(?:enter|fill|type)\s+"([^"]+)"\s+(?:in|into)\s+(?:the\s+)?"?([^"]+?)"?\s*(?:field|input|box|area)?\s*$/i) ||
|
|
@@ -156,7 +198,7 @@ class TestExecutor {
|
|
|
156
198
|
}
|
|
157
199
|
else {
|
|
158
200
|
console.log(` đ¤ AI intent loop for: "${raw}"`);
|
|
159
|
-
await aiStepFallback(page, raw);
|
|
201
|
+
await aiStepFallback(page, raw, null);
|
|
160
202
|
}
|
|
161
203
|
}
|
|
162
204
|
}
|
|
@@ -243,23 +285,39 @@ async function tryClick(page, nameRe, label) {
|
|
|
243
285
|
// AI vision fallback
|
|
244
286
|
return await aiClickFallback(page, label);
|
|
245
287
|
}
|
|
246
|
-
function buildAgentPrompt(stepText, round) {
|
|
288
|
+
function buildAgentPrompt(stepText, round, hasAttachment = false) {
|
|
289
|
+
const attachmentSection = hasAttachment ? `
|
|
290
|
+
## REFERENCE IMAGE (SECOND IMAGE ATTACHED)
|
|
291
|
+
A reference screenshot of the form is attached as the SECOND image.
|
|
292
|
+
- FIRST image = live current browser state
|
|
293
|
+
- SECOND image = reference showing all form fields to fill
|
|
294
|
+
- Scan EVERY field in the reference image: text inputs, email, phone, date, radio buttons, checkboxes, dropdowns, textareas, file inputs
|
|
295
|
+
- Fill ALL fields â do not skip any. Use the DOM elements list below to find accurate selectors.
|
|
296
|
+
- For radio buttons â {"action":"click","selector":"input[type='radio'][value='Male']"}
|
|
297
|
+
- For checkboxes â {"action":"check","selector":"#hobbies-checkbox-1","label":"Sports"}
|
|
298
|
+
- For dropdowns â {"action":"select","selector":"#state","value":"NCR"}
|
|
299
|
+
- After all fields, click Submit.
|
|
300
|
+
` : '';
|
|
247
301
|
return `You are an intelligent browser test agent with vision. Your job is to look at the current screen, understand the test step intent, and decide what actions to perform.
|
|
248
302
|
|
|
249
303
|
GOAL: "${stepText}"
|
|
250
304
|
ROUND: ${round + 1}
|
|
251
|
-
|
|
305
|
+
${attachmentSection}
|
|
252
306
|
## YOUR CAPABILITIES
|
|
253
307
|
1. SCREEN UNDERSTANDING â Identify every visible UI element, form field, button, link, and label on screen.
|
|
254
|
-
2. INTENT MAPPING â Understand what the test step MEANS even if the wording is vague or high-level
|
|
308
|
+
2. INTENT MAPPING â Understand what the test step MEANS even if the wording is vague or high-level.
|
|
255
309
|
3. DATA SIMULATION â If a form needs data that is not specified, INVENT realistic test data:
|
|
256
|
-
-
|
|
257
|
-
- Email: "
|
|
258
|
-
- Phone: "
|
|
259
|
-
- Address: "123 Test
|
|
310
|
+
- First name: "John", Last name: "Smith"
|
|
311
|
+
- Email: "john.smith@example.com"
|
|
312
|
+
- Phone/Mobile: "9876543210" (10 digits)
|
|
313
|
+
- Address: "123 Test Street, Springfield"
|
|
314
|
+
- Date of Birth: use format required by field (e.g. "15 Jan 1990")
|
|
260
315
|
- Postal/ZIP: "12345"
|
|
316
|
+
- Subjects: "Mathematics"
|
|
261
317
|
- Credit card: "4111111111111111", expiry "12/25", CVV "123"
|
|
262
318
|
- Password: "Test@1234"
|
|
319
|
+
- State: pick first available option after opening dropdown
|
|
320
|
+
- City: pick first available option after state is selected
|
|
263
321
|
- Any other field: invent plausible data based on the field label
|
|
264
322
|
|
|
265
323
|
## RETURN FORMAT
|
|
@@ -286,13 +344,36 @@ Set "done": true with empty "actions" array when the goal is fully accomplished.
|
|
|
286
344
|
- Use name attribute: input[name="firstName"]
|
|
287
345
|
- NEVER use position or coordinates
|
|
288
346
|
|
|
347
|
+
## RETURN FORMAT
|
|
348
|
+
Return ONLY valid JSON â no markdown, no explanation:
|
|
349
|
+
{
|
|
350
|
+
"reasoning": "What I see and what I plan to do",
|
|
351
|
+
"done": false,
|
|
352
|
+
"actions": [
|
|
353
|
+
{"action": "click", "selector": "SELECTOR"},
|
|
354
|
+
{"action": "fill", "selector": "SELECTOR", "value": "VALUE"},
|
|
355
|
+
{"action": "select", "selector": "SELECTOR", "value": "OPTION_LABEL"},
|
|
356
|
+
{"action": "check", "selector": "SELECTOR"},
|
|
357
|
+
{"action": "verify", "text": "TEXT_TO_CHECK", "not": false},
|
|
358
|
+
{"action": "wait", "ms": 500},
|
|
359
|
+
{"action": "scroll", "selector": "SELECTOR"}
|
|
360
|
+
]
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
Set "done": true with empty "actions" when goal is fully accomplished.
|
|
364
|
+
|
|
365
|
+
## SELECTOR RULES
|
|
366
|
+
- Prefer id: #firstName, #lastName, #userEmail
|
|
367
|
+
- Use name: input[name="firstName"]
|
|
368
|
+
- Use placeholder: input[placeholder="First Name"]
|
|
369
|
+
- Use text: button:has-text("Submit")
|
|
370
|
+
- NEVER use position or coordinates
|
|
371
|
+
|
|
289
372
|
## SMART RULES
|
|
290
|
-
- Look at ALL visible form fields and fill them ALL in one round
|
|
373
|
+
- Look at ALL visible form fields and fill them ALL in one round (up to 20 actions)
|
|
291
374
|
- If you see a multi-step form, complete this step fully then click continue/next/submit
|
|
292
|
-
- If
|
|
293
|
-
- If the goal is
|
|
294
|
-
- If the goal is already accomplished (correct page shown), set done: true immediately
|
|
295
|
-
- Maximum 5 actions per round`;
|
|
375
|
+
- If a reference image is attached, scan it to identify every field and fill them all
|
|
376
|
+
- If the goal is already accomplished (correct page shown), set done: true immediately`;
|
|
296
377
|
}
|
|
297
378
|
/**
|
|
298
379
|
* Universal AI agentic fallback.
|
|
@@ -430,34 +511,35 @@ async function extractDomElements(page) {
|
|
|
430
511
|
return '(could not extract elements)';
|
|
431
512
|
}
|
|
432
513
|
}
|
|
433
|
-
async function aiStepFallback(page, stepText) {
|
|
514
|
+
async function aiStepFallback(page, stepText, stepAttachment = null) {
|
|
434
515
|
const openaiKey = process.env.OPENAI_API_KEY;
|
|
435
516
|
if (!openaiKey)
|
|
436
517
|
throw new Error(`No OPENAI_API_KEY â cannot use AI fallback for: "${stepText}"`);
|
|
518
|
+
console.log(` đ¤ aiStepFallback called. hasAttachment=${!!stepAttachment}`);
|
|
437
519
|
const { default: OpenAI } = await import('openai');
|
|
438
520
|
const openai = new OpenAI({ apiKey: openaiKey });
|
|
439
|
-
const MAX_ROUNDS = 6;
|
|
521
|
+
const MAX_ROUNDS = stepAttachment ? 10 : 6;
|
|
440
522
|
let consecutiveFailures = 0;
|
|
441
523
|
for (let round = 0; round < MAX_ROUNDS; round++) {
|
|
442
|
-
// Extract real DOM elements so AI uses accurate selectors
|
|
443
524
|
const domElements = await extractDomElements(page);
|
|
444
525
|
const screenshotBuffer = await page.screenshot({ type: 'png' });
|
|
445
526
|
const base64 = screenshotBuffer.toString('base64');
|
|
446
527
|
const response = await openai.chat.completions.create({
|
|
447
528
|
model: 'gpt-4o',
|
|
448
|
-
max_tokens: 800,
|
|
529
|
+
max_tokens: stepAttachment ? 3000 : 800,
|
|
449
530
|
messages: [{
|
|
450
531
|
role: 'user',
|
|
451
532
|
content: [
|
|
452
533
|
{
|
|
453
534
|
type: 'text',
|
|
454
|
-
text: buildAgentPrompt(stepText, round) + `
|
|
535
|
+
text: buildAgentPrompt(stepText, round, !!stepAttachment) + `
|
|
455
536
|
|
|
456
537
|
## REAL DOM ELEMENTS ON THIS PAGE
|
|
457
538
|
Use these actual elements â prefer id, data-test, name, aria-label over guessing:
|
|
458
|
-
${domElements}`
|
|
539
|
+
${domElements}` + (stepAttachment ? `\n\nThe REFERENCE IMAGE (second image) shows all form fields. Fill every field you see in it using the DOM elements above.` : '')
|
|
459
540
|
},
|
|
460
|
-
{ type: 'image_url', image_url: { url: `data:image/png;base64,${base64}` } }
|
|
541
|
+
{ type: 'image_url', image_url: { url: `data:image/png;base64,${base64}` } },
|
|
542
|
+
...(stepAttachment ? [{ type: 'image_url', image_url: { url: `data:image/jpeg;base64,${stepAttachment}` } }] : [])
|
|
461
543
|
]
|
|
462
544
|
}]
|
|
463
545
|
});
|
|
@@ -465,8 +547,15 @@ ${domElements}`
|
|
|
465
547
|
.trim().replace(/```json\n?/gi, '').replace(/```/g, '').trim();
|
|
466
548
|
if (!raw)
|
|
467
549
|
throw new Error(`AI returned empty response for: "${stepText}"`);
|
|
468
|
-
|
|
469
|
-
|
|
550
|
+
let plan;
|
|
551
|
+
try {
|
|
552
|
+
plan = JSON.parse(raw);
|
|
553
|
+
}
|
|
554
|
+
catch (parseErr) {
|
|
555
|
+
console.log(` â ī¸ JSON parse failed. Raw (first 300): ${raw.slice(0, 300)}`);
|
|
556
|
+
throw new Error(`AI returned invalid JSON: ${parseErr.message}`);
|
|
557
|
+
}
|
|
558
|
+
console.log(` đ¤ AI round ${round + 1} â ${plan.reasoning} â actions: ${plan.actions?.length || 0}`);
|
|
470
559
|
if (plan.done || !plan.actions?.length) {
|
|
471
560
|
console.log(` â
AI agent completed: "${stepText}"`);
|
|
472
561
|
return;
|
|
@@ -476,17 +565,58 @@ ${domElements}`
|
|
|
476
565
|
console.log(` đ¤ Executing: ${JSON.stringify(act)}`);
|
|
477
566
|
try {
|
|
478
567
|
if (act.action === 'click') {
|
|
479
|
-
// Try multiple selector variants derived from the DOM
|
|
480
568
|
const tried = await tryAIClick(page, act.selector);
|
|
481
569
|
if (!tried)
|
|
482
570
|
throw new Error(`Could not find element: ${act.selector}`);
|
|
483
|
-
await page.waitForTimeout(
|
|
571
|
+
await page.waitForTimeout(400);
|
|
484
572
|
}
|
|
485
573
|
else if (act.action === 'fill') {
|
|
486
574
|
await tryAIFill(page, act.selector, act.value || '');
|
|
487
575
|
}
|
|
488
576
|
else if (act.action === 'select') {
|
|
489
|
-
|
|
577
|
+
let done = false;
|
|
578
|
+
for (const fn of [
|
|
579
|
+
() => page.locator(act.selector).first().selectOption({ label: act.value }, { timeout: 4000 }),
|
|
580
|
+
() => page.locator(act.selector).first().selectOption({ value: act.value }, { timeout: 4000 }),
|
|
581
|
+
async () => { await page.locator(act.selector).first().click({ timeout: 3000 }); await page.getByText(act.value, { exact: false }).first().click({ timeout: 3000 }); },
|
|
582
|
+
]) {
|
|
583
|
+
try {
|
|
584
|
+
await fn();
|
|
585
|
+
done = true;
|
|
586
|
+
break;
|
|
587
|
+
}
|
|
588
|
+
catch { /* next */ }
|
|
589
|
+
}
|
|
590
|
+
if (!done)
|
|
591
|
+
throw new Error(`Could not select "${act.value}" in ${act.selector}`);
|
|
592
|
+
}
|
|
593
|
+
else if (act.action === 'check') {
|
|
594
|
+
const el = page.locator(act.selector).first();
|
|
595
|
+
const checked = await el.isChecked().catch(() => false);
|
|
596
|
+
if (!checked) {
|
|
597
|
+
let done = false;
|
|
598
|
+
for (const fn of [
|
|
599
|
+
() => el.click({ timeout: 4000 }),
|
|
600
|
+
() => page.locator(`label:has-text("${act.label || ''}")`).click({ timeout: 4000 }),
|
|
601
|
+
]) {
|
|
602
|
+
try {
|
|
603
|
+
await fn();
|
|
604
|
+
done = true;
|
|
605
|
+
break;
|
|
606
|
+
}
|
|
607
|
+
catch { /* next */ }
|
|
608
|
+
}
|
|
609
|
+
if (!done)
|
|
610
|
+
throw new Error(`Could not check ${act.selector}`);
|
|
611
|
+
}
|
|
612
|
+
}
|
|
613
|
+
else if (act.action === 'upload') {
|
|
614
|
+
try {
|
|
615
|
+
await page.setInputFiles(act.selector, act.value || '', { timeout: 4000 });
|
|
616
|
+
}
|
|
617
|
+
catch {
|
|
618
|
+
console.log(` âšī¸ File upload skipped (${act.selector})`);
|
|
619
|
+
}
|
|
490
620
|
}
|
|
491
621
|
else if (act.action === 'verify') {
|
|
492
622
|
const content = await page.textContent('body') || '';
|
|
@@ -515,11 +645,8 @@ ${domElements}`
|
|
|
515
645
|
consecutiveFailures++;
|
|
516
646
|
}
|
|
517
647
|
}
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
if (consecutiveFailures >= plan.actions.length * 2) {
|
|
521
|
-
throw new Error(`AI agent stuck â no actions succeeded after multiple rounds for: "${stepText}"`);
|
|
522
|
-
}
|
|
648
|
+
if (!anySucceeded && consecutiveFailures >= plan.actions.length * 2) {
|
|
649
|
+
throw new Error(`AI agent stuck â no actions succeeded for: "${stepText}"`);
|
|
523
650
|
}
|
|
524
651
|
await page.waitForLoadState('domcontentloaded').catch(() => { });
|
|
525
652
|
await page.waitForTimeout(300);
|