@cutleryapp/agent 1.0.24 → 1.0.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/mcp-executor.js +158 -31
  2. package/package.json +1 -1
@@ -58,9 +58,18 @@ class TestExecutor {
58
58
  total: steps.length,
59
59
  message: raw,
60
60
  });
61
+ const stepAttachment = (testCase.step_attachments || {})[String(i)] || null;
62
+ console.log(` 📎 Step ${i} attachment: ${stepAttachment ? `YES (${stepAttachment.length} chars)` : 'none'}`);
61
63
  let stepError;
62
64
  try {
63
- let handled = false;
65
+ // When a reference image is attached, skip MCP strategies entirely and go
66
+ // straight to the AI multi-field loop so it can scan the form and fill everything.
67
+ if (stepAttachment) {
68
+ console.log(` 📎 Reference image attached — using AI form-fill loop`);
69
+ await aiStepFallback(page, raw, stepAttachment);
70
+ // fall through to screenshot/result logging below
71
+ }
72
+ let handled = !!stepAttachment; // skip MCP strategies when attachment present
64
73
  // 1. Navigate — direct URL goto, no selector needed
65
74
  if (lower.includes("navigate to") || lower.includes("go to")) {
66
75
  const urlMatch = raw.match(/(?:navigate\s+to|go\s+to)\s+(https?:\/\/\S+|\/\S*|\S+\.\S+)/i);
@@ -92,6 +101,39 @@ class TestExecutor {
92
101
  handled = true;
93
102
  }
94
103
  }
104
+ // 3a. Multi-field fill: "Fill firstname, lastname" → fill each with inferred value
105
+ if (!handled && /^(?:fill|type)\s+/i.test(raw) && !/\s+(?:in|into|with)\s+/i.test(raw)) {
106
+ const fieldsPart = raw.replace(/^(?:fill|type)\s+/i, "").trim();
107
+ const fields = fieldsPart.split(/,\s*/).map((f) => f.trim().replace(/^["']|["']$/g, "")).filter(Boolean);
108
+ if (fields.length > 1) {
109
+ const valueMap = {
110
+ firstname: "John", first: "John", fname: "John",
111
+ lastname: "Smith", last: "Smith", lname: "Smith", surname: "Smith",
112
+ name: "John Smith", fullname: "John Smith",
113
+ email: "john.smith@example.com", emailaddress: "john.smith@example.com",
114
+ phone: "9876543210", mobile: "9876543210", phonenumber: "9876543210", mobilenumber: "9876543210",
115
+ address: "123 Test Street", currentaddress: "123 Test Street", streetaddress: "123 Test Street",
116
+ city: "New York", state: "New York",
117
+ zip: "10001", zipcode: "10001", postalcode: "10001",
118
+ dob: "01/01/1990", dateofbirth: "01/01/1990", birthdate: "01/01/1990",
119
+ age: "30", username: "john.smith",
120
+ password: "Test@1234", company: "Acme Corp",
121
+ subject: "Mathematics", subjects: "Mathematics",
122
+ message: "This is a test message.", comment: "Test comment.",
123
+ description: "Test description.",
124
+ };
125
+ for (const field of fields) {
126
+ const key = field.toLowerCase().replace(/[\s_-]+/g, "");
127
+ const value = valueMap[key] || "Test Value";
128
+ console.log(` ⌨️ Multi-fill: "${field}" → "${value}"`);
129
+ try {
130
+ await tryFill(page, field, value);
131
+ }
132
+ catch { /* ignore individual failures */ }
133
+ }
134
+ handled = true;
135
+ }
136
+ }
95
137
  // 3. Fill — smart selector strategies via MCP/Playwright
96
138
  if (!handled && (lower.includes("fill") || lower.includes("type") || lower.includes("enter"))) {
97
139
  const match = raw.match(/(?:enter|fill|type)\s+"([^"]+)"\s+(?:in|into)\s+(?:the\s+)?"?([^"]+?)"?\s*(?:field|input|box|area)?\s*$/i) ||
@@ -156,7 +198,7 @@ class TestExecutor {
156
198
  }
157
199
  else {
158
200
  console.log(` 🤖 AI intent loop for: "${raw}"`);
159
- await aiStepFallback(page, raw);
201
+ await aiStepFallback(page, raw, null);
160
202
  }
161
203
  }
162
204
  }
@@ -243,23 +285,39 @@ async function tryClick(page, nameRe, label) {
243
285
  // AI vision fallback
244
286
  return await aiClickFallback(page, label);
245
287
  }
246
- function buildAgentPrompt(stepText, round) {
288
+ function buildAgentPrompt(stepText, round, hasAttachment = false) {
289
+ const attachmentSection = hasAttachment ? `
290
+ ## REFERENCE IMAGE (SECOND IMAGE ATTACHED)
291
+ A reference screenshot of the form is attached as the SECOND image.
292
+ - FIRST image = live current browser state
293
+ - SECOND image = reference showing all form fields to fill
294
+ - Scan EVERY field in the reference image: text inputs, email, phone, date, radio buttons, checkboxes, dropdowns, textareas, file inputs
295
+ - Fill ALL fields — do not skip any. Use the DOM elements list below to find accurate selectors.
296
+ - For radio buttons → {"action":"click","selector":"input[type='radio'][value='Male']"}
297
+ - For checkboxes → {"action":"check","selector":"#hobbies-checkbox-1","label":"Sports"}
298
+ - For dropdowns → {"action":"select","selector":"#state","value":"NCR"}
299
+ - After all fields, click Submit.
300
+ ` : '';
247
301
  return `You are an intelligent browser test agent with vision. Your job is to look at the current screen, understand the test step intent, and decide what actions to perform.
248
302
 
249
303
  GOAL: "${stepText}"
250
304
  ROUND: ${round + 1}
251
-
305
+ ${attachmentSection}
252
306
  ## YOUR CAPABILITIES
253
307
  1. SCREEN UNDERSTANDING — Identify every visible UI element, form field, button, link, and label on screen.
254
- 2. INTENT MAPPING — Understand what the test step MEANS even if the wording is vague or high-level (e.g. "checkout the product" = navigate to cart → fill shipping info → complete purchase).
308
+ 2. INTENT MAPPING — Understand what the test step MEANS even if the wording is vague or high-level.
255
309
  3. DATA SIMULATION — If a form needs data that is not specified, INVENT realistic test data:
256
- - Names: "John Smith" or "Test User"
257
- - Email: "testuser@example.com"
258
- - Phone: "555-0100"
259
- - Address: "123 Test St, Springfield"
310
+ - First name: "John", Last name: "Smith"
311
+ - Email: "john.smith@example.com"
312
+ - Phone/Mobile: "9876543210" (10 digits)
313
+ - Address: "123 Test Street, Springfield"
314
+ - Date of Birth: use format required by field (e.g. "15 Jan 1990")
260
315
  - Postal/ZIP: "12345"
316
+ - Subjects: "Mathematics"
261
317
  - Credit card: "4111111111111111", expiry "12/25", CVV "123"
262
318
  - Password: "Test@1234"
319
+ - State: pick first available option after opening dropdown
320
+ - City: pick first available option after state is selected
263
321
  - Any other field: invent plausible data based on the field label
264
322
 
265
323
  ## RETURN FORMAT
@@ -286,13 +344,36 @@ Set "done": true with empty "actions" array when the goal is fully accomplished.
286
344
  - Use name attribute: input[name="firstName"]
287
345
  - NEVER use position or coordinates
288
346
 
347
+ ## RETURN FORMAT
348
+ Return ONLY valid JSON — no markdown, no explanation:
349
+ {
350
+ "reasoning": "What I see and what I plan to do",
351
+ "done": false,
352
+ "actions": [
353
+ {"action": "click", "selector": "SELECTOR"},
354
+ {"action": "fill", "selector": "SELECTOR", "value": "VALUE"},
355
+ {"action": "select", "selector": "SELECTOR", "value": "OPTION_LABEL"},
356
+ {"action": "check", "selector": "SELECTOR"},
357
+ {"action": "verify", "text": "TEXT_TO_CHECK", "not": false},
358
+ {"action": "wait", "ms": 500},
359
+ {"action": "scroll", "selector": "SELECTOR"}
360
+ ]
361
+ }
362
+
363
+ Set "done": true with empty "actions" when goal is fully accomplished.
364
+
365
+ ## SELECTOR RULES
366
+ - Prefer id: #firstName, #lastName, #userEmail
367
+ - Use name: input[name="firstName"]
368
+ - Use placeholder: input[placeholder="First Name"]
369
+ - Use text: button:has-text("Submit")
370
+ - NEVER use position or coordinates
371
+
289
372
  ## SMART RULES
290
- - Look at ALL visible form fields and fill them ALL in one round
373
+ - Look at ALL visible form fields and fill them ALL in one round (up to 20 actions)
291
374
  - If you see a multi-step form, complete this step fully then click continue/next/submit
292
- - If the goal is "checkout": cart → fill info → continue → finish
293
- - If the goal is "register" or "sign up": fill all fields + submit
294
- - If the goal is already accomplished (correct page shown), set done: true immediately
295
- - Maximum 5 actions per round`;
375
+ - If a reference image is attached, scan it to identify every field and fill them all
376
+ - If the goal is already accomplished (correct page shown), set done: true immediately`;
296
377
  }
297
378
  /**
298
379
  * Universal AI agentic fallback.
@@ -430,34 +511,35 @@ async function extractDomElements(page) {
430
511
  return '(could not extract elements)';
431
512
  }
432
513
  }
433
- async function aiStepFallback(page, stepText) {
514
+ async function aiStepFallback(page, stepText, stepAttachment = null) {
434
515
  const openaiKey = process.env.OPENAI_API_KEY;
435
516
  if (!openaiKey)
436
517
  throw new Error(`No OPENAI_API_KEY — cannot use AI fallback for: "${stepText}"`);
518
+ console.log(` 🤖 aiStepFallback called. hasAttachment=${!!stepAttachment}`);
437
519
  const { default: OpenAI } = await import('openai');
438
520
  const openai = new OpenAI({ apiKey: openaiKey });
439
- const MAX_ROUNDS = 6;
521
+ const MAX_ROUNDS = stepAttachment ? 10 : 6;
440
522
  let consecutiveFailures = 0;
441
523
  for (let round = 0; round < MAX_ROUNDS; round++) {
442
- // Extract real DOM elements so AI uses accurate selectors
443
524
  const domElements = await extractDomElements(page);
444
525
  const screenshotBuffer = await page.screenshot({ type: 'png' });
445
526
  const base64 = screenshotBuffer.toString('base64');
446
527
  const response = await openai.chat.completions.create({
447
528
  model: 'gpt-4o',
448
- max_tokens: 800,
529
+ max_tokens: stepAttachment ? 3000 : 800,
449
530
  messages: [{
450
531
  role: 'user',
451
532
  content: [
452
533
  {
453
534
  type: 'text',
454
- text: buildAgentPrompt(stepText, round) + `
535
+ text: buildAgentPrompt(stepText, round, !!stepAttachment) + `
455
536
 
456
537
  ## REAL DOM ELEMENTS ON THIS PAGE
457
538
  Use these actual elements — prefer id, data-test, name, aria-label over guessing:
458
- ${domElements}`
539
+ ${domElements}` + (stepAttachment ? `\n\nThe REFERENCE IMAGE (second image) shows all form fields. Fill every field you see in it using the DOM elements above.` : '')
459
540
  },
460
- { type: 'image_url', image_url: { url: `data:image/png;base64,${base64}` } }
541
+ { type: 'image_url', image_url: { url: `data:image/png;base64,${base64}` } },
542
+ ...(stepAttachment ? [{ type: 'image_url', image_url: { url: `data:image/jpeg;base64,${stepAttachment}` } }] : [])
461
543
  ]
462
544
  }]
463
545
  });
@@ -465,8 +547,15 @@ ${domElements}`
465
547
  .trim().replace(/```json\n?/gi, '').replace(/```/g, '').trim();
466
548
  if (!raw)
467
549
  throw new Error(`AI returned empty response for: "${stepText}"`);
468
- const plan = JSON.parse(raw);
469
- console.log(` 🤖 AI round ${round + 1} — ${plan.reasoning}`);
550
+ let plan;
551
+ try {
552
+ plan = JSON.parse(raw);
553
+ }
554
+ catch (parseErr) {
555
+ console.log(` ⚠️ JSON parse failed. Raw (first 300): ${raw.slice(0, 300)}`);
556
+ throw new Error(`AI returned invalid JSON: ${parseErr.message}`);
557
+ }
558
+ console.log(` 🤖 AI round ${round + 1} — ${plan.reasoning} — actions: ${plan.actions?.length || 0}`);
470
559
  if (plan.done || !plan.actions?.length) {
471
560
  console.log(` ✅ AI agent completed: "${stepText}"`);
472
561
  return;
@@ -476,17 +565,58 @@ ${domElements}`
476
565
  console.log(` 🤖 Executing: ${JSON.stringify(act)}`);
477
566
  try {
478
567
  if (act.action === 'click') {
479
- // Try multiple selector variants derived from the DOM
480
568
  const tried = await tryAIClick(page, act.selector);
481
569
  if (!tried)
482
570
  throw new Error(`Could not find element: ${act.selector}`);
483
- await page.waitForTimeout(500);
571
+ await page.waitForTimeout(400);
484
572
  }
485
573
  else if (act.action === 'fill') {
486
574
  await tryAIFill(page, act.selector, act.value || '');
487
575
  }
488
576
  else if (act.action === 'select') {
489
- await page.locator(act.selector).first().selectOption({ label: act.value });
577
+ let done = false;
578
+ for (const fn of [
579
+ () => page.locator(act.selector).first().selectOption({ label: act.value }, { timeout: 4000 }),
580
+ () => page.locator(act.selector).first().selectOption({ value: act.value }, { timeout: 4000 }),
581
+ async () => { await page.locator(act.selector).first().click({ timeout: 3000 }); await page.getByText(act.value, { exact: false }).first().click({ timeout: 3000 }); },
582
+ ]) {
583
+ try {
584
+ await fn();
585
+ done = true;
586
+ break;
587
+ }
588
+ catch { /* next */ }
589
+ }
590
+ if (!done)
591
+ throw new Error(`Could not select "${act.value}" in ${act.selector}`);
592
+ }
593
+ else if (act.action === 'check') {
594
+ const el = page.locator(act.selector).first();
595
+ const checked = await el.isChecked().catch(() => false);
596
+ if (!checked) {
597
+ let done = false;
598
+ for (const fn of [
599
+ () => el.click({ timeout: 4000 }),
600
+ () => page.locator(`label:has-text("${act.label || ''}")`).click({ timeout: 4000 }),
601
+ ]) {
602
+ try {
603
+ await fn();
604
+ done = true;
605
+ break;
606
+ }
607
+ catch { /* next */ }
608
+ }
609
+ if (!done)
610
+ throw new Error(`Could not check ${act.selector}`);
611
+ }
612
+ }
613
+ else if (act.action === 'upload') {
614
+ try {
615
+ await page.setInputFiles(act.selector, act.value || '', { timeout: 4000 });
616
+ }
617
+ catch {
618
+ console.log(` ℹ️ File upload skipped (${act.selector})`);
619
+ }
490
620
  }
491
621
  else if (act.action === 'verify') {
492
622
  const content = await page.textContent('body') || '';
@@ -515,11 +645,8 @@ ${domElements}`
515
645
  consecutiveFailures++;
516
646
  }
517
647
  }
518
- // If 2 consecutive rounds had zero successes, bail early
519
- if (!anySucceeded) {
520
- if (consecutiveFailures >= plan.actions.length * 2) {
521
- throw new Error(`AI agent stuck — no actions succeeded after multiple rounds for: "${stepText}"`);
522
- }
648
+ if (!anySucceeded && consecutiveFailures >= plan.actions.length * 2) {
649
+ throw new Error(`AI agent stuck — no actions succeeded for: "${stepText}"`);
523
650
  }
524
651
  await page.waitForLoadState('domcontentloaded').catch(() => { });
525
652
  await page.waitForTimeout(300);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cutleryapp/agent",
3
- "version": "1.0.24",
3
+ "version": "1.0.26",
4
4
  "description": "Local agent that connects your machine to the Cutlery QA platform and runs UI tests via Playwright",
5
5
  "main": "dist/cli.js",
6
6
  "bin": {