@cutleryapp/agent 1.0.23 → 1.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/mcp-executor.js +125 -31
  2. package/package.json +1 -1
@@ -58,9 +58,18 @@ class TestExecutor {
58
58
  total: steps.length,
59
59
  message: raw,
60
60
  });
61
+ const stepAttachment = (testCase.step_attachments || {})[String(i)] || null;
62
+ console.log(` 📎 Step ${i} attachment: ${stepAttachment ? `YES (${stepAttachment.length} chars)` : 'none'}`);
61
63
  let stepError;
62
64
  try {
63
- let handled = false;
65
+ // When a reference image is attached, skip MCP strategies entirely and go
66
+ // straight to the AI multi-field loop so it can scan the form and fill everything.
67
+ if (stepAttachment) {
68
+ console.log(` 📎 Reference image attached — using AI form-fill loop`);
69
+ await aiStepFallback(page, raw, stepAttachment);
70
+ // fall through to screenshot/result logging below
71
+ }
72
+ let handled = !!stepAttachment; // skip MCP strategies when attachment present
64
73
  // 1. Navigate — direct URL goto, no selector needed
65
74
  if (lower.includes("navigate to") || lower.includes("go to")) {
66
75
  const urlMatch = raw.match(/(?:navigate\s+to|go\s+to)\s+(https?:\/\/\S+|\/\S*|\S+\.\S+)/i);
@@ -156,7 +165,7 @@ class TestExecutor {
156
165
  }
157
166
  else {
158
167
  console.log(` 🤖 AI intent loop for: "${raw}"`);
159
- await aiStepFallback(page, raw);
168
+ await aiStepFallback(page, raw, null);
160
169
  }
161
170
  }
162
171
  }
@@ -243,23 +252,39 @@ async function tryClick(page, nameRe, label) {
243
252
  // AI vision fallback
244
253
  return await aiClickFallback(page, label);
245
254
  }
246
- function buildAgentPrompt(stepText, round) {
255
+ function buildAgentPrompt(stepText, round, hasAttachment = false) {
256
+ const attachmentSection = hasAttachment ? `
257
+ ## REFERENCE IMAGE (SECOND IMAGE ATTACHED)
258
+ A reference screenshot of the form is attached as the SECOND image.
259
+ - FIRST image = live current browser state
260
+ - SECOND image = reference showing all form fields to fill
261
+ - Scan EVERY field in the reference image: text inputs, email, phone, date, radio buttons, checkboxes, dropdowns, textareas, file inputs
262
+ - Fill ALL fields — do not skip any. Use the DOM elements list below to find accurate selectors.
263
+ - For radio buttons → {"action":"click","selector":"input[type='radio'][value='Male']"}
264
+ - For checkboxes → {"action":"check","selector":"#hobbies-checkbox-1","label":"Sports"}
265
+ - For dropdowns → {"action":"select","selector":"#state","value":"NCR"}
266
+ - After all fields, click Submit.
267
+ ` : '';
247
268
  return `You are an intelligent browser test agent with vision. Your job is to look at the current screen, understand the test step intent, and decide what actions to perform.
248
269
 
249
270
  GOAL: "${stepText}"
250
271
  ROUND: ${round + 1}
251
-
272
+ ${attachmentSection}
252
273
  ## YOUR CAPABILITIES
253
274
  1. SCREEN UNDERSTANDING — Identify every visible UI element, form field, button, link, and label on screen.
254
- 2. INTENT MAPPING — Understand what the test step MEANS even if the wording is vague or high-level (e.g. "checkout the product" = navigate to cart → fill shipping info → complete purchase).
275
+ 2. INTENT MAPPING — Understand what the test step MEANS even if the wording is vague or high-level.
255
276
  3. DATA SIMULATION — If a form needs data that is not specified, INVENT realistic test data:
256
- - Names: "John Smith" or "Test User"
257
- - Email: "testuser@example.com"
258
- - Phone: "555-0100"
259
- - Address: "123 Test St, Springfield"
277
+ - First name: "John", Last name: "Smith"
278
+ - Email: "john.smith@example.com"
279
+ - Phone/Mobile: "9876543210" (10 digits)
280
+ - Address: "123 Test Street, Springfield"
281
+ - Date of Birth: use format required by field (e.g. "15 Jan 1990")
260
282
  - Postal/ZIP: "12345"
283
+ - Subjects: "Mathematics"
261
284
  - Credit card: "4111111111111111", expiry "12/25", CVV "123"
262
285
  - Password: "Test@1234"
286
+ - State: pick first available option after opening dropdown
287
+ - City: pick first available option after state is selected
263
288
  - Any other field: invent plausible data based on the field label
264
289
 
265
290
  ## RETURN FORMAT
@@ -286,13 +311,36 @@ Set "done": true with empty "actions" array when the goal is fully accomplished.
286
311
  - Use name attribute: input[name="firstName"]
287
312
  - NEVER use position or coordinates
288
313
 
314
+ ## RETURN FORMAT
315
+ Return ONLY valid JSON — no markdown, no explanation:
316
+ {
317
+ "reasoning": "What I see and what I plan to do",
318
+ "done": false,
319
+ "actions": [
320
+ {"action": "click", "selector": "SELECTOR"},
321
+ {"action": "fill", "selector": "SELECTOR", "value": "VALUE"},
322
+ {"action": "select", "selector": "SELECTOR", "value": "OPTION_LABEL"},
323
+ {"action": "check", "selector": "SELECTOR"},
324
+ {"action": "verify", "text": "TEXT_TO_CHECK", "not": false},
325
+ {"action": "wait", "ms": 500},
326
+ {"action": "scroll", "selector": "SELECTOR"}
327
+ ]
328
+ }
329
+
330
+ Set "done": true with empty "actions" when goal is fully accomplished.
331
+
332
+ ## SELECTOR RULES
333
+ - Prefer id: #firstName, #lastName, #userEmail
334
+ - Use name: input[name="firstName"]
335
+ - Use placeholder: input[placeholder="First Name"]
336
+ - Use text: button:has-text("Submit")
337
+ - NEVER use position or coordinates
338
+
289
339
  ## SMART RULES
290
- - Look at ALL visible form fields and fill them ALL in one round
340
+ - Look at ALL visible form fields and fill them ALL in one round (up to 20 actions)
291
341
  - If you see a multi-step form, complete this step fully then click continue/next/submit
292
- - If the goal is "checkout": cart → fill info → continue → finish
293
- - If the goal is "register" or "sign up": fill all fields + submit
294
- - If the goal is already accomplished (correct page shown), set done: true immediately
295
- - Maximum 5 actions per round`;
342
+ - If a reference image is attached, scan it to identify every field and fill them all
343
+ - If the goal is already accomplished (correct page shown), set done: true immediately`;
296
344
  }
297
345
  /**
298
346
  * Universal AI agentic fallback.
@@ -430,34 +478,35 @@ async function extractDomElements(page) {
430
478
  return '(could not extract elements)';
431
479
  }
432
480
  }
433
- async function aiStepFallback(page, stepText) {
481
+ async function aiStepFallback(page, stepText, stepAttachment = null) {
434
482
  const openaiKey = process.env.OPENAI_API_KEY;
435
483
  if (!openaiKey)
436
484
  throw new Error(`No OPENAI_API_KEY — cannot use AI fallback for: "${stepText}"`);
485
+ console.log(` 🤖 aiStepFallback called. hasAttachment=${!!stepAttachment}`);
437
486
  const { default: OpenAI } = await import('openai');
438
487
  const openai = new OpenAI({ apiKey: openaiKey });
439
- const MAX_ROUNDS = 6;
488
+ const MAX_ROUNDS = stepAttachment ? 10 : 6;
440
489
  let consecutiveFailures = 0;
441
490
  for (let round = 0; round < MAX_ROUNDS; round++) {
442
- // Extract real DOM elements so AI uses accurate selectors
443
491
  const domElements = await extractDomElements(page);
444
492
  const screenshotBuffer = await page.screenshot({ type: 'png' });
445
493
  const base64 = screenshotBuffer.toString('base64');
446
494
  const response = await openai.chat.completions.create({
447
495
  model: 'gpt-4o',
448
- max_tokens: 800,
496
+ max_tokens: stepAttachment ? 3000 : 800,
449
497
  messages: [{
450
498
  role: 'user',
451
499
  content: [
452
500
  {
453
501
  type: 'text',
454
- text: buildAgentPrompt(stepText, round) + `
502
+ text: buildAgentPrompt(stepText, round, !!stepAttachment) + `
455
503
 
456
504
  ## REAL DOM ELEMENTS ON THIS PAGE
457
505
  Use these actual elements — prefer id, data-test, name, aria-label over guessing:
458
- ${domElements}`
506
+ ${domElements}` + (stepAttachment ? `\n\nThe REFERENCE IMAGE (second image) shows all form fields. Fill every field you see in it using the DOM elements above.` : '')
459
507
  },
460
- { type: 'image_url', image_url: { url: `data:image/png;base64,${base64}` } }
508
+ { type: 'image_url', image_url: { url: `data:image/png;base64,${base64}` } },
509
+ ...(stepAttachment ? [{ type: 'image_url', image_url: { url: `data:image/jpeg;base64,${stepAttachment}` } }] : [])
461
510
  ]
462
511
  }]
463
512
  });
@@ -465,8 +514,15 @@ ${domElements}`
465
514
  .trim().replace(/```json\n?/gi, '').replace(/```/g, '').trim();
466
515
  if (!raw)
467
516
  throw new Error(`AI returned empty response for: "${stepText}"`);
468
- const plan = JSON.parse(raw);
469
- console.log(` 🤖 AI round ${round + 1} — ${plan.reasoning}`);
517
+ let plan;
518
+ try {
519
+ plan = JSON.parse(raw);
520
+ }
521
+ catch (parseErr) {
522
+ console.log(` ⚠️ JSON parse failed. Raw (first 300): ${raw.slice(0, 300)}`);
523
+ throw new Error(`AI returned invalid JSON: ${parseErr.message}`);
524
+ }
525
+ console.log(` 🤖 AI round ${round + 1} — ${plan.reasoning} — actions: ${plan.actions?.length || 0}`);
470
526
  if (plan.done || !plan.actions?.length) {
471
527
  console.log(` ✅ AI agent completed: "${stepText}"`);
472
528
  return;
@@ -476,17 +532,58 @@ ${domElements}`
476
532
  console.log(` 🤖 Executing: ${JSON.stringify(act)}`);
477
533
  try {
478
534
  if (act.action === 'click') {
479
- // Try multiple selector variants derived from the DOM
480
535
  const tried = await tryAIClick(page, act.selector);
481
536
  if (!tried)
482
537
  throw new Error(`Could not find element: ${act.selector}`);
483
- await page.waitForTimeout(500);
538
+ await page.waitForTimeout(400);
484
539
  }
485
540
  else if (act.action === 'fill') {
486
541
  await tryAIFill(page, act.selector, act.value || '');
487
542
  }
488
543
  else if (act.action === 'select') {
489
- await page.locator(act.selector).first().selectOption({ label: act.value });
544
+ let done = false;
545
+ for (const fn of [
546
+ () => page.locator(act.selector).first().selectOption({ label: act.value }, { timeout: 4000 }),
547
+ () => page.locator(act.selector).first().selectOption({ value: act.value }, { timeout: 4000 }),
548
+ async () => { await page.locator(act.selector).first().click({ timeout: 3000 }); await page.getByText(act.value, { exact: false }).first().click({ timeout: 3000 }); },
549
+ ]) {
550
+ try {
551
+ await fn();
552
+ done = true;
553
+ break;
554
+ }
555
+ catch { /* next */ }
556
+ }
557
+ if (!done)
558
+ throw new Error(`Could not select "${act.value}" in ${act.selector}`);
559
+ }
560
+ else if (act.action === 'check') {
561
+ const el = page.locator(act.selector).first();
562
+ const checked = await el.isChecked().catch(() => false);
563
+ if (!checked) {
564
+ let done = false;
565
+ for (const fn of [
566
+ () => el.click({ timeout: 4000 }),
567
+ () => page.locator(`label:has-text("${act.label || ''}")`).click({ timeout: 4000 }),
568
+ ]) {
569
+ try {
570
+ await fn();
571
+ done = true;
572
+ break;
573
+ }
574
+ catch { /* next */ }
575
+ }
576
+ if (!done)
577
+ throw new Error(`Could not check ${act.selector}`);
578
+ }
579
+ }
580
+ else if (act.action === 'upload') {
581
+ try {
582
+ await page.setInputFiles(act.selector, act.value || '', { timeout: 4000 });
583
+ }
584
+ catch {
585
+ console.log(` ℹ️ File upload skipped (${act.selector})`);
586
+ }
490
587
  }
491
588
  else if (act.action === 'verify') {
492
589
  const content = await page.textContent('body') || '';
@@ -515,11 +612,8 @@ ${domElements}`
515
612
  consecutiveFailures++;
516
613
  }
517
614
  }
518
- // If 2 consecutive rounds had zero successes, bail early
519
- if (!anySucceeded) {
520
- if (consecutiveFailures >= plan.actions.length * 2) {
521
- throw new Error(`AI agent stuck — no actions succeeded after multiple rounds for: "${stepText}"`);
522
- }
615
+ if (!anySucceeded && consecutiveFailures >= plan.actions.length * 2) {
616
+ throw new Error(`AI agent stuck — no actions succeeded for: "${stepText}"`);
523
617
  }
524
618
  await page.waitForLoadState('domcontentloaded').catch(() => { });
525
619
  await page.waitForTimeout(300);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cutleryapp/agent",
3
- "version": "1.0.23",
3
+ "version": "1.0.25",
4
4
  "description": "Local agent that connects your machine to the Cutlery QA platform and runs UI tests via Playwright",
5
5
  "main": "dist/cli.js",
6
6
  "bin": {