npm - @cutleryapp/agent - Versions diffs - 1.0.24 → 1.0.26 - Mend

@cutleryapp/agent 1.0.24 → 1.0.26

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (2)

package/dist/mcp-executor.js +158 -31
package/package.json +1 -1

package/dist/mcp-executor.js CHANGED Viewed

@@ -58,9 +58,18 @@ class TestExecutor {
                     total: steps.length,
                     message: raw,
                 });
+                const stepAttachment = (testCase.step_attachments || {})[String(i)] || null;
+                console.log(`  📎 Step ${i} attachment: ${stepAttachment ? `YES (${stepAttachment.length} chars)` : 'none'}`);
                 let stepError;
                 try {
-                    let handled = false;
+                    // When a reference image is attached, skip MCP strategies entirely and go
+                    // straight to the AI multi-field loop so it can scan the form and fill everything.
+                    if (stepAttachment) {
+                        console.log(`  📎 Reference image attached — using AI form-fill loop`);
+                        await aiStepFallback(page, raw, stepAttachment);
+                        // fall through to screenshot/result logging below
+                    }
+                    let handled = !!stepAttachment; // skip MCP strategies when attachment present
                     // 1. Navigate — direct URL goto, no selector needed
                     if (lower.includes("navigate to") || lower.includes("go to")) {
                         const urlMatch = raw.match(/(?:navigate\s+to|go\s+to)\s+(https?:\/\/\S+|\/\S*|\S+\.\S+)/i);
@@ -92,6 +101,39 @@ class TestExecutor {
                                 handled = true;
                         }
                     }
+                    // 3a. Multi-field fill: "Fill firstname, lastname" → fill each with inferred value
+                    if (!handled && /^(?:fill|type)\s+/i.test(raw) && !/\s+(?:in|into|with)\s+/i.test(raw)) {
+                        const fieldsPart = raw.replace(/^(?:fill|type)\s+/i, "").trim();
+                        const fields = fieldsPart.split(/,\s*/).map((f) => f.trim().replace(/^["']|["']$/g, "")).filter(Boolean);
+                        if (fields.length > 1) {
+                            const valueMap = {
+                                firstname: "John", first: "John", fname: "John",
+                                lastname: "Smith", last: "Smith", lname: "Smith", surname: "Smith",
+                                name: "John Smith", fullname: "John Smith",
+                                email: "john.smith@example.com", emailaddress: "john.smith@example.com",
+                                phone: "9876543210", mobile: "9876543210", phonenumber: "9876543210", mobilenumber: "9876543210",
+                                address: "123 Test Street", currentaddress: "123 Test Street", streetaddress: "123 Test Street",
+                                city: "New York", state: "New York",
+                                zip: "10001", zipcode: "10001", postalcode: "10001",
+                                dob: "01/01/1990", dateofbirth: "01/01/1990", birthdate: "01/01/1990",
+                                age: "30", username: "john.smith",
+                                password: "Test@1234", company: "Acme Corp",
+                                subject: "Mathematics", subjects: "Mathematics",
+                                message: "This is a test message.", comment: "Test comment.",
+                                description: "Test description.",
+                            };
+                            for (const field of fields) {
+                                const key = field.toLowerCase().replace(/[\s_-]+/g, "");
+                                const value = valueMap[key] || "Test Value";
+                                console.log(`  ⌨️  Multi-fill: "${field}" → "${value}"`);
+                                try {
+                                    await tryFill(page, field, value);
+                                }
+                                catch { /* ignore individual failures */ }
+                            }
+                            handled = true;
+                        }
+                    }
                     // 3. Fill — smart selector strategies via MCP/Playwright
                     if (!handled && (lower.includes("fill") || lower.includes("type") || lower.includes("enter"))) {
                         const match = raw.match(/(?:enter|fill|type)\s+"([^"]+)"\s+(?:in|into)\s+(?:the\s+)?"?([^"]+?)"?\s*(?:field|input|box|area)?\s*$/i) ||
@@ -156,7 +198,7 @@ class TestExecutor {
                         }
                         else {
                             console.log(`  🤖 AI intent loop for: "${raw}"`);
-                            await aiStepFallback(page, raw);
+                            await aiStepFallback(page, raw, null);
                         }
                     }
                 }
@@ -243,23 +285,39 @@ async function tryClick(page, nameRe, label) {
     // AI vision fallback
     return await aiClickFallback(page, label);
 }
-function buildAgentPrompt(stepText, round) {
+function buildAgentPrompt(stepText, round, hasAttachment = false) {
+    const attachmentSection = hasAttachment ? `
+## REFERENCE IMAGE (SECOND IMAGE ATTACHED)
+A reference screenshot of the form is attached as the SECOND image.
+- FIRST image = live current browser state
+- SECOND image = reference showing all form fields to fill
+- Scan EVERY field in the reference image: text inputs, email, phone, date, radio buttons, checkboxes, dropdowns, textareas, file inputs
+- Fill ALL fields — do not skip any. Use the DOM elements list below to find accurate selectors.
+- For radio buttons → {"action":"click","selector":"input[type='radio'][value='Male']"}
+- For checkboxes → {"action":"check","selector":"#hobbies-checkbox-1","label":"Sports"}
+- For dropdowns → {"action":"select","selector":"#state","value":"NCR"}
+- After all fields, click Submit.
+` : '';
     return `You are an intelligent browser test agent with vision. Your job is to look at the current screen, understand the test step intent, and decide what actions to perform.
 GOAL: "${stepText}"
 ROUND: ${round + 1}
+${attachmentSection}
 ## YOUR CAPABILITIES
 1. SCREEN UNDERSTANDING — Identify every visible UI element, form field, button, link, and label on screen.
-2. INTENT MAPPING — Understand what the test step MEANS even if the wording is vague or high-level (e.g. "checkout the product" = navigate to cart → fill shipping info → complete purchase).
+2. INTENT MAPPING — Understand what the test step MEANS even if the wording is vague or high-level.
 3. DATA SIMULATION — If a form needs data that is not specified, INVENT realistic test data:
-   - Names: "John Smith" or "Test User"
-   - Email: "testuser@example.com"
-   - Phone: "555-0100"
-   - Address: "123 Test St, Springfield"
+   - First name: "John", Last name: "Smith"
+   - Email: "john.smith@example.com"
+   - Phone/Mobile: "9876543210" (10 digits)
+   - Address: "123 Test Street, Springfield"
+   - Date of Birth: use format required by field (e.g. "15 Jan 1990")
    - Postal/ZIP: "12345"
+   - Subjects: "Mathematics"
    - Credit card: "4111111111111111", expiry "12/25", CVV "123"
    - Password: "Test@1234"
+   - State: pick first available option after opening dropdown
+   - City: pick first available option after state is selected
    - Any other field: invent plausible data based on the field label
 ## RETURN FORMAT
@@ -286,13 +344,36 @@ Set "done": true with empty "actions" array when the goal is fully accomplished.
 - Use name attribute: input[name="firstName"]
 - NEVER use position or coordinates
+## RETURN FORMAT
+Return ONLY valid JSON — no markdown, no explanation:
+{
+  "reasoning": "What I see and what I plan to do",
+  "done": false,
+  "actions": [
+    {"action": "click",  "selector": "SELECTOR"},
+    {"action": "fill",   "selector": "SELECTOR", "value": "VALUE"},
+    {"action": "select", "selector": "SELECTOR", "value": "OPTION_LABEL"},
+    {"action": "check",  "selector": "SELECTOR"},
+    {"action": "verify", "text": "TEXT_TO_CHECK", "not": false},
+    {"action": "wait",   "ms": 500},
+    {"action": "scroll", "selector": "SELECTOR"}
+  ]
+}
+Set "done": true with empty "actions" when goal is fully accomplished.
+## SELECTOR RULES
+- Prefer id: #firstName, #lastName, #userEmail
+- Use name: input[name="firstName"]
+- Use placeholder: input[placeholder="First Name"]
+- Use text: button:has-text("Submit")
+- NEVER use position or coordinates
 ## SMART RULES
-- Look at ALL visible form fields and fill them ALL in one round
+- Look at ALL visible form fields and fill them ALL in one round (up to 20 actions)
 - If you see a multi-step form, complete this step fully then click continue/next/submit
-- If the goal is "checkout": cart → fill info → continue → finish
-- If the goal is "register" or "sign up": fill all fields + submit
-- If the goal is already accomplished (correct page shown), set done: true immediately
-- Maximum 5 actions per round`;
+- If a reference image is attached, scan it to identify every field and fill them all
+- If the goal is already accomplished (correct page shown), set done: true immediately`;
 }
 /**
  * Universal AI agentic fallback.
@@ -430,34 +511,35 @@ async function extractDomElements(page) {
         return '(could not extract elements)';
     }
 }
-async function aiStepFallback(page, stepText) {
+async function aiStepFallback(page, stepText, stepAttachment = null) {
     const openaiKey = process.env.OPENAI_API_KEY;
     if (!openaiKey)
         throw new Error(`No OPENAI_API_KEY — cannot use AI fallback for: "${stepText}"`);
+    console.log(`  🤖 aiStepFallback called. hasAttachment=${!!stepAttachment}`);
     const { default: OpenAI } = await import('openai');
     const openai = new OpenAI({ apiKey: openaiKey });
-    const MAX_ROUNDS = 6;
+    const MAX_ROUNDS = stepAttachment ? 10 : 6;
     let consecutiveFailures = 0;
     for (let round = 0; round < MAX_ROUNDS; round++) {
-        // Extract real DOM elements so AI uses accurate selectors
         const domElements = await extractDomElements(page);
         const screenshotBuffer = await page.screenshot({ type: 'png' });
         const base64 = screenshotBuffer.toString('base64');
         const response = await openai.chat.completions.create({
             model: 'gpt-4o',
-            max_tokens: 800,
+            max_tokens: stepAttachment ? 3000 : 800,
             messages: [{
                     role: 'user',
                     content: [
                         {
                             type: 'text',
-                            text: buildAgentPrompt(stepText, round) + `
+                            text: buildAgentPrompt(stepText, round, !!stepAttachment) + `
 ## REAL DOM ELEMENTS ON THIS PAGE
 Use these actual elements — prefer id, data-test, name, aria-label over guessing:
-${domElements}`
+${domElements}` + (stepAttachment ? `\n\nThe REFERENCE IMAGE (second image) shows all form fields. Fill every field you see in it using the DOM elements above.` : '')
                         },
-                        { type: 'image_url', image_url: { url: `data:image/png;base64,${base64}` } }
+                        { type: 'image_url', image_url: { url: `data:image/png;base64,${base64}` } },
+                        ...(stepAttachment ? [{ type: 'image_url', image_url: { url: `data:image/jpeg;base64,${stepAttachment}` } }] : [])
                     ]
                 }]
         });
@@ -465,8 +547,15 @@ ${domElements}`
             .trim().replace(/```json\n?/gi, '').replace(/```/g, '').trim();
         if (!raw)
             throw new Error(`AI returned empty response for: "${stepText}"`);
-        const plan = JSON.parse(raw);
-        console.log(`  🤖 AI round ${round + 1} — ${plan.reasoning}`);
+        let plan;
+        try {
+            plan = JSON.parse(raw);
+        }
+        catch (parseErr) {
+            console.log(`  ⚠️ JSON parse failed. Raw (first 300): ${raw.slice(0, 300)}`);
+            throw new Error(`AI returned invalid JSON: ${parseErr.message}`);
+        }
+        console.log(`  🤖 AI round ${round + 1} — ${plan.reasoning} — actions: ${plan.actions?.length || 0}`);
         if (plan.done || !plan.actions?.length) {
             console.log(`  ✅ AI agent completed: "${stepText}"`);
             return;
@@ -476,17 +565,58 @@ ${domElements}`
             console.log(`  🤖 Executing: ${JSON.stringify(act)}`);
             try {
                 if (act.action === 'click') {
-                    // Try multiple selector variants derived from the DOM
                     const tried = await tryAIClick(page, act.selector);
                     if (!tried)
                         throw new Error(`Could not find element: ${act.selector}`);
-                    await page.waitForTimeout(500);
+                    await page.waitForTimeout(400);
                 }
                 else if (act.action === 'fill') {
                     await tryAIFill(page, act.selector, act.value || '');
                 }
                 else if (act.action === 'select') {
-                    await page.locator(act.selector).first().selectOption({ label: act.value });
+                    let done = false;
+                    for (const fn of [
+                        () => page.locator(act.selector).first().selectOption({ label: act.value }, { timeout: 4000 }),
+                        () => page.locator(act.selector).first().selectOption({ value: act.value }, { timeout: 4000 }),
+                        async () => { await page.locator(act.selector).first().click({ timeout: 3000 }); await page.getByText(act.value, { exact: false }).first().click({ timeout: 3000 }); },
+                    ]) {
+                        try {
+                            await fn();
+                            done = true;
+                            break;
+                        }
+                        catch { /* next */ }
+                    }
+                    if (!done)
+                        throw new Error(`Could not select "${act.value}" in ${act.selector}`);
+                }
+                else if (act.action === 'check') {
+                    const el = page.locator(act.selector).first();
+                    const checked = await el.isChecked().catch(() => false);
+                    if (!checked) {
+                        let done = false;
+                        for (const fn of [
+                            () => el.click({ timeout: 4000 }),
+                            () => page.locator(`label:has-text("${act.label || ''}")`).click({ timeout: 4000 }),
+                        ]) {
+                            try {
+                                await fn();
+                                done = true;
+                                break;
+                            }
+                            catch { /* next */ }
+                        }
+                        if (!done)
+                            throw new Error(`Could not check ${act.selector}`);
+                    }
+                }
+                else if (act.action === 'upload') {
+                    try {
+                        await page.setInputFiles(act.selector, act.value || '', { timeout: 4000 });
+                    }
+                    catch {
+                        console.log(`  ℹ️ File upload skipped (${act.selector})`);
+                    }
                 }
                 else if (act.action === 'verify') {
                     const content = await page.textContent('body') || '';
@@ -515,11 +645,8 @@ ${domElements}`
                 consecutiveFailures++;
             }
         }
-        // If 2 consecutive rounds had zero successes, bail early
-        if (!anySucceeded) {
-            if (consecutiveFailures >= plan.actions.length * 2) {
-                throw new Error(`AI agent stuck — no actions succeeded after multiple rounds for: "${stepText}"`);
-            }
+        if (!anySucceeded && consecutiveFailures >= plan.actions.length * 2) {
+            throw new Error(`AI agent stuck — no actions succeeded for: "${stepText}"`);
         }
         await page.waitForLoadState('domcontentloaded').catch(() => { });
         await page.waitForTimeout(300);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "name": "@cutleryapp/agent",
-    "version": "1.0.24",
+    "version": "1.0.26",
     "description": "Local agent that connects your machine to the Cutlery QA platform and runs UI tests via Playwright",
     "main": "dist/cli.js",
     "bin": {