@cutleryapp/agent 1.0.15 → 1.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/mcp-executor.js +79 -2
- package/package.json +1 -1
package/dist/mcp-executor.js
CHANGED
|
@@ -76,7 +76,7 @@ class TestExecutor {
|
|
|
76
76
|
}
|
|
77
77
|
}
|
|
78
78
|
else if (lower.includes("click")) {
|
|
79
|
-
const labelMatch = raw.match(/click\s+(?:on\s+)?(?:the\s+)?"?([^"]+?)"
|
|
79
|
+
const labelMatch = raw.match(/click\s+(?:on\s+)?(?:the\s+)?"?([^"]+?)"?(?:\s+(?:button|link|tab))?$/i);
|
|
80
80
|
let label = labelMatch?.[1]?.trim();
|
|
81
81
|
if (label) {
|
|
82
82
|
// Split "Add to cart under Sauce Labs Bike Light product" into target + scope
|
|
@@ -207,6 +207,8 @@ async function tryClick(page, nameRe, label) {
|
|
|
207
207
|
() => page.getByRole('link', { name: nameRe }).first().click({ timeout: FAST }),
|
|
208
208
|
() => page.getByText(nameRe).first().click({ timeout: FAST }),
|
|
209
209
|
() => page.locator(`[value="${label}"], [aria-label="${label}"], [title="${label}"]`).first().click({ timeout: FAST }),
|
|
210
|
+
// data-* attributes (common in test automation)
|
|
211
|
+
() => page.locator(`[data-test*="${label}" i], [data-testid*="${label}" i], [id*="${label}" i]`).first().click({ timeout: FAST }),
|
|
210
212
|
];
|
|
211
213
|
for (const fn of strategies) {
|
|
212
214
|
try {
|
|
@@ -215,7 +217,78 @@ async function tryClick(page, nameRe, label) {
|
|
|
215
217
|
}
|
|
216
218
|
catch { /* try next */ }
|
|
217
219
|
}
|
|
218
|
-
|
|
220
|
+
// AI vision fallback
|
|
221
|
+
return await aiClickFallback(page, label);
|
|
222
|
+
}
|
|
223
|
+
/**
 * Last-resort click strategy: ask an OpenAI vision model for a selector, then click it.
 *
 * Takes a PNG screenshot of the page, sends it together with `description` to the
 * `gpt-4o` chat-completions endpoint, and passes the selector from the reply to
 * `page.locator(...).first().click()` with a 5 second timeout.
 *
 * This is best-effort by design: any failure (the `openai` package cannot be
 * imported, the API call rejects, the selector is invalid, the click times out)
 * is reported as `false` instead of being thrown.
 *
 * @param page - Object exposing `screenshot()` and `locator()`
 *   (presumably a Playwright Page; confirm against callers).
 * @param {string} description - Human-readable description of the element to click.
 * @returns {Promise<boolean>} `true` when the click succeeded; `false` when
 *   `OPENAI_API_KEY` is unset, the model reports nothing usable, or any step fails.
 */
async function aiClickFallback(page, description) {
    const openaiKey = process.env.OPENAI_API_KEY;
    if (!openaiKey) {
        return false;
    }
    try {
        // Imported lazily so the dependency is only loaded when the fallback actually runs.
        const { default: OpenAI } = await import('openai');
        const openai = new OpenAI({ apiKey: openaiKey });
        const screenshotBuffer = await page.screenshot({ type: 'png' });
        const base64 = screenshotBuffer.toString('base64');
        const response = await openai.chat.completions.create({
            model: 'gpt-4o',
            max_tokens: 100,
            messages: [{
                    role: 'user',
                    content: [
                        {
                            type: 'text',
                            text: `I need to click on: "${description}"\n\nLook at this screenshot and return a single CSS selector that identifies the element to click.\nRules:\n- Return ONLY the CSS selector, nothing else\n- No markdown, no explanation\n- If not found, return: NOT_FOUND\n- Use data-testid, id, aria-label, or text-based selectors\n- Prefer: [data-test="..."], #id, [aria-label="..."], button:has-text("...")`
                        },
                        { type: 'image_url', image_url: { url: `data:image/png;base64,${base64}` } }
                    ]
                }]
        });
        const rawReply = response.choices?.[0]?.message?.content ?? '';
        // Strip markdown code fences BEFORE testing for the sentinel: a reply such as
        // "```\nNOT_FOUND\n```" must be treated as "not found", not used as a selector.
        const selector = rawReply.replace(/```[a-z]*\n?/gi, '').replace(/```/g, '').trim();
        if (!selector || selector === 'NOT_FOUND') {
            return false;
        }
        await page.locator(selector).first().click({ timeout: 5000 });
        return true;
    }
    catch {
        // Deliberately swallowed: this is the final fallback and the caller only needs a boolean.
        return false;
    }
}
|
|
258
|
+
/**
 * Last-resort fill strategy: ask an OpenAI vision model for the input's selector, then fill it.
 *
 * Takes a PNG screenshot of the page, sends it together with `label` and `value` to the
 * `gpt-4o` chat-completions endpoint, and passes the selector from the reply to
 * `page.locator(...).first().fill(value)`.
 *
 * This is best-effort by design: any failure (the `openai` package cannot be
 * imported, the API call rejects, the selector is invalid, the fill times out)
 * is reported as `false` instead of being thrown.
 *
 * NOTE(review): `value` is embedded in the prompt and therefore leaves the machine
 * along with the screenshot. If callers ever pass secrets (passwords, tokens),
 * consider dropping it from the prompt; confirm whether the model needs it.
 *
 * @param page - Object exposing `screenshot()` and `locator()`
 *   (presumably a Playwright Page; confirm against callers).
 * @param {string} label - Human-readable label of the input field.
 * @param {string} value - Text to enter into the field.
 * @returns {Promise<boolean>} `true` when the field was filled; `false` when
 *   `OPENAI_API_KEY` is unset, the model reports nothing usable, or any step fails.
 */
async function aiFillFallback(page, label, value) {
    const openaiKey = process.env.OPENAI_API_KEY;
    if (!openaiKey) {
        return false;
    }
    try {
        // Imported lazily so the dependency is only loaded when the fallback actually runs.
        const { default: OpenAI } = await import('openai');
        const openai = new OpenAI({ apiKey: openaiKey });
        const screenshotBuffer = await page.screenshot({ type: 'png' });
        const base64 = screenshotBuffer.toString('base64');
        const response = await openai.chat.completions.create({
            model: 'gpt-4o',
            max_tokens: 100,
            messages: [{
                    role: 'user',
                    content: [
                        {
                            type: 'text',
                            text: `I need to fill the "${label}" input field with the value "${value}".\n\nLook at this screenshot and return a single CSS selector for the input field.\nRules:\n- Return ONLY the CSS selector, nothing else\n- No markdown, no explanation\n- If not found, return: NOT_FOUND\n- Prefer: input[name="..."], input[id="..."], input[placeholder="..."], [aria-label="..."]`
                        },
                        { type: 'image_url', image_url: { url: `data:image/png;base64,${base64}` } }
                    ]
                }]
        });
        const rawReply = response.choices?.[0]?.message?.content ?? '';
        // Strip markdown code fences BEFORE testing for the sentinel: a reply such as
        // "```\nNOT_FOUND\n```" must be treated as "not found", not used as a selector.
        const selector = rawReply.replace(/```[a-z]*\n?/gi, '').replace(/```/g, '').trim();
        if (!selector || selector === 'NOT_FOUND') {
            return false;
        }
        // Bound the wait like the click fallback does; without an explicit timeout a
        // wrong selector would block for the driver's default action timeout.
        await page.locator(selector).first().fill(value, { timeout: 5000 });
        return true;
    }
    catch {
        // Deliberately swallowed: the caller treats `false` as "fallback did not help".
        return false;
    }
}
|
|
220
293
|
async function tryClickScoped(page, nameRe, target, scope) {
|
|
221
294
|
const FAST = 3000;
|
|
@@ -324,6 +397,10 @@ async function tryFill(page, label, value) {
|
|
|
324
397
|
errors.push(e?.message?.split("\n")[0] || String(e));
|
|
325
398
|
}
|
|
326
399
|
}
|
|
400
|
+
// AI vision fallback
|
|
401
|
+
const aiSuccess = await aiFillFallback(page, label, value);
|
|
402
|
+
if (aiSuccess)
|
|
403
|
+
return;
|
|
327
404
|
throw new Error(`Could not find input field: "${label}". Tried ${strategies.length} strategies.`);
|
|
328
405
|
}
|
|
329
406
|
/** Token-aware variant generation matching executor.ts/labelVariants. */
|