@cutleryapp/agent 1.0.16 → 1.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/mcp-executor.js +146 -12
- package/package.json +1 -1
package/dist/mcp-executor.js
CHANGED
|
@@ -122,31 +122,59 @@ class TestExecutor {
|
|
|
122
122
|
await page.waitForSelector(sel, { state: "visible", timeout: 15000 });
|
|
123
123
|
}
|
|
124
124
|
else if (lower.includes("verify") || lower.includes("check") || lower.includes("assert") || lower.includes("should")) {
|
|
125
|
-
//
|
|
126
|
-
const
|
|
125
|
+
// Support: Verify "text", Verify I see text Foo, Verify text Foo is not displayed
|
|
126
|
+
const isNegative = /not\s+(?:displayed|visible|present)/i.test(raw);
|
|
127
|
+
const textMatch = raw.match(/"([^"]+)"/) ||
|
|
128
|
+
raw.match(/(?:verify|check|assert)\s+(?:i\s+see\s+(?:text\s+)?|text\s+)?(.+?)(?:\s+is\s+(?:not\s+)?(?:displayed|visible|present))?$/i);
|
|
127
129
|
if (textMatch) {
|
|
128
|
-
const expected = textMatch[1];
|
|
129
|
-
|
|
130
|
-
await page.
|
|
130
|
+
const expected = textMatch[1].trim();
|
|
131
|
+
if (isNegative) {
|
|
132
|
+
const content = await page.textContent('body') || '';
|
|
133
|
+
if (content.includes(expected))
|
|
134
|
+
throw new Error(`Text "${expected}" should NOT be visible but was found`);
|
|
131
135
|
}
|
|
132
|
-
|
|
133
|
-
|
|
136
|
+
else {
|
|
137
|
+
try {
|
|
138
|
+
await page.waitForFunction((text) => document.body.innerText.includes(text), expected, { timeout: 10000 });
|
|
139
|
+
}
|
|
140
|
+
catch {
|
|
141
|
+
throw new Error(`Expected text not found: "${expected}"`);
|
|
142
|
+
}
|
|
134
143
|
}
|
|
135
144
|
}
|
|
136
145
|
}
|
|
137
146
|
else if (lower.includes("select") || lower.includes("choose")) {
|
|
138
|
-
const selMatch = raw.match(/select\s+"([^"]
|
|
147
|
+
const selMatch = raw.match(/select\s+"?([^"]+?)"?\s+(?:from|in)\s+"?([^"]+?)"?\s*(?:dropdown|select|field)?$/i);
|
|
139
148
|
if (selMatch) {
|
|
140
|
-
|
|
149
|
+
try {
|
|
150
|
+
await page.selectOption(selMatch[2].trim(), { label: selMatch[1].trim() });
|
|
151
|
+
}
|
|
152
|
+
catch {
|
|
153
|
+
// fallback: click the dropdown then click the option
|
|
154
|
+
await tryClick(page, new RegExp(escapeRegex(selMatch[2].trim()), 'i'), selMatch[2].trim());
|
|
155
|
+
await tryClick(page, new RegExp(escapeRegex(selMatch[1].trim()), 'i'), selMatch[1].trim());
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
else {
|
|
159
|
+
await aiStepFallback(page, raw);
|
|
141
160
|
}
|
|
142
161
|
}
|
|
143
162
|
else {
|
|
144
|
-
|
|
163
|
+
// Unknown step — let AI interpret and execute it
|
|
164
|
+
await aiStepFallback(page, raw);
|
|
145
165
|
}
|
|
146
166
|
}
|
|
147
167
|
catch (err) {
|
|
148
|
-
|
|
149
|
-
|
|
168
|
+
// If a recognised handler threw, try AI fallback before marking as failed
|
|
169
|
+
console.log(` ⚠️ Step failed (${err.message}), trying AI fallback...`);
|
|
170
|
+
try {
|
|
171
|
+
await aiStepFallback(page, raw);
|
|
172
|
+
stepError = undefined; // AI recovered it
|
|
173
|
+
}
|
|
174
|
+
catch (aiErr) {
|
|
175
|
+
stepError = err.message; // Report original error
|
|
176
|
+
result.success = false;
|
|
177
|
+
}
|
|
150
178
|
}
|
|
151
179
|
// Screenshot after each step
|
|
152
180
|
let screenshotB64 = "";
|
|
@@ -220,6 +248,112 @@ async function tryClick(page, nameRe, label) {
|
|
|
220
248
|
// AI vision fallback
|
|
221
249
|
return await aiClickFallback(page, label);
|
|
222
250
|
}
|
|
251
|
+
/**
 * Builds the instruction prompt that accompanies the page screenshot.
 *
 * @param {string} stepText - The natural-language test step to accomplish.
 * @returns {string} Prompt text asking the model for a JSON action plan.
 */
function buildAiStepPrompt(stepText) {
    return `You are an autonomous browser test agent. Your goal is to accomplish this test step:

"${stepText}"

Look at the current screenshot and decide what actions are needed RIGHT NOW to make progress toward the goal.

Return ONLY valid JSON (no markdown) with this structure:
{
"reasoning": "brief explanation of what you see and what needs to happen",
"done": false,
"actions": [
{"action": "click", "selector": "CSS_OR_TEXT_SELECTOR"},
{"action": "fill", "selector": "CSS_OR_TEXT_SELECTOR", "value": "TEXT"},
{"action": "select", "selector": "CSS_OR_TEXT_SELECTOR", "value": "OPTION_TEXT"},
{"action": "verify", "text": "EXPECTED_TEXT", "not": false},
{"action": "wait", "ms": 500}
]
}

Set "done": true and empty "actions" array when the goal has been fully accomplished.

Rules:
- Return only the actions needed for THIS screenshot — after executing them you will see the next state
- Use text-based selectors when possible: button:has-text("Checkout"), [data-test="..."], #id
- For "checkout" goals: look for cart/checkout buttons and click them in sequence
- For "login" goals: fill username, fill password, click login
- For "add to cart" goals: find and click the Add to cart button
- Maximum 3 actions per round to stay precise`;
}
/**
 * Strips markdown code fences from the model reply and parses it as a plan.
 *
 * @param {string|null|undefined} content - Raw message content from the model.
 * @param {string} stepText - The step being executed (used in error messages).
 * @returns {object} The parsed plan object.
 * @throws {Error} If the reply is empty, is not valid JSON, or is not a JSON object.
 */
function parseAiPlan(content, stepText) {
    const raw = (content || '')
        .trim()
        .replace(/```json\n?/gi, '')
        .replace(/```/g, '')
        .trim();
    if (!raw)
        throw new Error(`AI returned empty response for: "${stepText}"`);
    let plan;
    try {
        plan = JSON.parse(raw);
    }
    catch (err) {
        // Attach the step so a malformed reply is diagnosable from the log alone.
        throw new Error(`AI returned invalid JSON for: "${stepText}" (${err.message})`, { cause: err });
    }
    // JSON.parse happily returns null, numbers, strings or arrays; only an object is a plan.
    if (plan === null || typeof plan !== 'object' || Array.isArray(plan))
        throw new Error(`AI returned a non-object plan for: "${stepText}"`);
    return plan;
}
/**
 * Executes a single action from an AI plan against the page.
 *
 * @param {object} page - Page object exposing locator/textContent/waitForTimeout.
 * @param {object} act - One action entry from the plan's "actions" array.
 * @returns {Promise<void>}
 * @throws {Error} If the action is malformed, unsupported, or fails on the page.
 */
async function executeAiAction(page, act) {
    const MAX_WAIT_MS = 10000; // model output is untrusted; never sleep unboundedly
    if (act === null || typeof act !== 'object')
        throw new Error(`Malformed AI action: ${JSON.stringify(act)}`);
    switch (act.action) {
        case 'click':
            await page.locator(act.selector).first().click({ timeout: 10000 });
            await page.waitForTimeout(500); // brief settle
            return;
        case 'fill':
            await page.locator(act.selector).first().fill(act.value || '');
            return;
        case 'select':
            await page.locator(act.selector).first().selectOption({ label: act.value });
            return;
        case 'verify': {
            // Without this guard, includes(undefined) would search for the text "undefined".
            if (typeof act.text !== 'string' || act.text === '')
                throw new Error('AI verify action is missing "text"');
            const content = await page.textContent('body') || '';
            const found = content.includes(act.text);
            if (act.not && found)
                throw new Error(`Text "${act.text}" should NOT be visible`);
            if (!act.not && !found)
                throw new Error(`Expected text not found: "${act.text}"`);
            return;
        }
        case 'wait': {
            const requested = Number(act.ms);
            const ms = Number.isFinite(requested) && requested > 0 ? Math.min(requested, MAX_WAIT_MS) : 1000;
            await page.waitForTimeout(ms);
            return;
        }
        default:
            throw new Error(`Unsupported AI action: ${JSON.stringify(act.action)}`);
    }
}
/**
 * Universal AI agentic fallback.
 *
 * Each round takes a screenshot of the page, sends it with the step text to
 * GPT-4o, parses the JSON plan it returns, and executes the plan's actions.
 * The loop ends when the plan reports "done" or contains no actions.
 *
 * A failed action is logged, not rethrown: the rest of that round's plan is
 * abandoned and the next round starts from a fresh screenshot.
 * NOTE(review): this also applies to failed "verify" actions, and a reply with
 * no actions counts as completion even when "done" is false — confirm that
 * both are intended.
 *
 * @param {object} page - Page object used for screenshots and interactions.
 * @param {string} stepText - The natural-language test step to accomplish.
 * @returns {Promise<void>} Resolves once the AI reports the step as complete.
 * @throws {Error} If OPENAI_API_KEY is unset, the model reply is unusable, or
 *   the step is not completed within the round limit.
 */
async function aiStepFallback(page, stepText) {
    const openaiKey = process.env.OPENAI_API_KEY;
    if (!openaiKey)
        throw new Error(`No OPENAI_API_KEY — cannot use AI fallback for: "${stepText}"`);
    const { default: OpenAI } = await import('openai');
    const openai = new OpenAI({ apiKey: openaiKey });
    const MAX_ROUNDS = 6; // prevent infinite loops
    for (let round = 0; round < MAX_ROUNDS; round++) {
        const screenshotBuffer = await page.screenshot({ type: 'png' });
        const base64 = screenshotBuffer.toString('base64');
        const response = await openai.chat.completions.create({
            model: 'gpt-4o',
            max_tokens: 600,
            messages: [{
                    role: 'user',
                    content: [
                        { type: 'text', text: buildAiStepPrompt(stepText) },
                        { type: 'image_url', image_url: { url: `data:image/png;base64,${base64}` } }
                    ]
                }]
        });
        const plan = parseAiPlan(response.choices[0]?.message?.content, stepText);
        // A non-array "actions" value is treated as "no actions" instead of being iterated.
        const actions = Array.isArray(plan.actions) ? plan.actions : [];
        console.log(` 🤖 AI round ${round + 1} — ${plan.reasoning}`);
        if (plan.done || actions.length === 0) {
            console.log(` ✅ AI agent completed: "${stepText}"`);
            return;
        }
        // Execute each action in this round
        for (const act of actions) {
            console.log(` 🤖 Executing: ${JSON.stringify(act)}`);
            try {
                await executeAiAction(page, act);
            }
            catch (e) {
                console.log(` ⚠️ Action failed (${e.message}), continuing to next round...`);
                // The remaining actions were planned for a state we did not reach;
                // stop here so the next round re-plans from a fresh screenshot.
                break;
            }
        }
        // Wait for page to settle before next round
        await page.waitForLoadState('domcontentloaded').catch(() => { });
        await page.waitForTimeout(300);
    }
    throw new Error(`AI agent could not complete "${stepText}" within ${MAX_ROUNDS} rounds`);
}
|
|
223
357
|
/** Use OpenAI vision to identify the element and generate a selector, then click it */
|
|
224
358
|
async function aiClickFallback(page, description) {
|
|
225
359
|
const openaiKey = process.env.OPENAI_API_KEY;
|