@cutleryapp/agent 1.0.37 → 1.0.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/mcp-executor.js +201 -93
  2. package/package.json +1 -1
@@ -206,8 +206,129 @@ class TestExecutor {
206
206
  handled = true;
207
207
  }
208
208
  }
209
+ // 5b. Press key — keyboard actions
210
+ if (!handled && (lower.startsWith("press ") || lower.startsWith("hit "))) {
211
+ const keyMatch = raw.match(/(?:press|hit)\s+(.+)/i);
212
+ if (keyMatch) {
213
+ const keyName = keyMatch[1].trim();
214
+ const keyMap = {
215
+ enter: 'Enter', return: 'Enter', tab: 'Tab', escape: 'Escape', esc: 'Escape',
216
+ space: 'Space', backspace: 'Backspace', delete: 'Delete', del: 'Delete',
217
+ up: 'ArrowUp', down: 'ArrowDown', left: 'ArrowLeft', right: 'ArrowRight',
218
+ };
219
+ const key = keyMap[keyName.toLowerCase()] || keyName;
220
+ await page.keyboard.press(key);
221
+ handled = true;
222
+ }
223
+ }
224
+ // 5c. Hover
225
+ if (!handled && lower.startsWith("hover")) {
226
+ const hoverMatch = raw.match(/hover\s+(?:over\s+)?(?:on\s+)?(?:the\s+)?"?([^"]+?)"?(?:\s+(?:button|link|element|icon))?$/i);
227
+ if (hoverMatch) {
228
+ const target = hoverMatch[1].trim();
229
+ const nameRe = new RegExp(escapeRegex(target), 'i');
230
+ for (const fn of [
231
+ () => page.getByRole('button', { name: nameRe }).first().hover({ timeout: 1500 }),
232
+ () => page.getByRole('link', { name: nameRe }).first().hover({ timeout: 1500 }),
233
+ () => page.getByText(nameRe).first().hover({ timeout: 1500 }),
234
+ () => page.locator(`[aria-label="${target}" i],[title="${target}" i]`).first().hover({ timeout: 1500 }),
235
+ ]) {
236
+ try {
237
+ await fn();
238
+ handled = true;
239
+ break;
240
+ }
241
+ catch { /* next */ }
242
+ }
243
+ }
244
+ }
245
+ // 5d. Scroll
246
+ if (!handled && lower.includes("scroll")) {
247
+ if (/scroll\s+(?:to\s+)?(?:the\s+)?(?:top|beginning)/i.test(raw)) {
248
+ await page.evaluate(() => window.scrollTo(0, 0));
249
+ handled = true;
250
+ }
251
+ else if (/scroll\s+(?:to\s+)?(?:the\s+)?(?:bottom|end)/i.test(raw)) {
252
+ await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
253
+ handled = true;
254
+ }
255
+ else if (/scroll\s+down/i.test(raw)) {
256
+ const pxMatch = raw.match(/(\d+)/);
257
+ await page.evaluate((px) => window.scrollBy(0, px), pxMatch ? parseInt(pxMatch[1]) : 400);
258
+ handled = true;
259
+ }
260
+ else if (/scroll\s+up/i.test(raw)) {
261
+ const pxMatch = raw.match(/(\d+)/);
262
+ await page.evaluate((px) => window.scrollBy(0, -px), pxMatch ? parseInt(pxMatch[1]) : 400);
263
+ handled = true;
264
+ }
265
+ else {
266
+ const scrollToMatch = raw.match(/scroll\s+(?:to\s+)?(?:the\s+)?"?([^"]+?)"?\s*(?:element|section|field)?$/i);
267
+ if (scrollToMatch) {
268
+ const target = scrollToMatch[1].trim();
269
+ try {
270
+ await page.getByText(new RegExp(escapeRegex(target), 'i')).first().scrollIntoViewIfNeeded({ timeout: 2000 });
271
+ handled = true;
272
+ }
273
+ catch { /* fall through */ }
274
+ }
275
+ }
276
+ }
277
+ // 5e. Double click
278
+ if (!handled && lower.startsWith("double click")) {
279
+ const dcMatch = raw.match(/double\s+click\s+(?:on\s+)?(?:the\s+)?"?([^"]+?)"?$/i);
280
+ if (dcMatch) {
281
+ const target = dcMatch[1].trim();
282
+ const nameRe = new RegExp(escapeRegex(target), 'i');
283
+ for (const fn of [
284
+ () => page.getByRole('button', { name: nameRe }).first().dblclick({ timeout: 1500 }),
285
+ () => page.getByText(nameRe).first().dblclick({ timeout: 1500 }),
286
+ () => page.locator(`[aria-label="${target}" i]`).first().dblclick({ timeout: 1500 }),
287
+ ]) {
288
+ try {
289
+ await fn();
290
+ handled = true;
291
+ break;
292
+ }
293
+ catch { /* next */ }
294
+ }
295
+ }
296
+ }
297
+ // 5f. Clear field
298
+ if (!handled && (lower.startsWith("clear ") || lower.includes(" clear the "))) {
299
+ const clearMatch = raw.match(/clear\s+(?:the\s+)?"?([^"]+?)"?\s*(?:field|input|box)?$/i);
300
+ if (clearMatch) {
301
+ const fieldLabel = clearMatch[1].trim();
302
+ try {
303
+ await tryFill(page, fieldLabel, '');
304
+ handled = true;
305
+ }
306
+ catch { /* fall through */ }
307
+ }
308
+ }
309
+ // 5g. Upload file — "upload X to Y" / "choose X in Y field" where X looks like a path
310
+ if (!handled && (lower.includes("upload") || (lower.includes("choose") && /\.(jpg|jpeg|png|gif|pdf|csv|xlsx?|docx?|zip|txt)/i.test(raw)))) {
311
+ const uploadMatch = raw.match(/(?:upload|choose|attach)\s+"?([^"]+?)"?\s+(?:to|in|into)\s+"?([^"]+?)"?\s*(?:field|input)?$/i) ||
312
+ raw.match(/(?:upload|choose|attach)\s+"?([^"]+\.\w+)"?\s+(?:to|in|into)?\s*"?([^"]+?)"?$/i);
313
+ if (uploadMatch) {
314
+ const filePath = uploadMatch[1].trim();
315
+ const fieldLabel = uploadMatch[2].trim();
316
+ const fileLocators = [
317
+ page.getByLabel(new RegExp(escapeRegex(fieldLabel), 'i')),
318
+ page.locator(`input[type="file"]`),
319
+ ];
320
+ for (const loc of fileLocators) {
321
+ try {
322
+ await loc.first().setInputFiles(filePath, { timeout: 3000 });
323
+ handled = true;
324
+ break;
325
+ }
326
+ catch { /* try next */ }
327
+ }
328
+ }
329
+ }
209
330
  // 6. Select — native dropdown, then React-select/autocomplete fallback
210
- if (!handled && (lower.includes("select") || lower.includes("choose"))) {
331
+ if (!handled && (lower.includes("select") || (lower.includes("choose") && !/\.(jpg|jpeg|png|gif|pdf|csv|xlsx?|docx?|zip|txt)/i.test(raw)))) {
211
332
  const selMatch = raw.match(/(?:select|choose)\s+"?([^"]+?)"?\s+(?:from|in)\s+"?([^"]+?)"?\s*(?:dropdown|select|field)?$/i);
212
333
  if (selMatch) {
213
334
  const optionValue = selMatch[1].trim();
@@ -216,7 +337,7 @@ class TestExecutor {
216
337
  // Try native <select>
217
338
  try {
218
339
  const fieldLoc = page.getByLabel(new RegExp(fieldLabel, 'i')).first();
219
- await fieldLoc.selectOption({ label: optionValue }, { timeout: 3000 });
340
+ await fieldLoc.selectOption({ label: optionValue }, { timeout: 800 });
220
341
  selHandled = true;
221
342
  }
222
343
  catch { /* not a native select */ }
@@ -270,38 +391,41 @@ class TestExecutor {
270
391
  }
271
392
  }
272
393
  }
273
- // 7. AI — single-shot for deterministic steps, full loop for intent steps
394
+ // 7. Generic click fallback — try any element containing the step text before AI
274
395
  if (!handled) {
275
- const isDeterministic = /^(click|fill|enter|type|verify|check|assert|select|choose|wait|hover|scroll)/i.test(lower.trim());
276
- if (isDeterministic) {
277
- console.log(` 🤖 Quick AI selector lookup for: "${raw}"`);
278
- await aiSingleShot(page, raw);
279
- }
280
- else {
281
- console.log(` 🤖 AI intent loop for: "${raw}"`);
282
- await aiStepFallback(page, raw, null);
396
+ // Extract the most meaningful noun phrase from the step
397
+ const nounMatch = raw.match(/(?:click|press|tap|submit|open|close|expand|collapse|toggle|activate|dismiss|confirm|cancel|accept|reject|approve|deny|enable|disable|show|hide)\s+(?:on\s+|the\s+)?(?:the\s+)?"?([^"]+?)"?(?:\s+(?:button|link|tab|icon|menu|modal|dialog|popup|dropdown|option))?$/i);
398
+ if (nounMatch) {
399
+ const target = nounMatch[1].trim();
400
+ const nameRe = new RegExp(escapeRegex(target), 'i');
401
+ const clicked = await tryClick(page, nameRe, target);
402
+ if (clicked)
403
+ handled = true;
283
404
  }
284
405
  }
285
- }
286
- catch (err) {
287
- // MCP execution failed — single-shot AI recovery, no loop
288
- console.log(` ⚠️ MCP step failed (${err.message.split('\n')[0]}), trying AI...`);
289
- try {
406
+ // 8. AI — last resort only, single-shot for deterministic steps
407
+ if (!handled) {
408
+ console.log(` 🤖 AI fallback for: "${raw}"`);
290
409
  await aiSingleShot(page, raw);
291
410
  }
292
- catch (aiErr) {
293
- stepError = err.message;
294
- result.success = false;
295
- }
296
411
  }
297
- // Screenshot after each step
412
+ catch (err) {
413
+ // Log the error — do NOT call AI here, it's too slow and usually can't recover either
414
+ console.log(` ⚠️ Step failed: ${err.message.split('\n')[0]}`);
415
+ stepError = err.message;
416
+ result.success = false;
417
+ }
418
+ // Screenshot on failure, on the last step, or every 5 steps — not every step
298
419
  let screenshotB64 = "";
299
- try {
300
- const buf = await page.screenshot({ fullPage: false });
301
- screenshotB64 = buf.toString("base64");
302
- result.screenshots.push(screenshotB64);
420
+ const isLastStep = i === steps.length - 1;
421
+ if (stepError || isLastStep || i % 5 === 0) {
422
+ try {
423
+ const buf = await page.screenshot({ fullPage: false });
424
+ screenshotB64 = buf.toString("base64");
425
+ result.screenshots.push(screenshotB64);
426
+ }
427
+ catch { /* ignore screenshot errors */ }
303
428
  }
304
- catch { /* ignore screenshot errors */ }
305
429
  result.steps.push({
306
430
  step: raw,
307
431
  action: raw,
@@ -347,24 +471,32 @@ function extractSelector(step, pattern) {
347
471
  }
348
472
  // Fast probe: try each locator strategy with a short timeout so fallbacks don't stall
349
473
  async function tryClick(page, nameRe, label) {
350
- const FAST = 800;
351
- const strategies = [
352
- () => page.getByRole('button', { name: nameRe }).first().click({ timeout: FAST }),
353
- () => page.getByRole('link', { name: nameRe }).first().click({ timeout: FAST }),
354
- () => page.getByText(nameRe).first().click({ timeout: FAST }),
355
- () => page.locator(`[value="${label}"], [aria-label="${label}"], [title="${label}"]`).first().click({ timeout: FAST }),
356
- // data-* attributes (common in test automation)
357
- () => page.locator(`[data-test*="${label}" i], [data-testid*="${label}" i], [id*="${label}" i]`).first().click({ timeout: FAST }),
474
+ // Race all common role/text strategies simultaneously — first one wins
475
+ const T = 1000;
476
+ const raceStrategies = [
477
+ page.getByRole('button', { name: nameRe }).first().click({ timeout: T }),
478
+ page.getByRole('link', { name: nameRe }).first().click({ timeout: T }),
479
+ page.getByRole('tab', { name: nameRe }).first().click({ timeout: T }),
480
+ page.getByText(nameRe, { exact: false }).first().click({ timeout: T }),
481
+ page.locator(`[aria-label="${label}" i],[title="${label}" i],[data-test*="${label}" i],[data-testid*="${label}" i]`).first().click({ timeout: T }),
358
482
  ];
359
- for (const fn of strategies) {
483
+ try {
484
+ await Promise.any(raceStrategies);
485
+ return true;
486
+ }
487
+ catch { /* all failed, try sequential fallbacks */ }
488
+ // Sequential fallbacks for id/value/role selectors
489
+ for (const fn of [
490
+ () => page.locator(`#${label.replace(/\s+/g, '-')}, #${label.replace(/\s+/g, '_')}, [id*="${label}" i]`).first().click({ timeout: 500 }),
491
+ () => page.locator(`input[value="${label}" i], button[value="${label}" i]`).first().click({ timeout: 500 }),
492
+ ]) {
360
493
  try {
361
494
  await fn();
362
495
  return true;
363
496
  }
364
- catch { /* try next */ }
497
+ catch { /* next */ }
365
498
  }
366
- // AI vision fallback
367
- return await aiClickFallback(page, label);
499
+ return false;
368
500
  }
369
501
  function buildAgentPrompt(stepText, round, hasAttachment = false) {
370
502
  const attachmentSection = hasAttachment ? `
@@ -823,12 +955,12 @@ async function tryAutocomplete(page, fieldLabel, value) {
823
955
  const innerInput = page.locator('[class*="react-select__input"] input,[class*="select__input"] input').first();
824
956
  try {
825
957
  await innerInput.waitFor({ state: 'visible', timeout: 800 });
826
- await innerInput.type(value, { delay: 60 });
958
+ await innerInput.type(value, { delay: 30 });
827
959
  }
828
960
  catch {
829
- await page.keyboard.type(value, { delay: 60 });
961
+ await page.keyboard.type(value, { delay: 30 });
830
962
  }
831
- await page.waitForTimeout(600);
963
+ await page.waitForTimeout(400);
832
964
  if (await clickOpenOption())
833
965
  return true;
834
966
  // No confirmed click → don't claim success, fall through to next strategy
@@ -853,11 +985,10 @@ async function tryAutocomplete(page, fieldLabel, value) {
853
985
  await input.waitFor({ state: 'visible', timeout: 1500 });
854
986
  await input.click({ timeout: 1500 });
855
987
  await input.fill('');
856
- await input.type(value, { delay: 60 });
857
- await page.waitForTimeout(500);
988
+ await input.type(value, { delay: 30 });
989
+ await page.waitForTimeout(350);
858
990
  if (await clickOpenOption())
859
991
  return true;
860
- // Only count success when the option was actually clicked
861
992
  }
862
993
  catch { /* try next */ }
863
994
  }
@@ -956,7 +1087,7 @@ async function aiFillFallback(page, label, value) {
956
1087
  }
957
1088
  }
958
1089
  async function tryClickScoped(page, nameRe, target, scope) {
959
- const FAST = 3000;
1090
+ const FAST = 1000;
960
1091
  // Strip trailing generic nouns that won't appear verbatim on the page
961
1092
  const cleanScope = scope.replace(/\s+(?:product|item|section|card|row|container|element|button|link|area|panel|block)$/i, '').trim();
962
1093
  // Use card/item container selectors — these are tight enough to contain the button
@@ -1010,67 +1141,44 @@ async function tryClickScoped(page, nameRe, target, scope) {
1010
1141
  return false;
1011
1142
  }
1012
1143
  async function tryFill(page, label, value) {
1013
- const FAST = 800;
1014
1144
  const labelRe = new RegExp(escapeRegex(label), "i");
1015
1145
  const variants = labelVariants(label);
1016
- const attrContains = (attr) => variants
1017
- .map((v) => `input[${attr}*="${cssEscape(v)}" i], textarea[${attr}*="${cssEscape(v)}" i], [contenteditable="true"][${attr}*="${cssEscape(v)}" i]`)
1018
- .join(", ");
1019
- // Ordered most-likely-to-resolve first so the success path matches the old
1020
- // 4-strategy implementation in latency. The extra strategies only run when
1021
- // the cheaper ones miss (i.e. the field genuinely needs fuzzier matching).
1022
- const strategies = [
1023
- // 1–2. Same as before — covers labelled inputs and placeholder-only inputs.
1146
+ const attrContains = (attr) => variants.map(v => `input[${attr}*="${cssEscape(v)}" i], textarea[${attr}*="${cssEscape(v)}" i], [contenteditable="true"][${attr}*="${cssEscape(v)}" i]`).join(", ");
1147
+ function withTimeout(fn, ms) {
1148
+ return Promise.race([fn(), new Promise((_, r) => setTimeout(() => r(new Error('timeout')), ms))]);
1149
+ }
1150
+ // Phase 1: race the 3 highest-coverage strategies simultaneously (500ms cap)
1151
+ // These cover labelled inputs, placeholder inputs, and role-textbox — 95%+ of forms
1152
+ const phase1 = [
1024
1153
  () => page.getByLabel(labelRe).first().fill(value),
1025
1154
  () => page.getByPlaceholder(labelRe).first().fill(value),
1026
- // 3. Accessible role match.
1027
1155
  () => page.getByRole("textbox", { name: labelRe }).first().fill(value),
1028
- // 4. Common automation hooks.
1156
+ ];
1157
+ try {
1158
+ await Promise.any(phase1.map(fn => withTimeout(fn, 500)));
1159
+ return;
1160
+ }
1161
+ catch { /* all 3 failed, try phase 2 */ }
1162
+ // Phase 2: attribute-based selectors covering name/id/data-test variants (300ms each)
1163
+ const exactAttrs = variants.flatMap(v => [
1164
+ `input[name="${cssEscape(v)}" i]`, `input[id="${cssEscape(v)}" i]`,
1165
+ `textarea[name="${cssEscape(v)}" i]`, `textarea[id="${cssEscape(v)}" i]`,
1166
+ ]).join(", ");
1167
+ const phase2 = [
1168
+ () => page.locator(exactAttrs).first().fill(value),
1169
+ () => page.locator(`${attrContains("name")}, ${attrContains("id")}`).first().fill(value),
1029
1170
  () => page.locator(attrContains("data-test")).first().fill(value),
1030
- () => page.locator(attrContains("data-testid")).first().fill(value),
1031
- // 5. Native attributes — exact across all variants (kebab/snake/camel/etc.).
1032
- () => page
1033
- .locator(variants
1034
- .flatMap((v) => [
1035
- `input[name="${cssEscape(v)}" i]`,
1036
- `input[id="${cssEscape(v)}" i]`,
1037
- `textarea[name="${cssEscape(v)}" i]`,
1038
- `textarea[id="${cssEscape(v)}" i]`,
1039
- ])
1040
- .join(", "))
1041
- .first()
1042
- .fill(value),
1043
- // 6. Native attributes — contains across all variants.
1044
- () => page
1045
- .locator(`${attrContains("name")}, ${attrContains("id")}`)
1046
- .first()
1047
- .fill(value),
1048
- // 7. ARIA / placeholder fallbacks.
1049
1171
  () => page.locator(attrContains("aria-label")).first().fill(value),
1050
1172
  () => page.locator(attrContains("placeholder")).first().fill(value),
1051
1173
  ];
1052
- const errors = [];
1053
- for (const fn of strategies) {
1174
+ for (const fn of phase2) {
1054
1175
  try {
1055
- await Promise.race([
1056
- fn(),
1057
- new Promise((_, r) => setTimeout(() => r(new Error("timeout")), FAST)),
1058
- ]);
1176
+ await withTimeout(fn, 300);
1059
1177
  return;
1060
1178
  }
1061
- catch (e) {
1062
- errors.push(e?.message?.split("\n")[0] || String(e));
1063
- }
1179
+ catch { /* next */ }
1064
1180
  }
1065
- // Autocomplete fallback — type + wait for dropdown + click option
1066
- const acSuccess = await tryAutocomplete(page, label, value);
1067
- if (acSuccess)
1068
- return;
1069
- // AI vision fallback
1070
- const aiSuccess = await aiFillFallback(page, label, value);
1071
- if (aiSuccess)
1072
- return;
1073
- throw new Error(`Could not find input field: "${label}". Tried ${strategies.length} strategies.`);
1181
+ throw new Error(`Could not find input field: "${label}"`);
1074
1182
  }
1075
1183
  /** Token-aware variant generation matching executor.ts/labelVariants. */
1076
1184
  function labelVariants(label) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cutleryapp/agent",
3
- "version": "1.0.37",
3
+ "version": "1.0.39",
4
4
  "description": "Local agent that connects your machine to the Cutlery QA platform and runs UI tests via Playwright",
5
5
  "main": "dist/cli.js",
6
6
  "bin": {