@cutleryapp/agent 1.0.36 → 1.0.38

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/mcp-executor.js +191 -63
  2. package/package.json +1 -1
@@ -206,8 +206,129 @@ class TestExecutor {
206
206
  handled = true;
207
207
  }
208
208
  }
209
+ // 5b. Press key — keyboard actions
210
+ if (!handled && (lower.startsWith("press ") || lower.startsWith("hit "))) {
211
+ const keyMatch = raw.match(/(?:press|hit)\s+(.+)/i);
212
+ if (keyMatch) {
213
+ const keyName = keyMatch[1].trim();
214
+ const keyMap = {
215
+ enter: 'Enter', return: 'Enter', tab: 'Tab', escape: 'Escape', esc: 'Escape',
216
+ space: 'Space', backspace: 'Backspace', delete: 'Delete', del: 'Delete',
217
+ up: 'ArrowUp', down: 'ArrowDown', left: 'ArrowLeft', right: 'ArrowRight',
218
+ };
219
+ const key = keyMap[keyName.toLowerCase()] || keyName;
220
+ await page.keyboard.press(key);
221
+ handled = true;
222
+ }
223
+ }
224
+ // 5c. Hover
225
+ if (!handled && lower.startsWith("hover")) {
226
+ const hoverMatch = raw.match(/hover\s+(?:over\s+)?(?:on\s+)?(?:the\s+)?"?([^"]+?)"?(?:\s+(?:button|link|element|icon))?$/i);
227
+ if (hoverMatch) {
228
+ const target = hoverMatch[1].trim();
229
+ const nameRe = new RegExp(escapeRegex(target), 'i');
230
+ for (const fn of [
231
+ () => page.getByRole('button', { name: nameRe }).first().hover({ timeout: 1500 }),
232
+ () => page.getByRole('link', { name: nameRe }).first().hover({ timeout: 1500 }),
233
+ () => page.getByText(nameRe).first().hover({ timeout: 1500 }),
234
+ () => page.locator(`[aria-label="${target}" i],[title="${target}" i]`).first().hover({ timeout: 1500 }),
235
+ ]) {
236
+ try {
237
+ await fn();
238
+ handled = true;
239
+ break;
240
+ }
241
+ catch { /* next */ }
242
+ }
243
+ }
244
+ }
245
+ // 5d. Scroll
246
+ if (!handled && lower.includes("scroll")) {
247
+ if (/scroll\s+(?:to\s+)?(?:the\s+)?(?:top|beginning)/i.test(raw)) {
248
+ await page.evaluate(() => window.scrollTo(0, 0));
249
+ handled = true;
250
+ }
251
+ else if (/scroll\s+(?:to\s+)?(?:the\s+)?(?:bottom|end)/i.test(raw)) {
252
+ await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
253
+ handled = true;
254
+ }
255
+ else if (/scroll\s+down/i.test(raw)) {
256
+ const pxMatch = raw.match(/(\d+)/);
257
+ await page.evaluate((px) => window.scrollBy(0, px), pxMatch ? parseInt(pxMatch[1]) : 400);
258
+ handled = true;
259
+ }
260
+ else if (/scroll\s+up/i.test(raw)) {
261
+ const pxMatch = raw.match(/(\d+)/);
262
+ await page.evaluate((px) => window.scrollBy(0, -px), pxMatch ? parseInt(pxMatch[1]) : 400);
263
+ handled = true;
264
+ }
265
+ else {
266
+ const scrollToMatch = raw.match(/scroll\s+(?:to\s+)?(?:the\s+)?"?([^"]+?)"?\s*(?:element|section|field)?$/i);
267
+ if (scrollToMatch) {
268
+ const target = scrollToMatch[1].trim();
269
+ try {
270
+ await page.getByText(new RegExp(escapeRegex(target), 'i')).first().scrollIntoViewIfNeeded({ timeout: 2000 });
271
+ handled = true;
272
+ }
273
+ catch { /* fall through */ }
274
+ }
275
+ }
276
+ }
277
+ // 5e. Double click
278
+ if (!handled && lower.startsWith("double click")) {
279
+ const dcMatch = raw.match(/double\s+click\s+(?:on\s+)?(?:the\s+)?"?([^"]+?)"?$/i);
280
+ if (dcMatch) {
281
+ const target = dcMatch[1].trim();
282
+ const nameRe = new RegExp(escapeRegex(target), 'i');
283
+ for (const fn of [
284
+ () => page.getByRole('button', { name: nameRe }).first().dblclick({ timeout: 1500 }),
285
+ () => page.getByText(nameRe).first().dblclick({ timeout: 1500 }),
286
+ () => page.locator(`[aria-label="${target}" i]`).first().dblclick({ timeout: 1500 }),
287
+ ]) {
288
+ try {
289
+ await fn();
290
+ handled = true;
291
+ break;
292
+ }
293
+ catch { /* next */ }
294
+ }
295
+ }
296
+ }
297
+ // 5f. Clear field
298
+ if (!handled && (lower.startsWith("clear ") || lower.includes(" clear the "))) {
299
+ const clearMatch = raw.match(/clear\s+(?:the\s+)?"?([^"]+?)"?\s*(?:field|input|box)?$/i);
300
+ if (clearMatch) {
301
+ const fieldLabel = clearMatch[1].trim();
302
+ try {
303
+ await tryFill(page, fieldLabel, '');
304
+ handled = true;
305
+ }
306
+ catch { /* fall through */ }
307
+ }
308
+ }
309
+ // 5g. Upload file — "upload X to Y" / "choose X in Y field" where X looks like a path
310
+ if (!handled && (lower.includes("upload") || (lower.includes("choose") && /\.(jpg|jpeg|png|gif|pdf|csv|xlsx?|docx?|zip|txt)/i.test(raw)))) {
311
+ const uploadMatch = raw.match(/(?:upload|choose|attach)\s+"?([^"]+?)"?\s+(?:to|in|into)\s+"?([^"]+?)"?\s*(?:field|input)?$/i) ||
312
+ raw.match(/(?:upload|choose|attach)\s+"?([^"]+\.\w+)"?\s+(?:to|in|into)?\s*"?([^"]+?)"?$/i);
313
+ if (uploadMatch) {
314
+ const filePath = uploadMatch[1].trim();
315
+ const fieldLabel = uploadMatch[2].trim();
316
+ const fileLocators = [
317
+ page.getByLabel(new RegExp(escapeRegex(fieldLabel), 'i')),
318
+ page.locator(`input[type="file"]`),
319
+ ];
320
+ for (const loc of fileLocators) {
321
+ try {
322
+ await loc.first().setInputFiles(filePath, { timeout: 3000 });
323
+ handled = true;
324
+ break;
325
+ }
326
+ catch { /* try next */ }
327
+ }
328
+ }
329
+ }
209
330
  // 6. Select — native dropdown, then React-select/autocomplete fallback
210
- if (!handled && (lower.includes("select") || lower.includes("choose"))) {
331
+ if (!handled && (lower.includes("select") || (lower.includes("choose") && !/\.(jpg|jpeg|png|gif|pdf|csv|xlsx?|docx?|zip|txt)/i.test(raw)))) {
211
332
  const selMatch = raw.match(/(?:select|choose)\s+"?([^"]+?)"?\s+(?:from|in)\s+"?([^"]+?)"?\s*(?:dropdown|select|field)?$/i);
212
333
  if (selMatch) {
213
334
  const optionValue = selMatch[1].trim();
@@ -270,29 +391,29 @@ class TestExecutor {
270
391
  }
271
392
  }
272
393
  }
273
- // 7. AI — single-shot for deterministic steps, full loop for intent steps
394
+ // 7. Generic click fallback — try any element containing the step text before AI
274
395
  if (!handled) {
275
- const isDeterministic = /^(click|fill|enter|type|verify|check|assert|select|choose|wait|hover|scroll)/i.test(lower.trim());
276
- if (isDeterministic) {
277
- console.log(` 🤖 Quick AI selector lookup for: "${raw}"`);
278
- await aiSingleShot(page, raw);
279
- }
280
- else {
281
- console.log(` 🤖 AI intent loop for: "${raw}"`);
282
- await aiStepFallback(page, raw, null);
396
+ // Extract the most meaningful noun phrase from the step
397
+ const nounMatch = raw.match(/(?:click|press|tap|submit|open|close|expand|collapse|toggle|activate|dismiss|confirm|cancel|accept|reject|approve|deny|enable|disable|show|hide)\s+(?:on\s+|the\s+)?(?:the\s+)?"?([^"]+?)"?(?:\s+(?:button|link|tab|icon|menu|modal|dialog|popup|dropdown|option))?$/i);
398
+ if (nounMatch) {
399
+ const target = nounMatch[1].trim();
400
+ const nameRe = new RegExp(escapeRegex(target), 'i');
401
+ const clicked = await tryClick(page, nameRe, target);
402
+ if (clicked)
403
+ handled = true;
283
404
  }
284
405
  }
285
- }
286
- catch (err) {
287
- // MCP execution failed — single-shot AI recovery, no loop
288
- console.log(` ⚠️ MCP step failed (${err.message.split('\n')[0]}), trying AI...`);
289
- try {
406
+ // 8. AI — last resort only, single-shot for deterministic steps
407
+ if (!handled) {
408
+ console.log(` 🤖 AI fallback for: "${raw}"`);
290
409
  await aiSingleShot(page, raw);
291
410
  }
292
- catch (aiErr) {
293
- stepError = err.message;
294
- result.success = false;
295
- }
411
+ }
412
+ catch (err) {
413
+ // Log the error — do NOT call AI here, it's too slow and usually can't recover either
414
+ console.log(` ⚠️ Step failed: ${err.message.split('\n')[0]}`);
415
+ stepError = err.message;
416
+ result.success = false;
296
417
  }
297
418
  // Screenshot after each step
298
419
  let screenshotB64 = "";
@@ -765,9 +886,7 @@ async function tryAIClick(page, selector) {
765
886
  }
766
887
  /** Select an option from a React-select / autocomplete / combobox / dropdown */
767
888
  async function tryAutocomplete(page, fieldLabel, value) {
768
- const esc = (s) => s.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
769
- const labelRe = new RegExp(esc(fieldLabel), 'i');
770
- // Click an option from whatever dropdown is currently open on the page
889
+ // Click the matching option in whatever dropdown is currently open — returns true only on success
771
890
  async function clickOpenOption() {
772
891
  const optionSelectors = [
773
892
  `[role="option"]:has-text("${value}")`,
@@ -789,38 +908,57 @@ async function tryAutocomplete(page, fieldLabel, value) {
789
908
  }
790
909
  return false;
791
910
  }
792
- // Strategy 1: React-select — click the control div, type into its hidden input
793
- const reactControlSelectors = [
794
- `[class*="react-select__control"]`,
795
- `[class*="select__control"]`,
796
- `[class*="Select__control"]`,
797
- ];
798
- for (const ctrlSel of reactControlSelectors) {
799
- try {
800
- // Find a React-select control near the label text
801
- // Find label element, then look for the control near it via DOM traversal
802
- const labelEl = page.getByText(labelRe, { exact: false }).first();
803
- if (!await labelEl.isVisible({ timeout: 500 }).catch(() => false))
804
- continue;
805
- // Try sibling/parent-scoped control
806
- const ctrl = page.locator(ctrlSel);
807
- if (await ctrl.count() === 0)
808
- continue;
809
- await ctrl.first().click({ timeout: 2000 });
810
- // The hidden input inside the control is now active
811
- const innerInput = page.locator(`[class*="react-select__input"] input, [class*="select__input"] input`).first();
812
- await innerInput.type(value, { delay: 60 });
813
- await page.waitForTimeout(500);
911
+ // Strategy 1: React-select — find the control that BELONGS to this label via DOM traversal
912
+ // so we don't accidentally open the wrong dropdown on a page with multiple selects
913
+ try {
914
+ const opened = await page.evaluate((lbl) => {
915
+ const allEls = Array.from(document.querySelectorAll('label, legend, [class*="label"]'));
916
+ const labelEl = allEls.find(el => (el.textContent || '').trim().toLowerCase().includes(lbl.toLowerCase()));
917
+ if (!labelEl)
918
+ return false;
919
+ // Walk up ancestors looking for a container that holds a react-select control
920
+ let ancestor = labelEl.parentElement;
921
+ for (let i = 0; i < 6 && ancestor; i++) {
922
+ const ctrl = ancestor.querySelector('[class*="react-select__control"],[class*="select__control"],[class*="Select__control"]');
923
+ if (ctrl) {
924
+ ctrl.click();
925
+ return true;
926
+ }
927
+ ancestor = ancestor.parentElement;
928
+ }
929
+ // Try label[for] wrapper containing a react-select
930
+ if (labelEl.tagName === 'LABEL') {
931
+ const forId = labelEl.htmlFor;
932
+ const wrapper = forId ? document.getElementById(forId) : null;
933
+ const ctrl = wrapper?.querySelector('[class*="control"]');
934
+ if (ctrl) {
935
+ ctrl.click();
936
+ return true;
937
+ }
938
+ }
939
+ return false;
940
+ }, fieldLabel);
941
+ if (opened) {
942
+ await page.waitForTimeout(300);
943
+ // Type into the now-visible input inside the react-select
944
+ const innerInput = page.locator('[class*="react-select__input"] input,[class*="select__input"] input').first();
945
+ try {
946
+ await innerInput.waitFor({ state: 'visible', timeout: 800 });
947
+ await innerInput.type(value, { delay: 60 });
948
+ }
949
+ catch {
950
+ await page.keyboard.type(value, { delay: 60 });
951
+ }
952
+ await page.waitForTimeout(600);
814
953
  if (await clickOpenOption())
815
954
  return true;
816
- // If specific option not found, press Enter
817
- await page.keyboard.press('Enter');
818
- await page.waitForTimeout(200);
819
- return true;
955
+ // No confirmed click — don't claim success, fall through to next strategy
820
956
  }
821
- catch { /* try next */ }
822
957
  }
823
- // Strategy 2: combobox / input by label or placeholder — type and pick option
958
+ catch { /* DOM eval failed, try next */ }
959
+ // Strategy 2: combobox / text input by label or placeholder — type + pick option
960
+ const esc = (s) => s.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
961
+ const labelRe = new RegExp(esc(fieldLabel), 'i');
824
962
  const inputLocators = [
825
963
  page.getByRole('combobox', { name: labelRe }),
826
964
  page.getByLabel(labelRe),
@@ -829,8 +967,8 @@ async function tryAutocomplete(page, fieldLabel, value) {
829
967
  for (const loc of inputLocators) {
830
968
  try {
831
969
  const input = loc.first();
970
+ // Skip wrapper divs — getByLabel can return the React-select container div
832
971
  const tag = await input.evaluate((el) => el.tagName.toLowerCase()).catch(() => '');
833
- // Skip non-input elements (e.g. wrapper divs returned by getByLabel for React-select)
834
972
  if (tag && !['input', 'textarea'].includes(tag))
835
973
  continue;
836
974
  await input.waitFor({ state: 'visible', timeout: 1500 });
@@ -840,9 +978,7 @@ async function tryAutocomplete(page, fieldLabel, value) {
840
978
  await page.waitForTimeout(500);
841
979
  if (await clickOpenOption())
842
980
  return true;
843
- await page.keyboard.press('Enter');
844
- await page.waitForTimeout(200);
845
- return true;
981
+ // Only count success when the option was actually clicked
846
982
  }
847
983
  catch { /* try next */ }
848
984
  }
@@ -995,7 +1131,7 @@ async function tryClickScoped(page, nameRe, target, scope) {
995
1131
  return false;
996
1132
  }
997
1133
  async function tryFill(page, label, value) {
998
- const FAST = 800;
1134
+ const FAST = 500;
999
1135
  const labelRe = new RegExp(escapeRegex(label), "i");
1000
1136
  const variants = labelVariants(label);
1001
1137
  const attrContains = (attr) => variants
@@ -1047,14 +1183,6 @@ async function tryFill(page, label, value) {
1047
1183
  errors.push(e?.message?.split("\n")[0] || String(e));
1048
1184
  }
1049
1185
  }
1050
- // Autocomplete fallback — type + wait for dropdown + click option
1051
- const acSuccess = await tryAutocomplete(page, label, value);
1052
- if (acSuccess)
1053
- return;
1054
- // AI vision fallback
1055
- const aiSuccess = await aiFillFallback(page, label, value);
1056
- if (aiSuccess)
1057
- return;
1058
1186
  throw new Error(`Could not find input field: "${label}". Tried ${strategies.length} strategies.`);
1059
1187
  }
1060
1188
  /** Token-aware variant generation matching executor.ts/labelVariants. */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cutleryapp/agent",
3
- "version": "1.0.36",
3
+ "version": "1.0.38",
4
4
  "description": "Local agent that connects your machine to the Cutlery QA platform and runs UI tests via Playwright",
5
5
  "main": "dist/cli.js",
6
6
  "bin": {