@matware/e2e-runner 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/.claude-plugin/marketplace.json +37 -6
  2. package/.claude-plugin/plugin.json +17 -3
  3. package/LICENSE +190 -0
  4. package/README.md +151 -527
  5. package/agents/test-creator.md +4 -2
  6. package/agents/test-improver.md +5 -3
  7. package/bin/cli.js +84 -20
  8. package/commands/capture.md +45 -0
  9. package/package.json +3 -2
  10. package/skills/e2e-testing/SKILL.md +3 -2
  11. package/skills/e2e-testing/references/action-types.md +22 -4
  12. package/skills/e2e-testing/references/test-json-format.md +23 -0
  13. package/src/actions.js +321 -14
  14. package/src/ai-generate.js +81 -0
  15. package/src/app-pool.js +339 -0
  16. package/src/config.js +131 -7
  17. package/src/dashboard.js +209 -11
  18. package/src/db.js +74 -7
  19. package/src/index.js +6 -4
  20. package/src/learner-sqlite.js +154 -0
  21. package/src/learner.js +70 -3
  22. package/src/mcp-tools.js +259 -34
  23. package/src/module-analysis.js +247 -0
  24. package/src/module-resolver.js +35 -2
  25. package/src/narrate.js +42 -1
  26. package/src/pool-manager.js +68 -17
  27. package/src/pool.js +464 -37
  28. package/src/reporter.js +4 -1
  29. package/src/runner.js +410 -63
  30. package/src/visual-diff.js +515 -0
  31. package/src/websocket.js +14 -3
  32. package/src/wizard.js +184 -0
  33. package/templates/build-dashboard.js +3 -0
  34. package/templates/dashboard/js/api.js +62 -3
  35. package/templates/dashboard/js/init.js +46 -0
  36. package/templates/dashboard/js/keyboard.js +8 -7
  37. package/templates/dashboard/js/quicksearch.js +277 -0
  38. package/templates/dashboard/js/state.js +61 -7
  39. package/templates/dashboard/js/toast.js +1 -1
  40. package/templates/dashboard/js/utils.js +20 -0
  41. package/templates/dashboard/js/view-live.js +240 -9
  42. package/templates/dashboard/js/view-runs.js +540 -94
  43. package/templates/dashboard/js/view-tests.js +157 -16
  44. package/templates/dashboard/js/view-tools.js +234 -0
  45. package/templates/dashboard/js/view-watch.js +2 -2
  46. package/templates/dashboard/js/websocket.js +36 -0
  47. package/templates/dashboard/styles/base.css +489 -53
  48. package/templates/dashboard/styles/components.css +719 -77
  49. package/templates/dashboard/styles/view-live.css +463 -59
  50. package/templates/dashboard/styles/view-runs.css +793 -155
  51. package/templates/dashboard/styles/view-tests.css +440 -77
  52. package/templates/dashboard/styles/view-tools.css +206 -0
  53. package/templates/dashboard/styles/view-watch.css +198 -41
  54. package/templates/dashboard/template.html +369 -56
  55. package/templates/dashboard.html +5375 -901
  56. package/templates/docker-compose-lightpanda.yml +7 -0
package/src/actions.js CHANGED
@@ -8,6 +8,50 @@
8
8
  */
9
9
 
10
10
  import path from 'path';
11
+ import fs from 'fs';
12
+ import { assertVisualMatch } from './visual-diff.js';
13
+
14
/**
 * Decides whether a page holds anything worth screenshotting.
 *
 * A page is considered empty when its URL is missing, `about:blank` or
 * `about:srcdoc`, or when its <body> is absent or has neither child
 * elements nor non-whitespace text. Skipping such pages avoids saving
 * pure-color PNGs (fresh tab before navigation, DOM-only drivers that
 * never paint, etc).
 *
 * Fails open: if reading the URL or evaluating in the page throws or
 * rejects, the page is assumed to have content so legitimate captures
 * are never lost.
 *
 * @param {object} page - Browser page exposing `url()` and `evaluate(fn)`.
 * @returns {Promise<boolean>} false only when the page is known to be empty.
 */
export async function pageHasRenderableContent(page) {
  // Serialized and executed inside the browser; must not close over
  // anything from this module.
  const probeDom = () => {
    const body = document.body;
    if (!body) return false;
    if (body.children.length > 0) return true;
    const visibleText = body.innerText || '';
    return visibleText.trim().length > 0;
  };

  try {
    const currentUrl = page.url();
    const isBlankUrl = !currentUrl || currentUrl === 'about:blank' || currentUrl === 'about:srcdoc';
    if (isBlankUrl) return false;

    // A rejected evaluation (navigation in flight, detached frame, ...)
    // counts as "has content".
    const hasContent = await page.evaluate(probeDom).catch(() => true);
    return hasContent;
  } catch {
    return true;
  }
}
36
+
37
/**
 * Heuristic post-capture guard: PNGs compressed under this size at
 * typical viewport resolutions are almost certainly near-uniform color
 * (about:blank, default Chrome BG, broken render). Catches cases the
 * pre-capture DOM check can't — e.g. browserless rendering example.com
 * to a 99%-gray frame even though navigation succeeded.
 *
 * 20 KB sits cleanly between the observed blank cluster (5 KB – 18 KB)
 * and the smallest real captures in this project (~23 KB+).
 */
export const BLANK_PNG_BYTE_THRESHOLD = 20000;
export const BLANK_JPEG_BYTE_THRESHOLD = 8000;

/**
 * Returns true when an encoded screenshot is small enough to be treated
 * as a blank / near-uniform frame.
 *
 * @param {Uint8Array} buf - Encoded image bytes. Node `Buffer`s are
 *   accepted (a Buffer is a Uint8Array), as are the plain `Uint8Array`s
 *   some browser drivers return from their screenshot call. Anything
 *   else (string, null, ...) yields false so the capture is kept.
 * @param {string} [format='png'] - 'jpeg' / 'jpg' (case-insensitive)
 *   selects the JPEG threshold; every other value uses the PNG one.
 * @returns {boolean} true if the byte size is strictly below the threshold.
 */
export function looksLikeBlankCapture(buf, format = 'png') {
  // Checking Uint8Array instead of Buffer.isBuffer keeps the guard active
  // when the driver hands back a plain typed array.
  if (!(buf instanceof Uint8Array)) return false;
  const kind = String(format).toLowerCase();
  const isJpeg = kind === 'jpeg' || kind === 'jpg';
  const threshold = isJpeg ? BLANK_JPEG_BYTE_THRESHOLD : BLANK_PNG_BYTE_THRESHOLD;
  return buf.byteLength < threshold;
}
11
55
 
12
56
  /** All recognized action types — single source of truth for validation. */
13
57
  export const KNOWN_ACTION_TYPES = new Set([
@@ -18,10 +62,12 @@ export const KNOWN_ACTION_TYPES = new Set([
18
62
  'assert_no_network_errors', 'assert_storage',
19
63
  'get_text', 'select', 'clear', 'clear_cookies', 'press', 'scroll', 'hover',
20
64
  'navigate', 'evaluate',
21
- 'type_react', 'click_regex', 'click_option', 'focus_autocomplete', 'click_chip',
65
+ 'type_react', 'click_regex', 'click_option', 'select_combobox', 'focus_autocomplete', 'click_chip',
22
66
  'set_storage', 'click_icon', 'click_menu_item', 'click_in_context',
23
67
  'assert_text_in', 'assert_no_text',
24
68
  'gql', 'wait_network_idle',
69
+ 'open_tab', 'switch_tab', 'close_tab', 'assert_tab_count', 'wait_for_tab',
70
+ 'assert_visual',
25
71
  ]);
26
72
 
27
73
  function sleep(ms) {
@@ -46,16 +92,35 @@ export async function executeAction(page, action, config) {
46
92
  await page.click(selector);
47
93
  } else if (text) {
48
94
  const clickTextSelector = 'button, a, [role="button"], [role="tab"], [role="menuitem"], [role="option"], [role="listitem"], div[class*="cursor"], span, li, td, th, label, p, h1, h2, h3, h4, h5, h6, dd, dt';
95
+ // Optional refinements (backward-compatible — defaults match old behavior):
96
+ // scope: "dialog" → only match inside an open [role=dialog]/MuiDialog
97
+ // visible: true → skip hidden/zero-size matches (implied by scope:dialog)
98
+ // last: true → click the LAST match instead of the first
99
+ const scopeSel = action.scope === 'dialog' ? '[role="dialog"], .MuiDialog-root' : null;
100
+ const wantVisible = action.visible === true || action.scope === 'dialog';
101
+ const wantLast = action.last === true;
49
102
  await page.waitForFunction(
50
- (t, sel) => [...document.querySelectorAll(sel)]
51
- .find(el => el.textContent.includes(t)),
103
+ (t, sel, scope, vis) => {
104
+ const roots = scope ? [...document.querySelectorAll(scope)] : [document];
105
+ const isVis = el => { if (!vis) return true; const r = el.getBoundingClientRect(); const s = getComputedStyle(el); return r.width > 0 && r.height > 0 && s.display !== 'none' && s.visibility !== 'hidden'; };
106
+ for (const root of roots) {
107
+ if ([...root.querySelectorAll(sel)].some(el => el.textContent.includes(t) && isVis(el))) return true;
108
+ }
109
+ return false;
110
+ },
52
111
  { timeout },
53
- text, clickTextSelector
112
+ text, clickTextSelector, scopeSel, wantVisible
54
113
  );
55
- await page.$$eval(clickTextSelector, (els, t) => {
56
- const el = els.find(e => e.textContent.includes(t));
57
- if (el) el.click();
58
- }, text);
114
+ const clicked = await page.evaluate((t, sel, scope, vis, last) => {
115
+ const roots = scope ? [...document.querySelectorAll(scope)] : [document];
116
+ const isVis = el => { if (!vis) return true; const r = el.getBoundingClientRect(); const s = getComputedStyle(el); return r.width > 0 && r.height > 0 && s.display !== 'none' && s.visibility !== 'hidden'; };
117
+ const matches = [];
118
+ for (const root of roots) matches.push(...[...root.querySelectorAll(sel)].filter(el => el.textContent.includes(t) && isVis(el)));
119
+ const el = last ? matches[matches.length - 1] : matches[0];
120
+ if (el) { el.click(); return true; }
121
+ return false;
122
+ }, text, clickTextSelector, scopeSel, wantVisible, wantLast);
123
+ if (!clicked) throw new Error(`click failed: no element containing "${text}"${scopeSel ? ' in an open dialog' : ''} found`);
59
124
  }
60
125
  break;
61
126
 
@@ -67,8 +132,34 @@ export async function executeAction(page, action, config) {
67
132
  await page.type(selector, value, { delay: 20 });
68
133
  break;
69
134
 
70
- case 'wait':
71
- if (selector) {
135
+ case 'wait': {
136
+ // Condition waits (preferred over fixed sleeps):
137
+ // { selector } → wait until it appears
138
+ // { text } → wait until text appears in the page
139
+ // { gone: "<css>" } → wait until that selector disappears/hides (e.g. spinner)
140
+ // { gone: true, selector }→ same, selector form
141
+ // { gone: true, text } → wait until text disappears
142
+ // { value: "<ms>" } → fixed sleep (last resort)
143
+ const goneSel = typeof action.gone === 'string' ? action.gone : (action.gone === true ? selector : null);
144
+ const goneTxt = action.gone === true && !selector ? text : null;
145
+ if (goneSel) {
146
+ try {
147
+ await page.waitForFunction((sel) => {
148
+ const el = document.querySelector(sel);
149
+ if (!el) return true;
150
+ const r = el.getBoundingClientRect(); const s = getComputedStyle(el);
151
+ return (r.width === 0 && r.height === 0) || s.display === 'none' || s.visibility === 'hidden' || s.opacity === '0';
152
+ }, { timeout }, goneSel);
153
+ } catch (e) {
154
+ throw new Error(`wait failed: "${goneSel}" still present/visible after ${timeout}ms`);
155
+ }
156
+ } else if (goneTxt) {
157
+ try {
158
+ await page.waitForFunction((t) => !document.body.innerText.includes(t), { timeout }, goneTxt);
159
+ } catch (e) {
160
+ throw new Error(`wait failed: text "${goneTxt}" still present after ${timeout}ms`);
161
+ }
162
+ } else if (selector) {
72
163
  try {
73
164
  await page.waitForSelector(selector, { timeout });
74
165
  } catch (e) {
@@ -88,6 +179,7 @@ export async function executeAction(page, action, config) {
88
179
  await sleep(parseInt(value));
89
180
  }
90
181
  break;
182
+ }
91
183
 
92
184
  case 'screenshot': {
93
185
  let filename = value || `screenshot-${Date.now()}.png`;
@@ -104,7 +196,20 @@ export async function executeAction(page, action, config) {
104
196
  filename = `${base}-${Date.now()}${ext}`;
105
197
  }
106
198
  const filepath = path.join(screenshotsDir, filename);
107
- await page.screenshot({ path: filepath, fullPage: action.fullPage || false });
199
+ // Skip capture when page is at about:blank or DOM is empty — these
200
+ // produce uniform-color PNGs that pollute screenshotsDir with no
201
+ // diagnostic value.
202
+ if (!(await pageHasRenderableContent(page))) {
203
+ return { screenshot: null, skipped: 'blank-page' };
204
+ }
205
+ // Capture to buffer first so we can post-filter near-uniform frames
206
+ // (e.g. browserless returning a 99%-gray render). Only persist if
207
+ // the encoded PNG carries enough entropy to be informative.
208
+ const ssBuf = await page.screenshot({ fullPage: action.fullPage || false });
209
+ if (looksLikeBlankCapture(ssBuf, 'png')) {
210
+ return { screenshot: null, skipped: 'blank-render', bytes: ssBuf.length };
211
+ }
212
+ fs.writeFileSync(filepath, ssBuf);
108
213
  return { screenshot: filepath };
109
214
  }
110
215
 
@@ -352,8 +457,11 @@ export async function executeAction(page, action, config) {
352
457
  case 'type_react': {
353
458
  // Types into React controlled inputs using the native value setter.
354
459
  // This bypasses React's synthetic event system which ignores programmatic .value changes.
460
+ // Optional: blur (commit on blur for fields that validate then),
461
+ // waitAfter (ms to wait after — e.g. for debounced autocomplete dropdowns).
355
462
  await page.waitForSelector(selector, { timeout });
356
- await page.evaluate((sel, val) => {
463
+ const trBlur = action.blur === true;
464
+ await page.evaluate((sel, val, doBlur) => {
357
465
  const input = document.querySelector(sel);
358
466
  if (!input) throw new Error(`type_react: element "${sel}" not found`);
359
467
  const proto = input instanceof HTMLTextAreaElement
@@ -363,11 +471,13 @@ export async function executeAction(page, action, config) {
363
471
  if (!descriptor || !descriptor.set) {
364
472
  throw new Error(`type_react: element "${sel}" has no writable value property`);
365
473
  }
474
+ input.focus();
366
475
  descriptor.set.call(input, val);
367
476
  input.dispatchEvent(new Event('input', { bubbles: true }));
368
477
  input.dispatchEvent(new Event('change', { bubbles: true }));
369
- input.focus();
370
- }, selector, value);
478
+ if (doBlur) input.blur();
479
+ }, selector, value, trBlur);
480
+ if (action.waitAfter) await sleep(parseInt(action.waitAfter));
371
481
  break;
372
482
  }
373
483
 
@@ -414,6 +524,56 @@ export async function executeAction(page, action, config) {
414
524
  break;
415
525
  }
416
526
 
527
+ case 'select_combobox': {
528
+ // Open a MUI Autocomplete / Select, optionally type to filter, then click the
529
+ // option matching `text` (case-insensitive substring). Falls back across
530
+ // [role=option], MuiAutocomplete-option and MuiMenuItem so it works for both
531
+ // Autocomplete listboxes and Select dropdowns.
532
+ // selector: combobox input (default input[role='combobox'])
533
+ // text: option to pick (required)
534
+ // filter: text typed into the input before picking (optional)
535
+ // openWait/filterWait: ms tuning for async/debounced option loaders
536
+ const cbInput = selector || "input[role='combobox']";
537
+ const cbOption = text || action.option;
538
+ if (!cbOption) throw new Error("select_combobox requires 'text' (option to pick)");
539
+ const cbFilter = action.filter || '';
540
+ const cbOpenWait = action.openWait ? parseInt(action.openWait) : 400;
541
+ const cbFilterWait = action.filterWait ? parseInt(action.filterWait) : 600;
542
+ await page.waitForSelector(cbInput, { timeout });
543
+ await page.evaluate((sel, flt) => {
544
+ const input = document.querySelector(sel);
545
+ if (!input) throw new Error(`select_combobox: input "${sel}" not found`);
546
+ input.focus();
547
+ if (typeof input.click === 'function') input.click();
548
+ if (flt) {
549
+ const proto = input instanceof HTMLTextAreaElement ? HTMLTextAreaElement.prototype : HTMLInputElement.prototype;
550
+ const setter = Object.getOwnPropertyDescriptor(proto, 'value').set;
551
+ setter.call(input, flt);
552
+ input.dispatchEvent(new Event('input', { bubbles: true }));
553
+ input.dispatchEvent(new Event('change', { bubbles: true }));
554
+ }
555
+ }, cbInput, cbFilter);
556
+ await sleep(cbFilter ? cbFilterWait : cbOpenWait);
557
+ const cbOptionSel = '[role="option"], .MuiAutocomplete-option, li.MuiMenuItem-root, .MuiList-root li';
558
+ try {
559
+ await page.waitForFunction(
560
+ (sels, t) => [...document.querySelectorAll(sels)].some(o => (o.textContent || '').toLowerCase().includes(t.toLowerCase())),
561
+ { timeout }, cbOptionSel, cbOption
562
+ );
563
+ } catch (e) {
564
+ throw new Error(`select_combobox: no option matching "${cbOption}" appeared (filter="${cbFilter}")`);
565
+ }
566
+ const cbPicked = await page.evaluate((sels, t) => {
567
+ const c = [...document.querySelectorAll(sels)];
568
+ const m = c.find(o => (o.textContent || '').toLowerCase().includes(t.toLowerCase()));
569
+ if (m) { m.click(); return (m.textContent || '').trim().slice(0, 80); }
570
+ return null;
571
+ }, cbOptionSel, cbOption);
572
+ if (cbPicked === null) throw new Error(`select_combobox: option "${cbOption}" vanished before click`);
573
+ if (action.waitAfter) await sleep(parseInt(action.waitAfter));
574
+ break;
575
+ }
576
+
417
577
  case 'focus_autocomplete': {
418
578
  // Focus an autocomplete/combobox input by its label text.
419
579
  // Supports MUI Autocomplete (.MuiAutocomplete-root) and generic [role="combobox"].
@@ -752,6 +912,153 @@ export async function executeAction(page, action, config) {
752
912
  break;
753
913
  }
754
914
 
915
+ // ── Visual regression ───────────────────────────────────────────────────
916
+
917
+ case 'assert_visual': {
918
+ // Compares a live screenshot against a golden reference image.
919
+ //
920
+ // value: golden image filename (relative to screenshotsDir or goldenDir) — required
921
+ // selector: optional CSS selector — screenshot only that element instead of full page
922
+ // text: optional max diff as a fraction in string form, e.g. "0.02" = 2% (default: config.verificationThreshold or 0.02)
923
+ //
924
+ // Additional fields via action object:
925
+ // fullPage: boolean (default: true)
926
+ // maskRegions: [{ x, y, width, height }] — regions to ignore (timestamps, avatars, etc.)
927
+ // threshold: number — pixel color sensitivity 0-1 (default: 0.1)
928
+ //
929
+ // Returns: { diffPercentage, differentPixels, totalPixels, diffImagePath, baselinePath, currentPath }
930
+
931
+ if (!value) throw new Error('assert_visual requires "value" (golden image filename)');
932
+
933
+ // Resolve golden image path
934
+ const goldenDir = config.goldenDir || path.join(config.screenshotsDir, 'golden');
935
+ const goldenPath = path.isAbsolute(value) ? value : path.join(goldenDir, value);
936
+
937
+ if (!fs.existsSync(goldenPath)) {
938
+ // First run: save current screenshot as the golden reference
939
+ if (!fs.existsSync(goldenDir)) fs.mkdirSync(goldenDir, { recursive: true });
940
+ const screenshotOpts = { path: goldenPath, fullPage: action.fullPage !== false };
941
+ if (selector) {
942
+ const el = await page.$(selector);
943
+ if (!el) throw new Error(`assert_visual: selector "${selector}" not found`);
944
+ await el.screenshot(screenshotOpts);
945
+ } else {
946
+ await page.screenshot(screenshotOpts);
947
+ }
948
+ return {
949
+ goldenCreated: true,
950
+ goldenPath,
951
+ message: `Golden image saved: ${path.basename(goldenPath)}. Re-run to compare.`,
952
+ };
953
+ }
954
+
955
+ // Capture current screenshot
956
+ const safeName = path.basename(value, path.extname(value));
957
+ const currentPath = path.join(screenshotsDir, `current-${safeName}-${Date.now()}.png`);
958
+ const screenshotOpts = { path: currentPath, fullPage: action.fullPage !== false };
959
+ if (selector) {
960
+ const el = await page.$(selector);
961
+ if (!el) throw new Error(`assert_visual: selector "${selector}" not found`);
962
+ await el.screenshot(screenshotOpts);
963
+ } else {
964
+ await page.screenshot(screenshotOpts);
965
+ }
966
+
967
+ // Compare
968
+ const maxDiff = text ? parseFloat(text) : (config.verificationThreshold || 0.02);
969
+ const diffPath = path.join(screenshotsDir, `diff-${safeName}-${Date.now()}.png`);
970
+ const compareResult = assertVisualMatch(goldenPath, currentPath, maxDiff, {
971
+ threshold: action.threshold || 0.1,
972
+ maskRegions: action.maskRegions || [],
973
+ diffOutputPath: diffPath,
974
+ includeAntiAlias: action.includeAntiAlias || false,
975
+ });
976
+
977
+ if (!compareResult.passed) {
978
+ const pct = (compareResult.diffPercentage * 100).toFixed(2);
979
+ const maxPct = (maxDiff * 100).toFixed(2);
980
+ throw new Error(
981
+ `assert_visual failed: ${pct}% pixels differ (threshold: ${maxPct}%). ` +
982
+ `${compareResult.differentPixels}/${compareResult.totalPixels} pixels changed. ` +
983
+ `Diff: ${path.basename(diffPath)}`
984
+ );
985
+ }
986
+
987
+ return {
988
+ diffPercentage: compareResult.diffPercentage,
989
+ differentPixels: compareResult.differentPixels,
990
+ totalPixels: compareResult.totalPixels,
991
+ diffImagePath: compareResult.diffImagePath,
992
+ baselinePath: goldenPath,
993
+ currentPath,
994
+ screenshot: diffPath,
995
+ };
996
+ }
997
+
998
+ // ── Multi-tab actions ─────────────────────────────────────────────────────
999
+ // These actions are intercepted by the runner (runTest) which manages the
1000
+ // tab registry and swaps the active page. The actual tab lifecycle happens
1001
+ // in runner.js — these cases handle the in-page parts only.
1002
+
1003
+ case 'open_tab': {
1004
+ // Opens a new tab and navigates to the given URL.
1005
+ // value: URL (absolute or relative to baseUrl) — required
1006
+ // text: optional label for the tab (used by switch_tab)
1007
+ // The runner intercepts this to create the page and register it.
1008
+ // If we reach here, it means the runner already created the page and
1009
+ // we just need to navigate.
1010
+ const tabUrl = value.startsWith('http') ? value : `${baseUrl}${value}`;
1011
+ await page.goto(tabUrl, { waitUntil: 'domcontentloaded', timeout: 60000 });
1012
+ break;
1013
+ }
1014
+
1015
+ case 'switch_tab': {
1016
+ // Switches to another open tab. The runner handles the actual page swap.
1017
+ // This case is a no-op — the runner already switched the page reference.
1018
+ break;
1019
+ }
1020
+
1021
+ case 'close_tab': {
1022
+ // Closes the current tab. The runner handles page cleanup and switching.
1023
+ // This case is a no-op — the runner closes the page and swaps back.
1024
+ break;
1025
+ }
1026
+
1027
+ case 'assert_tab_count': {
1028
+ // Asserts the number of open tabs.
1029
+ // value: expected count (number or operator expression like ">=2")
1030
+ // The runner injects __tabCount into the action object before we get here.
1031
+ // If it is missing (action not run via the runner), we throw rather than guess.
1032
+ const tabCount = action.__tabCount;
1033
+ if (tabCount === undefined) {
1034
+ throw new Error('assert_tab_count: tab count not available (action must be run via runner)');
1035
+ }
1036
+ const opMatch = value.match(/^(>=|<=|>|<)\s*(\d+)$/);
1037
+ if (opMatch) {
1038
+ const [, op, numStr] = opMatch;
1039
+ const expected = parseInt(numStr);
1040
+ const passed = op === '>' ? tabCount > expected
1041
+ : op === '>=' ? tabCount >= expected
1042
+ : op === '<' ? tabCount < expected
1043
+ : tabCount <= expected;
1044
+ if (!passed) {
1045
+ throw new Error(`assert_tab_count failed: ${tabCount} tabs open, expected ${op}${expected}`);
1046
+ }
1047
+ } else {
1048
+ const expected = parseInt(value);
1049
+ if (tabCount !== expected) {
1050
+ throw new Error(`assert_tab_count failed: ${tabCount} tabs open, expected ${expected}`);
1051
+ }
1052
+ }
1053
+ break;
1054
+ }
1055
+
1056
+ case 'wait_for_tab': {
1057
+ // Waits for a new tab/popup to appear. The runner handles this.
1058
+ // This case is a no-op — the runner already waited and registered the new tab.
1059
+ break;
1060
+ }
1061
+
755
1062
  default:
756
1063
  throw new Error(`Unknown action type: "${type}"`);
757
1064
  }
@@ -87,6 +87,16 @@ Smart interaction actions:
87
87
  - click_menu_item: click a menu item by text. Searches [role="menuitem"], .dropdown-item, .menu-item, [class*="MenuItem"]. Optional "selector" scopes the search
88
88
  - click_in_context: click a child element within a container identified by text. "text" finds the container, "selector" is the child to click. Picks the smallest matching container
89
89
 
90
+ Visual regression:
91
+ - assert_visual: compare current page against a golden reference screenshot. "value" is the golden filename (e.g. "login-page.png"). First run auto-saves the golden. "text" is optional max diff percentage (default "0.02" = 2%). "selector" captures only that element. "maskRegions" ignores dynamic areas: [{ "x": 10, "y": 5, "width": 200, "height": 30 }]. Example: { "type": "assert_visual", "value": "dashboard.png", "text": "0.05" }
92
+
93
+ Multi-tab actions (for OAuth, popups, admin+user flows):
94
+ - open_tab: open a new tab with URL in "value". Optional "text" assigns a label for switch_tab. Example: { "type": "open_tab", "value": "/admin", "text": "admin" }
95
+ - switch_tab: switch to a tab by label, title regex, URL substring, or index. Example: { "type": "switch_tab", "value": "admin" }
96
+ - close_tab: close current tab or a named tab ("value" = label). Automatically switches to previous tab. Example: { "type": "close_tab", "value": "admin" }
97
+ - wait_for_tab: wait for a popup/new tab opened by the page (window.open, target=_blank). Optional "text" labels it. Example: { "type": "wait_for_tab", "text": "oauth" }
98
+ - assert_tab_count: verify number of open tabs. "value" is count or operator. Example: { "type": "assert_tab_count", "value": "2" }
99
+
90
100
  Assertion action reference:
91
101
  - assert_text: checks if text appears anywhere in the page body
92
102
  - assert_element_text: checks textContent of a specific element (use "value": "exact" for strict match)
@@ -239,6 +249,77 @@ Existing suites: ${existingSuites.join(', ') || 'none'}`;
239
249
  };
240
250
  }
241
251
 
252
/**
 * Generates a hindsight hint for a failed test result.
 *
 * Builds a prompt from the test name, error, the last 8 recorded actions,
 * the last 5 console errors and the last 5 network errors, sends it to
 * the Claude Messages API, and parses the JSON object in the reply.
 *
 * Best-effort by design: returns null when no API key is configured, when
 * the request fails, times out (15 s, covering both the request and the
 * body read) or is not OK, or when the reply is not a JSON object.
 *
 * @param {object} failedResult - Failed test result. Reads `name`, `error`,
 *   `actions`, `consoleLogs` and `networkErrors`.
 * @param {object} [config={}] - Reads `anthropicApiKey` (falls back to
 *   the ANTHROPIC_API_KEY env var), `hintsModel` and `anthropicModel`.
 * @returns {Promise<object|null>} `{ test, ...fieldsFromModel }` or null.
 */
export async function generateHindsightHint(failedResult, config = {}) {
  const apiKey = config.anthropicApiKey || process.env.ANTHROPIC_API_KEY;
  if (!apiKey) return null;

  const model = config.hintsModel || config.anthropicModel || 'claude-sonnet-4-5-20250929';
  const lastActions = (failedResult.actions || []).slice(-8);
  const failedAction = lastActions.find(a => a.success === false);

  const consoleErrors = (failedResult.consoleLogs || [])
    .filter(l => l.type === 'error')
    .slice(-5)
    .map(l => l.text);

  const networkErrors = (failedResult.networkErrors || [])
    .slice(-5)
    .map(e => `${e.url} (${e.error})`);

  // One line per action; long values are truncated to keep the prompt small.
  const describeAction = (a, i) => {
    let valuePart = '';
    if (a.value) {
      const shown = String(a.value);
      valuePart = ' value=' + (shown.length > 80 ? shown.slice(0, 80) + '...' : shown);
    }
    const outcome = a.success === false ? 'FAILED: ' + a.error : 'OK';
    return ` ${i + 1}. ${a.type}${a.selector ? ' selector=' + a.selector : ''}${a.text ? ' text=' + a.text : ''}${valuePart} → ${outcome} (${a.duration}ms)`;
  };

  const prompt = `Analyze this failed E2E test and suggest a concrete fix.

TEST: "${failedResult.name}"
ERROR: ${failedResult.error}

LAST ACTIONS:
${lastActions.map(describeAction).join('\n')}

${failedAction ? `FAILED ACTION: ${JSON.stringify({ type: failedAction.type, selector: failedAction.selector, text: failedAction.text, value: failedAction.value?.slice?.(0, 200) })}` : ''}
${consoleErrors.length ? `CONSOLE ERRORS:\n${consoleErrors.join('\n')}` : ''}
${networkErrors.length ? `NETWORK ERRORS:\n${networkErrors.join('\n')}` : ''}

Respond with ONLY a JSON object: { "suggestion": "concrete fix description", "confidence": "high"|"medium"|"low", "fixType": "selector"|"wait"|"timeout"|"logic"|"infra"|"data" }`;

  let abortTimer;
  try {
    const controller = new AbortController();
    abortTimer = setTimeout(() => controller.abort(), 15000);

    const response = await fetch('https://api.anthropic.com/v1/messages', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': apiKey,
        'anthropic-version': '2023-06-01',
      },
      body: JSON.stringify({
        model,
        max_tokens: 1024,
        system: 'You are an E2E test debugging expert. Given a failed test, suggest the most likely fix. Be specific: name exact selectors, wait times, or code changes. Keep suggestions under 100 words.',
        messages: [{ role: 'user', content: prompt }],
      }),
      signal: controller.signal,
    });

    if (!response.ok) return null;
    const result = await response.json();
    const text = result.content?.[0]?.text;
    if (!text) return null;

    // The model sometimes wraps its JSON in a Markdown code fence.
    const cleaned = text.replace(/^```(?:json)?\s*\n?/m, '').replace(/\n?```\s*$/m, '').trim();
    const hint = JSON.parse(cleaned);
    // A bare string/number/array/null is valid JSON but not a usable hint.
    if (hint === null || typeof hint !== 'object' || Array.isArray(hint)) return null;
    return { test: failedResult.name, ...hint };
  } catch {
    // Hints are optional diagnostics; any failure degrades to "no hint".
    return null;
  } finally {
    // Always release the abort timer, including on a failed request, so it
    // cannot keep the process alive for the remainder of the 15 s.
    clearTimeout(abortTimer);
  }
}
322
+
242
323
  /**
243
324
  * Checks if the Anthropic API key is available.
244
325
  * @returns {boolean}