@matware/e2e-runner 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/.claude-plugin/marketplace.json +21 -0
  2. package/.mcp.json +2 -2
  3. package/.opencode/commands/create-test.md +63 -0
  4. package/.opencode/commands/run.md +50 -0
  5. package/.opencode/commands/verify-issue.md +62 -0
  6. package/.opencode/skills/e2e-testing/SKILL.md +181 -0
  7. package/.opencode/skills/e2e-testing/references/action-types.md +143 -0
  8. package/.opencode/skills/e2e-testing/references/auth-strategies.md +91 -0
  9. package/.opencode/skills/e2e-testing/references/graphql.md +59 -0
  10. package/.opencode/skills/e2e-testing/references/issue-verification.md +59 -0
  11. package/.opencode/skills/e2e-testing/references/multi-pool.md +60 -0
  12. package/.opencode/skills/e2e-testing/references/network-debugging.md +62 -0
  13. package/.opencode/skills/e2e-testing/references/test-json-format.md +163 -0
  14. package/.opencode/skills/e2e-testing/references/troubleshooting.md +224 -0
  15. package/.opencode/skills/e2e-testing/references/variables.md +41 -0
  16. package/.opencode/skills/e2e-testing/references/visual-verification.md +89 -0
  17. package/OPENCODE.md +166 -0
  18. package/README.md +581 -55
  19. package/agents/test-creator.md +54 -1
  20. package/agents/test-improver.md +37 -0
  21. package/bin/cli.js +408 -16
  22. package/commands/create-test.md +16 -1
  23. package/opencode.json +11 -0
  24. package/package.json +7 -2
  25. package/scripts/setup-opencode.sh +113 -0
  26. package/skills/e2e-testing/SKILL.md +10 -3
  27. package/skills/e2e-testing/references/action-types.md +48 -5
  28. package/skills/e2e-testing/references/auth-strategies.md +91 -0
  29. package/skills/e2e-testing/references/graphql.md +59 -0
  30. package/skills/e2e-testing/references/issue-verification.md +59 -0
  31. package/skills/e2e-testing/references/multi-pool.md +60 -0
  32. package/skills/e2e-testing/references/network-debugging.md +62 -0
  33. package/skills/e2e-testing/references/test-json-format.md +4 -0
  34. package/skills/e2e-testing/references/troubleshooting.md +44 -2
  35. package/skills/e2e-testing/references/variables.md +41 -0
  36. package/skills/e2e-testing/references/visual-verification.md +89 -0
  37. package/src/actions.js +324 -2
  38. package/src/ai-generate.js +58 -8
  39. package/src/config.js +143 -0
  40. package/src/dashboard.js +145 -13
  41. package/src/db.js +130 -2
  42. package/src/index.js +7 -6
  43. package/src/learner-sqlite.js +304 -0
  44. package/src/learner.js +8 -3
  45. package/src/mcp-tools.js +1121 -43
  46. package/src/module-resolver.js +37 -0
  47. package/src/narrate.js +37 -0
  48. package/src/pool-manager.js +223 -0
  49. package/src/reporter.js +82 -1
  50. package/src/runner.js +157 -28
  51. package/src/sync/auth.js +354 -0
  52. package/src/sync/client.js +572 -0
  53. package/src/sync/hub-routes.js +816 -0
  54. package/src/sync/index.js +68 -0
  55. package/src/sync/middleware.js +347 -0
  56. package/src/sync/queue.js +209 -0
  57. package/src/sync/schema.js +540 -0
  58. package/src/verify.js +10 -7
  59. package/src/watch.js +384 -0
  60. package/templates/build-dashboard.js +47 -6
  61. package/templates/dashboard/js/api.js +60 -0
  62. package/templates/dashboard/js/init.js +13 -0
  63. package/templates/dashboard/js/keyboard.js +46 -0
  64. package/templates/dashboard/js/state.js +40 -0
  65. package/templates/dashboard/js/toast.js +41 -0
  66. package/templates/dashboard/js/utils.js +196 -0
  67. package/templates/dashboard/js/view-live.js +143 -0
  68. package/templates/dashboard/js/view-runs.js +572 -0
  69. package/templates/dashboard/js/view-tests.js +294 -0
  70. package/templates/dashboard/js/view-watch.js +242 -0
  71. package/templates/dashboard/js/websocket.js +110 -0
  72. package/templates/dashboard/styles/base.css +69 -0
  73. package/templates/dashboard/styles/components.css +110 -0
  74. package/templates/dashboard/styles/view-live.css +74 -0
  75. package/templates/dashboard/styles/view-runs.css +207 -0
  76. package/templates/dashboard/styles/view-tests.css +96 -0
  77. package/templates/dashboard/styles/view-watch.css +53 -0
  78. package/templates/dashboard/template.html +165 -99
  79. package/templates/dashboard.html +1596 -541
  80. package/templates/sample-test.json +0 -8
  81. package/templates/dashboard/app.js +0 -1152
  82. package/templates/dashboard/styles.css +0 -413
@@ -148,8 +148,8 @@ Or use `assert_no_network_errors` at specific points:
148
148
  Use network log drill-down:
149
149
  ```
150
150
  e2e_network_logs(runDbId, errorsOnly: true) → see all failed requests
151
- e2e_network_logs(runDbId, urlPattern: "/api/patients") → filter by URL
152
- e2e_network_logs(runDbId, testName: "create-patient", includeBodies: true) → full request/response
151
+ e2e_network_logs(runDbId, urlPattern: "/api/users") → filter by URL
152
+ e2e_network_logs(runDbId, testName: "create-user", includeBodies: true) → full request/response
153
153
  ```
154
154
 
155
155
  ## Common Mistakes
@@ -180,3 +180,45 @@ When checking paths, use path-only format (starts with `/`):
180
180
  { "type": "assert_url", "value": "/dashboard" }
181
181
  ```
182
182
  This compares against the pathname only, ignoring the `host.docker.internal` origin.
183
+
184
+ ## Action Type Pre-Validation
185
+
186
+ All action types are validated at **load time** (before any browser connections). If a test file contains an unknown action type (e.g., a typo like `"clik"`), loading throws immediately with the location:
187
+
188
+ ```
189
+ Unknown action type(s) in auth.json: "clik" in test "login-test"
190
+ ```
191
+
192
+ The `KNOWN_ACTION_TYPES` Set in `src/actions.js` is the single source of truth. Unknown actions also throw at runtime as a safety net.
193
+
194
+ ## Screenshot Hashes
195
+
196
+ Every screenshot captured during a run is assigned a short hash (`ss:a3f2b1c9`) — the first 8 hex chars of the SHA-256 of its file path. Hashes are deterministic and computed identically on the server (Node `crypto`) and in the browser (Web Crypto API).
197
+
198
+ **Flow**: screenshot saved on disk → `saveRun()` registers hash in SQLite `screenshot_hashes` table → dashboard shows `[ss:XXXXXXXX]` badge (click to copy) → user pastes hash in Claude Code → `e2e_screenshot` MCP tool looks up hash, reads file, returns the image.
199
+
200
+ - Hashes are registered inside the `saveRun()` transaction (covers action, error, verification, and baseline screenshots)
201
+ - The `ss:` prefix is optional when calling `e2e_screenshot` — stripped during lookup
202
+ - Dashboard computes hashes client-side (Web Crypto) for the Live view (before `persistRun()` writes to DB)
203
+ - Run detail API (`/api/db/runs/:id`) includes `screenshotHashes` map per test result
204
+ - Dashboard endpoint `/api/screenshot-hash/:hash` serves the image by hash
205
+ - Dashboard Screenshots view has a **search bar** — type a hash to find and display the screenshot
206
+
207
+ ## Web Dashboard
208
+
209
+ **`src/dashboard.js`** — HTTP server, REST API, WebSocket broadcast, pool polling.
210
+ **`templates/dashboard.html`** — SPA, dark theme, vanilla JS, safe DOM (textContent + createEl helper).
211
+
212
+ **Features:**
213
+ - Live test execution with WebSocket updates
214
+ - Run history with inline detail expansion
215
+ - Screenshots gallery with hash badges and hash search
216
+ - Network request logs with clickable expandable rows (full request/response detail)
217
+ - Pool status monitoring
218
+ - Multi-project support via project selector
219
+ - Variables tab with masked values, inline edit, add, and delete
220
+
221
+ **CLI:** `e2e-runner dashboard [--port 8484]`
222
+ **MCP tools:** `e2e_dashboard_start`, `e2e_dashboard_stop`
223
+
224
+ Config defaults: `dashboardPort: 8484`, `maxHistoryRuns: 100`
@@ -0,0 +1,41 @@
1
+ # Variables Reference
2
+
3
+ Variables replace hardcoded sensitive values (JWT tokens, user IDs, API keys, etc.) in test JSON. They are stored in SQLite (`~/.e2e-runner/dashboard.db`), scoped per project and per suite, and editable from the dashboard UI.
4
+
5
+ ## Syntax
6
+
7
+ ```
8
+ {{var.TOKEN}} → resolves from DB (suite scope → project scope)
9
+ {{env.MY_VAR}} → resolves from process.env
10
+ {{param}} → existing module param substitution (unchanged)
11
+ ```
12
+
13
+ **Resolution priority:** suite vars > project vars > error if not found.
14
+
15
+ ## Usage in Test JSON
16
+
17
+ ```json
18
+ { "$use": "auth-jwt", "params": { "token": "{{var.JWT_TOKEN}}", "orgId": "{{var.ORG_ID}}" } }
19
+ { "type": "goto", "value": "/users/{{var.USER_ID}}/profile" }
20
+ { "type": "gql", "value": "{ user(id: \"{{var.USER_ID}}\") { name } }" }
21
+ ```
22
+
23
+ ## MCP Tool (`e2e_vars`)
24
+
25
+ ```
26
+ e2e_vars({ action: "set", key: "TOKEN", value: "abc123", scope: "project" })
27
+ e2e_vars({ action: "set", key: "TOKEN", value: "xyz789", scope: "auth" }) // suite-specific override
28
+ e2e_vars({ action: "list" })
29
+ e2e_vars({ action: "get", key: "TOKEN" })
30
+ e2e_vars({ action: "delete", key: "TOKEN", scope: "project" })
31
+ ```
32
+
33
+ ## Dashboard UI
34
+
35
+ Variables tab shows all variables grouped by scope. Values are masked by default (click to reveal). Inline edit, add new, and delete are supported.
36
+
37
+ ## REST API
38
+
39
+ - `GET /api/db/projects/:id/variables` — list all vars for project
40
+ - `PUT /api/db/projects/:id/variables` — set a variable `{ scope, key, value }`
41
+ - `DELETE /api/db/projects/:id/variables/:scope/:key` — delete a variable
@@ -0,0 +1,89 @@
1
+ # Visual Verification Reference
2
+
3
+ Tests can include an `expect` field for AI-powered visual verification. No API key required — Claude Code itself does the visual judgment.
4
+
5
+ ## Expect Field Formats
6
+
7
+ ### String form — free-form description
8
+ ```json
9
+ {
10
+ "name": "dashboard-loads",
11
+ "expect": "Should show the data table with at least 3 rows, no error messages, and the sidebar with navigation links",
12
+ "actions": [
13
+ { "type": "goto", "value": "/dashboard" },
14
+ { "type": "wait", "selector": ".data-table" }
15
+ ]
16
+ }
17
+ ```
18
+
19
+ ### Array form — per-criterion checklist (each evaluated independently as PASS/FAIL)
20
+ ```json
21
+ {
22
+ "name": "dashboard-loads",
23
+ "expect": [
24
+ "Data table visible with at least 3 rows",
25
+ "No error messages or red banners",
26
+ "Sidebar shows navigation links"
27
+ ],
28
+ "actions": [
29
+ { "type": "goto", "value": "/dashboard" },
30
+ { "type": "wait", "selector": ".data-table" }
31
+ ]
32
+ }
33
+ ```
34
+
35
+ ## Double Screenshot (Before/After)
36
+
37
+ When `expect` is present, the runner captures TWO screenshots:
38
+ 1. **Baseline** (`baseline-{name}-{timestamp}.png`) — captured BEFORE test actions run (after `beforeEach` hooks)
39
+ 2. **Verification** (`verify-{name}-{timestamp}.png`) — captured AFTER all actions complete
40
+
41
+ Both hashes are registered in SQLite and returned in the MCP response for before/after comparison.
42
+
43
+ ## Verification Strictness
44
+
45
+ The verification strictness setting controls how strictly Claude Code evaluates visual verification. Set it via:
46
+ - Config: `verificationStrictness: 'moderate'`
47
+ - CLI: `--verification-strictness strict`
48
+ - Env: `VERIFICATION_STRICTNESS=strict`
49
+ - MCP: `verificationStrictness: 'strict'` in `e2e_run` args
50
+
51
+ | Level | Behavior |
52
+ |-------|----------|
53
+ | **`strict`** | No ambiguity allowed. If any criterion is unclear, not fully visible, or doubtful → FAIL. |
54
+ | **`moderate`** (default) | Reasonable judgment. Minor cosmetic differences acceptable, functional mismatches → FAIL. |
55
+ | **`lenient`** | Only fail on clear, obvious contradictions. |
56
+
57
+ ## MCP Response Format
58
+
59
+ The `e2e_run` response includes a `verifications` array:
60
+ ```json
61
+ {
62
+ "verifications": [
63
+ {
64
+ "name": "dashboard-loads",
65
+ "expect": ["Data table visible...", "No error messages..."],
66
+ "success": true,
67
+ "screenshotHash": "ss:a3f2b1c9",
68
+ "baselineScreenshotHash": "ss:b4e1c2d8",
69
+ "isChecklist": true
70
+ }
71
+ ],
72
+ "verificationInstructions": "Verification strictness: MODERATE — ..."
73
+ }
74
+ ```
75
+
76
+ ## Verdict Format
77
+
78
+ After calling `e2e_screenshot` for each hash (verification + baseline), Claude Code reports a structured verdict:
79
+
80
+ ```
81
+ TEST: dashboard-loads
82
+ VERDICT: PASS
83
+ STATE CHANGE: Page loaded from blank to populated dashboard
84
+ CRITERIA:
85
+ - "Data table visible with at least 3 rows": PASS
86
+ - "No error messages or red banners": PASS
87
+ - "Sidebar shows navigation links": PASS
88
+ REASON: All criteria met, dashboard fully loaded with expected content
89
+ ```
package/src/actions.js CHANGED
@@ -8,7 +8,21 @@
8
8
  */
9
9
 
10
10
  import path from 'path';
11
- import { log } from './logger.js';
11
+
12
+ /** All recognized action types — single source of truth for validation. */
13
+ export const KNOWN_ACTION_TYPES = new Set([
14
+ 'goto', 'click', 'type', 'fill', 'wait', 'screenshot',
15
+ 'assert_text', 'assert_url', 'assert_visible', 'assert_count',
16
+ 'assert_element_text', 'assert_attribute', 'assert_class',
17
+ 'assert_not_visible', 'assert_input_value', 'assert_matches',
18
+ 'assert_no_network_errors', 'assert_storage',
19
+ 'get_text', 'select', 'clear', 'clear_cookies', 'press', 'scroll', 'hover',
20
+ 'navigate', 'evaluate',
21
+ 'type_react', 'click_regex', 'click_option', 'focus_autocomplete', 'click_chip',
22
+ 'set_storage', 'click_icon', 'click_menu_item', 'click_in_context',
23
+ 'assert_text_in', 'assert_no_text',
24
+ 'gql', 'wait_network_idle',
25
+ ]);
12
26
 
13
27
  function sleep(ms) {
14
28
  return new Promise(resolve => setTimeout(resolve, ms));
@@ -102,6 +116,16 @@ export async function executeAction(page, action, config) {
102
116
  break;
103
117
  }
104
118
 
119
+ case 'assert_no_text': {
120
+ // Assert that text does NOT appear anywhere on the page.
121
+ // text: substring to check for absence (required)
122
+ const bodyTextNo = await page.evaluate(() => document.body.innerText);
123
+ if (bodyTextNo.includes(text)) {
124
+ throw new Error(`assert_no_text failed: "${text}" was found on the page but should not be present`);
125
+ }
126
+ break;
127
+ }
128
+
105
129
  case 'assert_url': {
106
130
  const currentUrl = page.url();
107
131
  let match = false;
@@ -240,6 +264,30 @@ export async function executeAction(page, action, config) {
240
264
  break;
241
265
  }
242
266
 
267
+ case 'assert_text_in': {
268
+ // Assert that text exists inside a scoped container element.
269
+ // selector: CSS selector for the container (required)
270
+ // text: substring or regex pattern to match against container's textContent (required)
271
+ // value: "i" for case-insensitive regex (default), "exact" for case-sensitive substring
272
+ if (!selector) throw new Error('assert_text_in requires "selector"');
273
+ if (!text) throw new Error('assert_text_in requires "text"');
274
+ await page.waitForSelector(selector, { timeout });
275
+ const containerText = await page.$$eval(selector, els => els.map(el => el.textContent).join(' '));
276
+ const flags = value === 'exact' ? '' : 'i';
277
+ if (value === 'exact') {
278
+ if (!containerText.includes(text)) {
279
+ const preview = containerText.length > 200 ? containerText.slice(0, 200) + '...' : containerText;
280
+ throw new Error(`assert_text_in failed: "${text}" not found in "${selector}"\n Content: ${preview}`);
281
+ }
282
+ } else {
283
+ if (!new RegExp(text, flags).test(containerText)) {
284
+ const preview = containerText.length > 200 ? containerText.slice(0, 200) + '...' : containerText;
285
+ throw new Error(`assert_text_in failed: /${text}/${flags} not found in "${selector}"\n Content: ${preview}`);
286
+ }
287
+ }
288
+ break;
289
+ }
290
+
243
291
  case 'get_text': {
244
292
  await page.waitForSelector(selector, { timeout });
245
293
  const getText = await page.$eval(selector, el => el.textContent.trim());
@@ -409,6 +457,273 @@ export async function executeAction(page, action, config) {
409
457
  break;
410
458
  }
411
459
 
460
+ case 'set_storage': {
461
+ // Set a localStorage or sessionStorage key.
462
+ // value: "key=val", selector: "session" for sessionStorage (default: localStorage)
463
+ const eqIdx = value.indexOf('=');
464
+ if (eqIdx === -1) {
465
+ throw new Error(`set_storage: value must be "key=value", got "${value}"`);
466
+ }
467
+ const storageKey = value.slice(0, eqIdx);
468
+ const storageVal = value.slice(eqIdx + 1);
469
+ const storageType = selector === 'session' ? 'sessionStorage' : 'localStorage';
470
+ await page.evaluate((sType, k, v) => {
471
+ window[sType].setItem(k, v);
472
+ }, storageType, storageKey, storageVal);
473
+ break;
474
+ }
475
+
476
+ case 'assert_storage': {
477
+ // Assert a localStorage or sessionStorage key exists or has a specific value.
478
+ // value: "key" (existence) or "key=expected" (value match)
479
+ // selector: "session" for sessionStorage (default: localStorage)
480
+ const storageType = selector === 'session' ? 'sessionStorage' : 'localStorage';
481
+ const eqIdx = value.indexOf('=');
482
+ if (eqIdx === -1) {
483
+ // Existence check
484
+ const exists = await page.evaluate((sType, k) => window[sType].getItem(k) !== null, storageType, value);
485
+ if (!exists) {
486
+ throw new Error(`assert_storage failed: ${storageType} key "${value}" does not exist`);
487
+ }
488
+ } else {
489
+ const storageKey = value.slice(0, eqIdx);
490
+ const expectedVal = value.slice(eqIdx + 1);
491
+ const actual = await page.evaluate((sType, k) => window[sType].getItem(k), storageType, storageKey);
492
+ if (actual === null) {
493
+ throw new Error(`assert_storage failed: ${storageType} key "${storageKey}" does not exist`);
494
+ }
495
+ if (actual !== expectedVal) {
496
+ throw new Error(`assert_storage failed: ${storageType} key "${storageKey}" is "${actual}", expected "${expectedVal}"`);
497
+ }
498
+ }
499
+ break;
500
+ }
501
+
502
+ case 'click_icon': {
503
+ // Click an icon element by identifier — works with MUI, FontAwesome, Heroicons, Bootstrap Icons, etc.
504
+ // value: icon identifier (data-testid fragment, class fragment, aria-label, or SVG text/title)
505
+ // selector: optional CSS scope to narrow the search
506
+ const iconId = value;
507
+ const iconScope = selector || null;
508
+ await page.waitForFunction(
509
+ (id, scope) => {
510
+ const root = scope ? document.querySelector(scope) : document;
511
+ if (!root) return false;
512
+ // Search by common icon attribute patterns
513
+ const attrSelectors = [
514
+ `[data-testid*="${id}"]`,
515
+ `[data-icon*="${id}"]`,
516
+ `[aria-label*="${id}"]`,
517
+ `svg[class*="${id}"]`,
518
+ `i[class*="${id}"]`,
519
+ `span[class*="${id}"]`,
520
+ ];
521
+ for (const sel of attrSelectors) {
522
+ if (root.querySelector(sel)) return true;
523
+ }
524
+ // Search all SVGs for matching text content or title
525
+ for (const svg of root.querySelectorAll('svg')) {
526
+ const title = svg.querySelector('title');
527
+ if (title && title.textContent.toLowerCase().includes(id.toLowerCase())) return true;
528
+ if (svg.getAttribute('aria-label')?.toLowerCase().includes(id.toLowerCase())) return true;
529
+ }
530
+ return false;
531
+ },
532
+ { timeout },
533
+ iconId, iconScope
534
+ );
535
+ const clicked = await page.evaluate(
536
+ (id, scope) => {
537
+ const root = scope ? document.querySelector(scope) : document;
538
+ if (!root) return false;
539
+ let icon = null;
540
+ const attrSelectors = [
541
+ `[data-testid*="${id}"]`,
542
+ `[data-icon*="${id}"]`,
543
+ `[aria-label*="${id}"]`,
544
+ `svg[class*="${id}"]`,
545
+ `i[class*="${id}"]`,
546
+ `span[class*="${id}"]`,
547
+ ];
548
+ for (const sel of attrSelectors) {
549
+ icon = root.querySelector(sel);
550
+ if (icon) break;
551
+ }
552
+ // Fallback: search SVGs by title/aria-label text
553
+ if (!icon) {
554
+ for (const svg of root.querySelectorAll('svg')) {
555
+ const title = svg.querySelector('title');
556
+ if (title && title.textContent.toLowerCase().includes(id.toLowerCase())) { icon = svg; break; }
557
+ if (svg.getAttribute('aria-label')?.toLowerCase().includes(id.toLowerCase())) { icon = svg; break; }
558
+ }
559
+ }
560
+ if (!icon) return false;
561
+ // Walk up to nearest clickable ancestor
562
+ const clickableSelector = 'button, a, [role="button"], [role="tab"], [role="menuitem"]';
563
+ const clickable = icon.closest(clickableSelector);
564
+ (clickable || icon).click();
565
+ return true;
566
+ },
567
+ iconId, iconScope
568
+ );
569
+ if (!clicked) {
570
+ throw new Error(`click_icon failed: no icon matching "${iconId}" found${iconScope ? ` in "${iconScope}"` : ''}`);
571
+ }
572
+ break;
573
+ }
574
+
575
+ case 'click_menu_item': {
576
+ // Click a menu item by text content.
577
+ // text: menu item text to match (case-sensitive, substring)
578
+ // selector: optional CSS scope
579
+ const menuSelector = [
580
+ '[role="menuitem"]',
581
+ '[role="menuitemradio"]',
582
+ '[role="menuitemcheckbox"]',
583
+ '.dropdown-item',
584
+ '.menu-item',
585
+ '[class*="MenuItem"]',
586
+ '[role="menu"] > li',
587
+ ].join(', ');
588
+ const menuScope = selector || null;
589
+ await page.waitForFunction(
590
+ (t, sel, scope) => {
591
+ const root = scope ? document.querySelector(scope) : document;
592
+ if (!root) return false;
593
+ return [...root.querySelectorAll(sel)].some(el => el.textContent.includes(t));
594
+ },
595
+ { timeout },
596
+ text, menuSelector, menuScope
597
+ );
598
+ const clicked = await page.evaluate(
599
+ (t, sel, scope) => {
600
+ const root = scope ? document.querySelector(scope) : document;
601
+ if (!root) return false;
602
+ const match = [...root.querySelectorAll(sel)].find(el => el.textContent.includes(t));
603
+ if (match) { match.click(); return true; }
604
+ return false;
605
+ },
606
+ text, menuSelector, menuScope
607
+ );
608
+ if (!clicked) {
609
+ throw new Error(`click_menu_item failed: no menu item containing "${text}" found${menuScope ? ` in "${menuScope}"` : ''}`);
610
+ }
611
+ break;
612
+ }
613
+
614
+ case 'click_in_context': {
615
+ // Click a child element within a container identified by text content.
616
+ // text: text to find the container (required)
617
+ // selector: CSS selector for the child to click within that container (required)
618
+ if (!text || !selector) {
619
+ throw new Error('click_in_context requires both "text" (container text) and "selector" (child to click)');
620
+ }
621
+ const containerSelectors = [
622
+ 'section', 'article',
623
+ '[class*="card"]', '[class*="Card"]',
624
+ '[class*="panel"]', '[class*="Panel"]',
625
+ '[class*="item"]', '[class*="Item"]',
626
+ '.MuiGrid-item', '[class*="MuiGrid2"]',
627
+ '[class*="row"]', '[class*="Row"]',
628
+ 'details', 'fieldset',
629
+ '[role="region"]', '[role="group"]', '[role="listitem"]',
630
+ 'li', 'tr', 'div[class]',
631
+ ].join(', ');
632
+ await page.waitForFunction(
633
+ (t, childSel, containerSels) => {
634
+ const containers = [...document.querySelectorAll(containerSels)]
635
+ .filter(el => el.textContent.includes(t));
636
+ // Sort by innerHTML length (smallest = most specific)
637
+ containers.sort((a, b) => a.innerHTML.length - b.innerHTML.length);
638
+ for (const c of containers) {
639
+ if (c.querySelector(childSel)) return true;
640
+ }
641
+ return false;
642
+ },
643
+ { timeout },
644
+ text, selector, containerSelectors
645
+ );
646
+ const clicked = await page.evaluate(
647
+ (t, childSel, containerSels) => {
648
+ const containers = [...document.querySelectorAll(containerSels)]
649
+ .filter(el => el.textContent.includes(t));
650
+ containers.sort((a, b) => a.innerHTML.length - b.innerHTML.length);
651
+ for (const c of containers) {
652
+ const child = c.querySelector(childSel);
653
+ if (child) { child.click(); return true; }
654
+ }
655
+ return false;
656
+ },
657
+ text, selector, containerSelectors
658
+ );
659
+ if (!clicked) {
660
+ throw new Error(`click_in_context failed: no "${selector}" found in container with text "${text}"`);
661
+ }
662
+ break;
663
+ }
664
+
665
+ case 'gql': {
666
+ // Execute a GraphQL query/mutation via browser fetch.
667
+ // Reads auth token from localStorage and sends it as a configurable header.
668
+ // Installs window.__e2eGql(query, vars) helper for use in subsequent evaluate actions.
669
+ //
670
+ // value: GraphQL query/mutation string (required)
671
+ // text: variables as JSON string (optional)
672
+ // selector: JS expression assertion — receives response as `r` (optional)
673
+ const gqlEndpoint = config.gqlEndpoint || '/api/graphql';
674
+ const gqlAuthHeader = config.gqlAuthHeader || 'Authorization';
675
+ const gqlAuthKey = config.gqlAuthKey || 'accessToken';
676
+ const gqlAuthPrefix = config.gqlAuthPrefix ?? 'Bearer ';
677
+ const gqlVars = text || undefined;
678
+
679
+ const gqlResult = await page.evaluate(async (query, varsJson, endpoint, authHdr, authKey, authPfx) => {
680
+ // Install reusable helper on first call
681
+ if (!window.__e2eGql) {
682
+ window.__e2eGqlConfig = { endpoint, authHeader: authHdr, authKey, authPrefix: authPfx };
683
+ window.__e2eGql = async (q, v) => {
684
+ const cfg = window.__e2eGqlConfig;
685
+ const token = localStorage.getItem(cfg.authKey);
686
+ const headers = { 'Content-Type': 'application/json' };
687
+ if (token) headers[cfg.authHeader] = cfg.authPrefix + token;
688
+ const resp = await fetch(location.origin + cfg.endpoint, {
689
+ method: 'POST', headers,
690
+ body: JSON.stringify({ query: q, variables: v }),
691
+ });
692
+ return resp.json();
693
+ };
694
+ }
695
+
696
+ const vars = varsJson ? JSON.parse(varsJson) : undefined;
697
+ const response = await window.__e2eGql(query, vars);
698
+ window.__e2eLastGql = response;
699
+ return response;
700
+ }, value, gqlVars, gqlEndpoint, gqlAuthHeader, gqlAuthKey, gqlAuthPrefix);
701
+
702
+ // Check for GraphQL errors
703
+ if (gqlResult.errors?.length) {
704
+ throw new Error(`gql failed: ${gqlResult.errors.map(e => e.message).join('; ')}`);
705
+ }
706
+
707
+ // Optional assertion via selector field (JS expression, `r` = full response)
708
+ // Intentional: runs JS in browser page context from team-authored JSON test files,
709
+ // same security model as the 'evaluate' action type.
710
+ if (selector) {
711
+ const assertResult = await page.evaluate((code, r) => {
712
+ const fn = new Function('r', `return (${code})`); // eslint-disable-line no-new-func
713
+ return fn(r);
714
+ }, selector, gqlResult);
715
+
716
+ if (typeof assertResult === 'string' && /^(FAIL|ERROR|FAILED)[\s:]/i.test(assertResult)) {
717
+ throw new Error(`gql assertion: ${assertResult}`);
718
+ }
719
+ if (assertResult === false) {
720
+ throw new Error(`gql assertion returned false`);
721
+ }
722
+ }
723
+
724
+ return { value: gqlResult.data };
725
+ }
726
+
412
727
  case 'evaluate': {
413
728
  // Intentional: runs JS in browser page context (from test JSON files)
414
729
  const jsSnippet = value.length > 120 ? value.slice(0, 120) + '...' : value;
@@ -430,8 +745,15 @@ export async function executeAction(page, action, config) {
430
745
  return evalResult !== undefined && evalResult !== null ? { value: evalResult } : null;
431
746
  }
432
747
 
748
+ case 'wait_network_idle': {
749
+ const idleTime = value ? parseInt(value) : 500;
750
+ const maxTimeout = action.timeout ? parseInt(action.timeout) : 30000;
751
+ await page.waitForNetworkIdle({ idleTime, timeout: maxTimeout });
752
+ break;
753
+ }
754
+
433
755
  default:
434
- log('⚠️', `Unknown action: ${type}`);
756
+ throw new Error(`Unknown action type: "${type}"`);
435
757
  }
436
758
 
437
759
  return null;
@@ -52,7 +52,18 @@ The test format is:
52
52
  { "type": "click_regex", "text": "submit order", "selector": "button", "value": "last" },
53
53
  { "type": "click_option", "text": "Option Label" },
54
54
  { "type": "focus_autocomplete", "text": "Search by label" },
55
- { "type": "click_chip", "text": "Tag Name" }
55
+ { "type": "click_chip", "text": "Tag Name" },
56
+ { "type": "set_storage", "value": "token=abc123" },
57
+ { "type": "set_storage", "value": "theme=dark", "selector": "session" },
58
+ { "type": "assert_storage", "value": "token" },
59
+ { "type": "assert_storage", "value": "theme=dark", "selector": "session" },
60
+ { "type": "click_icon", "value": "edit" },
61
+ { "type": "click_icon", "value": "delete", "selector": ".user-card" },
62
+ { "type": "click_menu_item", "text": "Delete" },
63
+ { "type": "click_menu_item", "text": "Export", "selector": ".actions-menu" },
64
+ { "type": "click_in_context", "text": "John Doe", "selector": "button.edit" },
65
+ { "type": "gql", "value": "{ users { id name } }" },
66
+ { "type": "gql", "value": "query($id: ID) { user(id: $id) { name } }", "text": "{\"id\": \"123\"}" }
56
67
  ]
57
68
  }
58
69
  ]
@@ -64,6 +75,18 @@ Framework-aware action reference (prefer these over evaluate for React/MUI apps)
64
75
  - focus_autocomplete: focus an autocomplete input by its label text (supports MUI .MuiAutocomplete-root and [role="combobox"])
65
76
  - click_chip: click a chip/tag element by text (searches [class*="Chip"], [data-chip])
66
77
 
78
+ Storage actions:
79
+ - set_storage: set a localStorage key. "value": "key=val". Use "selector": "session" for sessionStorage
80
+ - assert_storage: assert a storage key exists ("value": "key") or has a value ("value": "key=expected"). Use "selector": "session" for sessionStorage
81
+
82
+ GraphQL action:
83
+ - gql: execute a GraphQL query/mutation via browser fetch. Auth token is read from localStorage automatically (configurable via gqlAuthHeader, gqlAuthKey, gqlAuthPrefix). "value" is the query string. "text" is variables as JSON string. "selector" is an optional JS assertion expression (receives response as "r"). Throws on GraphQL errors automatically. Also installs window.__e2eGql(query, vars) for use in subsequent evaluate actions
84
+
85
+ Smart interaction actions:
86
+ - click_icon: click an icon by identifier (data-testid fragment, class fragment, aria-label, SVG title). Walks up to nearest clickable parent (button, a, etc.). Optional "selector" scopes the search
87
+ - click_menu_item: click a menu item by text. Searches [role="menuitem"], .dropdown-item, .menu-item, [class*="MenuItem"]. Optional "selector" scopes the search
88
+ - click_in_context: click a child element within a container identified by text. "text" finds the container, "selector" is the child to click. Picks the smallest matching container
89
+
67
90
  Assertion action reference:
68
91
  - assert_text: checks if text appears anywhere in the page body
69
92
  - assert_element_text: checks textContent of a specific element (use "value": "exact" for strict match)
@@ -80,8 +103,15 @@ Reusable modules:
80
103
  - Tests can reference shared action sequences: { "$use": "module-name", "params": { "key": "value" } }
81
104
  - Use modules for repeated flows like login, navigation, or setup
82
105
 
106
+ Hooks and DRY patterns:
107
+ - When multiple tests share the same setup (e.g. authentication), use beforeEach instead of repeating it per test
108
+ - Object format with hooks: { "beforeEach": [...], "tests": [{ "name": "...", "actions": [...] }] }
109
+ - Array format (no hooks): [{ "name": "...", "actions": [...] }]
110
+ - If 3+ tests repeat the same action sequence (e.g. goto + wait + screenshot), extract it into a module
111
+ - NEVER repeat the same $use call with identical params across all tests — move it to beforeEach
112
+
83
113
  Rules:
84
- - Output a JSON array of test objects
114
+ - Output valid JSON: either a plain array of test objects, or an object with "beforeEach"/"tests" keys when hooks are needed
85
115
  - NEVER use evaluate with inline JS for assertions that can be done with native action types:
86
116
  * Use assert_element_text instead of evaluate to check element textContent
87
117
  * Use assert_attribute instead of evaluate to check HTML attributes
@@ -94,6 +124,12 @@ Rules:
94
124
  * Use click_option instead of evaluate with querySelectorAll('[role="option"]') patterns
95
125
  * Use focus_autocomplete instead of evaluate with MuiAutocomplete-root label search patterns
96
126
  * Use click_chip instead of evaluate with querySelectorAll('[class*="Chip"]') patterns
127
+ * Use set_storage instead of evaluate with localStorage.setItem or sessionStorage.setItem
128
+ * Use assert_storage instead of evaluate with localStorage.getItem or sessionStorage.getItem checks
129
+ * Use click_icon instead of evaluate with querySelector('svg[data-testid]').closest('button').click() patterns
130
+ * Use click_menu_item instead of evaluate with querySelectorAll('[role="menuitem"]') patterns
131
+ * Use click_in_context instead of evaluate that finds a container by text then clicks a child element
132
+ * Use gql instead of evaluate with fetch + JSON.stringify + GraphQL queries/mutations
97
133
  * Reserve evaluate ONLY for complex logic that cannot be expressed with existing action types
98
134
  - "click" with "text" (no selector) finds buttons/links by visible text
99
135
  - "goto" values starting with "/" are relative to the app's base URL
@@ -117,9 +153,11 @@ CRITICAL — UI-first testing rules:
117
153
 
118
154
  const API_RULES = `
119
155
  API testing rules:
120
- - Tests verify backend API behavior directly via evaluate actions
156
+ - Tests verify backend API behavior directly via gql actions (preferred) or evaluate actions
121
157
  - Each test should: set up context → call API → assert response shape and values
122
- - Use evaluate for GraphQL mutations, queries, and REST calls
158
+ - PREFER the gql action for GraphQL queries/mutations — it handles auth and error checking automatically
159
+ - Use gql with "selector" field for inline assertions on the response (JS expression where "r" is the response)
160
+ - Use evaluate with window.__e2eGql() for complex multi-step GraphQL operations (the helper is installed by any gql action)
123
161
  - Name tests clearly describing the API operation (e.g. "createUser-returns-new-user")
124
162
  - Include error case tests (invalid input, missing fields, auth failures)
125
163
  - No need for goto/click/type — this is not UI testing
@@ -247,7 +285,7 @@ Test Category: ${testType}
247
285
  ${categoryRules}
248
286
  Base URL: ${config.baseUrl}
249
287
 
250
- Output a JSON array of test objects. Nothing else.`;
288
+ Output ONLY valid JSON. Either a plain array of test objects, or an object with "beforeEach" and "tests" keys if hooks are needed. Nothing else.`;
251
289
 
252
290
  const response = await fetch('https://api.anthropic.com/v1/messages', {
253
291
  method: 'POST',
@@ -288,9 +326,21 @@ Output a JSON array of test objects. Nothing else.`;
288
326
  throw new Error(`Failed to parse generated tests as JSON: ${err.message}\n\nRaw output:\n${text}`);
289
327
  }
290
328
 
291
- if (!Array.isArray(tests)) {
292
- throw new Error('Generated tests must be a JSON array');
329
+ // Accept both array format and object format with hooks
330
+ let hooks;
331
+ if (Array.isArray(tests)) {
332
+ // Plain array: [{ name, actions }]
333
+ } else if (tests && Array.isArray(tests.tests)) {
334
+ // Object with hooks: { beforeEach: [...], tests: [...] }
335
+ hooks = {};
336
+ for (const key of ['beforeAll', 'afterAll', 'beforeEach', 'afterEach']) {
337
+ if (Array.isArray(tests[key])) hooks[key] = tests[key];
338
+ }
339
+ if (Object.keys(hooks).length === 0) hooks = undefined;
340
+ tests = tests.tests;
341
+ } else {
342
+ throw new Error('Generated tests must be a JSON array or an object with a "tests" array');
293
343
  }
294
344
 
295
- return { tests, suiteName };
345
+ return { tests, hooks, suiteName };
296
346
  }