@matware/e2e-runner 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/.claude-plugin/marketplace.json +21 -0
  2. package/.mcp.json +2 -2
  3. package/.opencode/commands/create-test.md +63 -0
  4. package/.opencode/commands/run.md +50 -0
  5. package/.opencode/commands/verify-issue.md +62 -0
  6. package/.opencode/skills/e2e-testing/SKILL.md +181 -0
  7. package/.opencode/skills/e2e-testing/references/action-types.md +143 -0
  8. package/.opencode/skills/e2e-testing/references/auth-strategies.md +91 -0
  9. package/.opencode/skills/e2e-testing/references/graphql.md +59 -0
  10. package/.opencode/skills/e2e-testing/references/issue-verification.md +59 -0
  11. package/.opencode/skills/e2e-testing/references/multi-pool.md +60 -0
  12. package/.opencode/skills/e2e-testing/references/network-debugging.md +62 -0
  13. package/.opencode/skills/e2e-testing/references/test-json-format.md +163 -0
  14. package/.opencode/skills/e2e-testing/references/troubleshooting.md +224 -0
  15. package/.opencode/skills/e2e-testing/references/variables.md +41 -0
  16. package/.opencode/skills/e2e-testing/references/visual-verification.md +89 -0
  17. package/OPENCODE.md +166 -0
  18. package/README.md +581 -55
  19. package/agents/test-creator.md +54 -1
  20. package/agents/test-improver.md +37 -0
  21. package/bin/cli.js +408 -16
  22. package/commands/create-test.md +16 -1
  23. package/opencode.json +11 -0
  24. package/package.json +7 -2
  25. package/scripts/setup-opencode.sh +113 -0
  26. package/skills/e2e-testing/SKILL.md +10 -3
  27. package/skills/e2e-testing/references/action-types.md +48 -5
  28. package/skills/e2e-testing/references/auth-strategies.md +91 -0
  29. package/skills/e2e-testing/references/graphql.md +59 -0
  30. package/skills/e2e-testing/references/issue-verification.md +59 -0
  31. package/skills/e2e-testing/references/multi-pool.md +60 -0
  32. package/skills/e2e-testing/references/network-debugging.md +62 -0
  33. package/skills/e2e-testing/references/test-json-format.md +4 -0
  34. package/skills/e2e-testing/references/troubleshooting.md +44 -2
  35. package/skills/e2e-testing/references/variables.md +41 -0
  36. package/skills/e2e-testing/references/visual-verification.md +89 -0
  37. package/src/actions.js +324 -2
  38. package/src/ai-generate.js +58 -8
  39. package/src/config.js +143 -0
  40. package/src/dashboard.js +145 -13
  41. package/src/db.js +130 -2
  42. package/src/index.js +7 -6
  43. package/src/learner-sqlite.js +304 -0
  44. package/src/learner.js +8 -3
  45. package/src/mcp-tools.js +1121 -43
  46. package/src/module-resolver.js +37 -0
  47. package/src/narrate.js +37 -0
  48. package/src/pool-manager.js +223 -0
  49. package/src/reporter.js +82 -1
  50. package/src/runner.js +157 -28
  51. package/src/sync/auth.js +354 -0
  52. package/src/sync/client.js +572 -0
  53. package/src/sync/hub-routes.js +816 -0
  54. package/src/sync/index.js +68 -0
  55. package/src/sync/middleware.js +347 -0
  56. package/src/sync/queue.js +209 -0
  57. package/src/sync/schema.js +540 -0
  58. package/src/verify.js +10 -7
  59. package/src/watch.js +384 -0
  60. package/templates/build-dashboard.js +47 -6
  61. package/templates/dashboard/js/api.js +60 -0
  62. package/templates/dashboard/js/init.js +13 -0
  63. package/templates/dashboard/js/keyboard.js +46 -0
  64. package/templates/dashboard/js/state.js +40 -0
  65. package/templates/dashboard/js/toast.js +41 -0
  66. package/templates/dashboard/js/utils.js +196 -0
  67. package/templates/dashboard/js/view-live.js +143 -0
  68. package/templates/dashboard/js/view-runs.js +572 -0
  69. package/templates/dashboard/js/view-tests.js +294 -0
  70. package/templates/dashboard/js/view-watch.js +242 -0
  71. package/templates/dashboard/js/websocket.js +110 -0
  72. package/templates/dashboard/styles/base.css +69 -0
  73. package/templates/dashboard/styles/components.css +110 -0
  74. package/templates/dashboard/styles/view-live.css +74 -0
  75. package/templates/dashboard/styles/view-runs.css +207 -0
  76. package/templates/dashboard/styles/view-tests.css +96 -0
  77. package/templates/dashboard/styles/view-watch.css +53 -0
  78. package/templates/dashboard/template.html +165 -99
  79. package/templates/dashboard.html +1596 -541
  80. package/templates/sample-test.json +0 -8
  81. package/templates/dashboard/app.js +0 -1152
  82. package/templates/dashboard/styles.css +0 -413
package/src/mcp-tools.js CHANGED
@@ -13,17 +13,18 @@ import path from 'path';
13
13
  import http from 'http';
14
14
 
15
15
  import { loadConfig } from './config.js';
16
- import { waitForPool, getPoolStatus, connectToPool } from './pool.js';
16
+ import { connectToPool } from './pool.js';
17
+ import { waitForAnyPool, getPoolUrls, getAggregatedPoolStatus, selectPool } from './pool-manager.js';
17
18
  import { runTestsParallel, loadTestFile, loadTestSuite, loadAllSuites, listSuites } from './runner.js';
18
19
  import { generateReport, saveReport, persistRun } from './reporter.js';
19
20
  import { narrateTest } from './narrate.js';
20
21
  import { startDashboard, stopDashboard } from './dashboard.js';
21
- import { lookupScreenshotHash, ensureProject, computeScreenshotHash, registerScreenshotHash, getNetworkLogs } from './db.js';
22
+ import { lookupScreenshotHash, ensureProject, computeScreenshotHash, registerScreenshotHash, getNetworkLogs, setVariable, getVariables, deleteVariable, listVariables } from './db.js';
22
23
  import { fetchIssue, checkCliAuth, detectProvider } from './issues.js';
23
24
  import { buildPrompt, hasApiKey } from './ai-generate.js';
24
25
  import { verifyIssue } from './verify.js';
25
26
  import { listModules } from './module-resolver.js';
26
- import { getLearningsSummary, getFlakySummary, getSelectorStability, getPageHealth, getApiHealth, getErrorPatterns, getTestTrends, getRunInsights, getTestHistory, getPageHistory, getSelectorHistory } from './learner-sqlite.js';
27
+ import { getLearningsSummary, getFlakySummary, getSelectorStability, getPageHealth, getApiHealth, getErrorPatterns, getTestTrends, getRunInsights, getTestHistory, getPageHistory, getSelectorHistory, getHealthSnapshot, getTestCreationContext, generateImprovements } from './learner-sqlite.js';
27
28
  import { queryGraph } from './learner-neo4j.js';
28
29
  import { startNeo4j, stopNeo4j, getNeo4jStatus } from './neo4j-pool.js';
29
30
 
@@ -65,6 +66,11 @@ export const TOOLS = [
65
66
  type: 'boolean',
66
67
  description: 'Fail tests when network requests fail (e.g. ERR_CONNECTION_REFUSED). Default: false.',
67
68
  },
69
+ verificationStrictness: {
70
+ type: 'string',
71
+ enum: ['strict', 'moderate', 'lenient'],
72
+ description: 'Visual verification strictness. strict: no ambiguity allowed, any doubt = FAIL. moderate: reasonable judgment (default). lenient: only fail on clear contradictions.',
73
+ },
68
74
  cwd: {
69
75
  type: 'string',
70
76
  description: 'Absolute path to the project root directory. Claude Code should pass its current working directory.',
@@ -89,35 +95,94 @@ export const TOOLS = [
89
95
  {
90
96
  name: 'e2e_create_test',
91
97
  description:
92
- 'Create a new E2E test JSON file. Provide the suite name and an array of test objects, each with a name and actions array. Actions can include { "$use": "module-name", "params": {...} } to reference reusable modules.',
98
+ `Create a new E2E test JSON file. IMPORTANT: prefer built-in actions over evaluate blocks.
99
+
100
+ ## Action selection guide (use instead of evaluate)
101
+
102
+ **Clicking elements by text** — DON'T write evaluate to find+click elements:
103
+ click: { type: "click", text: "Submit" } — searches button, a, [role=tab], span, etc.
104
+ click_regex: { type: "click_regex", text: "save|guardar" } — regex match, case-insensitive
105
+ click_menu_item: { type: "click_menu_item", text: "Delete" } — [role=menuitem], .MenuItem, etc.
106
+ click_option: { type: "click_option", text: "Option A" } — [role=option] in dropdowns
107
+ click_chip: { type: "click_chip", text: "Active" } — MUI Chip / tag elements
108
+ click_icon: { type: "click_icon", value: "edit" } — SVG/icon by data-testid, aria-label, class
109
+ click_in_context:{ type: "click_in_context", text: "Row text", selector: "button" } — child within container
110
+
111
+ **Asserting text presence/absence** — DON'T write evaluate with body.includes():
112
+ assert_text: { type: "assert_text", text: "Welcome" } — text IS on page (case-sensitive). Uses: text
113
+ assert_no_text: { type: "assert_no_text", text: "Error" } — text is NOT on page. Uses: text
114
+ assert_text_in: { type: "assert_text_in", selector: "[class*='Drawer']", text: "profesional|doctor" }
115
+ — scoped regex in container (case-insensitive default). Uses: selector + text (+ value:"exact")
116
+
117
+ **Asserting elements** — DON'T write evaluate to count or check visibility:
118
+ assert_visible: { type: "assert_visible", selector: ".modal" } — Uses: selector (NOT text)
119
+ assert_not_visible: { type: "assert_not_visible", selector: ".loader" } — Uses: selector (NOT text)
120
+ assert_count: { type: "assert_count", selector: "input", value: ">= 2" } — Uses: selector + value
121
+ assert_element_text: { type: "assert_element_text", selector: "h1", text: "Dashboard" } — Uses: selector + text
122
+ assert_matches: { type: "assert_matches", selector: ".date", value: "\\\\d{2}/\\\\d{2}" } — Uses: selector + value (regex)
123
+ assert_attribute: { type: "assert_attribute", selector: "button", value: "disabled" } — Uses: selector + value
124
+ assert_url: { type: "assert_url", value: "/dashboard" } — Uses: value
125
+ assert_input_value: { type: "assert_input_value", selector: "#email", value: "@" } — Uses: selector + value
126
+
127
+ IMPORTANT field rules:
128
+ - assert_text / assert_no_text: use "text" field only (checks full page body)
129
+ - assert_visible / assert_not_visible: use "selector" field only (CSS selector, NOT text)
130
+ - To verify text absence: use assert_no_text (NOT assert_not_visible with text)
131
+
132
+ **Navigation & waiting** — DON'T write evaluate with setTimeout polling:
133
+ goto: { type: "goto", value: "/login" } — full page navigation
134
+ navigate: { type: "navigate", value: "/settings" } — SPA-friendly (won't fail if no page load)
135
+ wait: { type: "wait", text: "Loading complete" } — wait for text to appear in body
136
+ wait: { type: "wait", selector: ".results" } — wait for element to appear
137
+ wait: { type: "wait", value: "2000" } — fixed delay (avoid when possible)
138
+ wait_network_idle: { type: "wait_network_idle", value: "500" } — wait until no network for N ms
139
+
140
+ **Form interaction** — DON'T write evaluate with native value setters (unless React):
141
+ type: { type: "type", selector: "#email", value: "a@b.com" } — clears + types
142
+ type_react: { type: "type_react", selector: "#email", value: "a@b.com" } — for React controlled inputs
143
+ select: { type: "select", selector: "select#country", value: "US" }
144
+ clear: { type: "clear", selector: "#search" }
145
+ press: { type: "press", value: "Enter" }
146
+ focus_autocomplete: { type: "focus_autocomplete", text: "City" } — focus MUI Autocomplete by label
147
+
148
+ **When evaluate IS appropriate**: computed styles, complex conditional logic, GraphQL via window.__e2eGql, math calculations, reading window/app state.
149
+
150
+ ## Modules
151
+ Use { "$use": "module-name", "params": {...} } to reference reusable modules from e2e/modules/. Modules compose — a module can $use other modules. Check e2e_list to see available modules for the project.`,
93
152
  inputSchema: {
94
153
  type: 'object',
95
154
  properties: {
96
155
  name: {
97
156
  type: 'string',
98
- description: 'Suite file name without .json extension (e.g. "login", "05-checkout")',
157
+ description: 'Suite file name without .json extension (e.g. "login-flow", "issue-1743-sidebar")',
99
158
  },
100
159
  tests: {
101
160
  type: 'array',
102
- description: 'Array of test objects with { name, actions }',
161
+ description: 'Array of test objects with { name, actions, expect }',
103
162
  items: {
104
163
  type: 'object',
105
164
  properties: {
106
- name: { type: 'string', description: 'Test name' },
107
- expect: { type: 'string', description: 'Human-readable description of the expected visual outcome. After the test runs, a verification screenshot is captured and Claude Code judges pass/fail against this description.' },
165
+ name: { type: 'string', description: 'Test name — descriptive of what is being verified' },
166
+ expect: {
167
+ oneOf: [
168
+ { type: 'string', description: 'Single description of expected visual outcome.' },
169
+ { type: 'array', items: { type: 'string' }, description: 'Checklist of criteria — each evaluated independently as PASS/FAIL.' },
170
+ ],
171
+ description: 'Expected visual outcome. String for free-form, array for per-criterion checklist.',
172
+ },
108
173
  actions: {
109
174
  type: 'array',
110
- description: 'Sequential browser actions',
175
+ description: 'Sequential browser actions. Prefer built-in action types over evaluate — see tool description for the full guide.',
111
176
  items: {
112
177
  type: 'object',
113
178
  properties: {
114
179
  type: {
115
180
  type: 'string',
116
- description: 'Action type: goto, click, click_regex, click_option, click_chip, type, type_react, focus_autocomplete, wait, assert_text, assert_element_text, assert_attribute, assert_class, assert_visible, assert_not_visible, assert_input_value, assert_matches, assert_url, assert_count, assert_no_network_errors, get_text, screenshot, select, clear, clear_cookies, press, scroll, hover, evaluate, navigate',
181
+ description: 'Action type. Prefer declarative actions (assert_text, assert_no_text, click, assert_visible, assert_count, assert_text_in, click_menu_item, etc.) over evaluate.',
117
182
  },
118
- selector: { type: 'string', description: 'CSS selector' },
119
- value: { type: 'string', description: 'Value for the action' },
120
- text: { type: 'string', description: 'Text content to match' },
183
+ selector: { type: 'string', description: 'CSS selector (supports compound selectors like "[class*=\'Drawer\'], [role=\'presentation\']")' },
184
+ value: { type: 'string', description: 'Value — varies by action type (URL for goto, ms for wait, regex for assert_matches, ">= N" for assert_count)' },
185
+ text: { type: 'string', description: 'Text to match — used by click (substring), assert_text/assert_no_text (substring on body), assert_text_in (regex), click_regex (regex). NOT used by assert_visible/assert_not_visible (use selector instead).' },
121
186
  },
122
187
  required: ['type'],
123
188
  },
@@ -128,7 +193,7 @@ export const TOOLS = [
128
193
  },
129
194
  hooks: {
130
195
  type: 'object',
131
- description: 'Optional hooks: beforeAll, afterAll, beforeEach, afterEach (each an array of actions)',
196
+ description: 'Optional hooks: beforeAll, afterAll, beforeEach, afterEach (each an array of actions). Note: beforeAll runs on a SEPARATE page that is closed before tests — use beforeEach for auth/setup.',
132
197
  properties: {
133
198
  beforeAll: { type: 'array', items: { type: 'object' } },
134
199
  afterAll: { type: 'array', items: { type: 'object' } },
@@ -280,10 +345,59 @@ export const TOOLS = [
280
345
  required: ['url'],
281
346
  },
282
347
  },
348
+ {
349
+ name: 'e2e_analyze',
350
+ description:
351
+ 'Analyze a page\'s structure and return all interactive elements (forms, buttons, links, navigation, tables, modals, etc.) with their CSS selectors, plus suggested test scaffolds. One call replaces the entire screenshot→guess-selectors→retry cycle.',
352
+ inputSchema: {
353
+ type: 'object',
354
+ properties: {
355
+ url: {
356
+ type: 'string',
357
+ description: 'Full URL to analyze (e.g. "https://example.com" or "http://host.docker.internal:3000/dashboard")',
358
+ },
359
+ scope: {
360
+ type: 'string',
361
+ description: 'CSS selector to limit analysis to a section (e.g. "#sidebar", ".modal-content")',
362
+ },
363
+ maxElements: {
364
+ type: 'number',
365
+ description: 'Max elements per category (default: 50). Lower values produce smaller responses.',
366
+ },
367
+ includeScreenshot: {
368
+ type: 'boolean',
369
+ description: 'Include a screenshot alongside the JSON analysis (default: true)',
370
+ },
371
+ selector: {
372
+ type: 'string',
373
+ description: 'Wait for this CSS selector before analyzing',
374
+ },
375
+ delay: {
376
+ type: 'number',
377
+ description: 'Wait N milliseconds after page load before analyzing (default: 0)',
378
+ },
379
+ authToken: {
380
+ type: 'string',
381
+ description: 'JWT or auth token to inject into localStorage before navigating (for authenticated pages)',
382
+ },
383
+ authStorageKey: {
384
+ type: 'string',
385
+ description: 'localStorage key name for the auth token (default: "accessToken")',
386
+ },
387
+ cwd: {
388
+ type: 'string',
389
+ description: 'Absolute path to the project root directory. Claude Code should pass its current working directory.',
390
+ },
391
+ },
392
+ required: ['url'],
393
+ },
394
+ },
283
395
  {
284
396
  name: 'e2e_create_module',
285
397
  description:
286
- 'Create a reusable module for E2E tests. Modules define action sequences that can be referenced from tests via { "$use": "module-name", "params": {...} }. Useful for auth setup, navigation patterns, and other repeated sequences.',
398
+ `Create a reusable module for E2E tests. Modules encapsulate repeated action sequences referenced via { "$use": "module-name", "params": {...} }.
399
+
400
+ Good module candidates: auth setup, page navigation, tab clicking, opening sidebars/drawers, form fill sequences, cleanup routines. Modules can compose — a module can $use other modules. Params use {{paramName}} mustache syntax in action fields. Extract a module when you see the same 2+ action sequence in multiple tests.`,
287
401
  inputSchema: {
288
402
  type: 'object',
289
403
  properties: {
@@ -420,6 +534,38 @@ export const TOOLS = [
420
534
  required: ['runDbId'],
421
535
  },
422
536
  },
537
+ {
538
+ name: 'e2e_vars',
539
+ description:
540
+ 'Manage project variables stored in SQLite. Variables can be referenced in test JSON as {{var.KEY}}. Supports project-wide and per-suite scoping.',
541
+ inputSchema: {
542
+ type: 'object',
543
+ properties: {
544
+ action: {
545
+ type: 'string',
546
+ enum: ['set', 'get', 'list', 'delete'],
547
+ description: 'Action to perform: set (upsert), get (one key), list (all), delete',
548
+ },
549
+ key: {
550
+ type: 'string',
551
+ description: 'Variable name (required for set, get, delete)',
552
+ },
553
+ value: {
554
+ type: 'string',
555
+ description: 'Variable value (required for set)',
556
+ },
557
+ scope: {
558
+ type: 'string',
559
+ description: 'Scope: "project" (default) or a suite name for suite-specific override',
560
+ },
561
+ cwd: {
562
+ type: 'string',
563
+ description: 'Absolute path to the project root directory.',
564
+ },
565
+ },
566
+ required: ['action'],
567
+ },
568
+ },
423
569
  ];
424
570
 
425
571
  /** Tools exposed on the dashboard — excludes dashboard start/stop (already running). */
@@ -469,11 +615,12 @@ async function handleRun(args) {
469
615
  if (args.baseUrl) configOverrides.baseUrl = args.baseUrl;
470
616
  if (args.retries !== undefined) configOverrides.retries = args.retries;
471
617
  if (args.failOnNetworkError !== undefined) configOverrides.failOnNetworkError = args.failOnNetworkError;
618
+ if (args.verificationStrictness) configOverrides.verificationStrictness = args.verificationStrictness;
472
619
 
473
620
  const config = await loadConfig(configOverrides, args.cwd);
474
621
  config.triggeredBy = 'mcp';
475
622
 
476
- await waitForPool(config.poolUrl);
623
+ await waitForAnyPool(getPoolUrls(config));
477
624
 
478
625
  let tests, hooks;
479
626
 
@@ -506,7 +653,12 @@ async function handleRun(args) {
506
653
 
507
654
  const report = generateReport(results);
508
655
  saveReport(report, config.screenshotsDir, config);
509
- const { runDbId } = persistRun(report, config, args.suite || null);
656
+ // Derive suite name: explicit suite > file basename > null (for "all")
657
+ let suiteName = args.suite || null;
658
+ if (!suiteName && args.file) {
659
+ suiteName = path.basename(args.file, '.json');
660
+ }
661
+ const { runDbId } = await persistRun(report, config, suiteName);
510
662
 
511
663
  const failures = report.results
512
664
  .filter(r => !r.success)
@@ -563,12 +715,21 @@ async function handleRun(args) {
563
715
 
564
716
  const verifications = report.results
565
717
  .filter(r => r.expect && r.verificationScreenshot)
566
- .map(r => ({
567
- name: r.name,
568
- expect: r.expect,
569
- success: r.success,
570
- screenshotHash: 'ss:' + computeScreenshotHash(r.verificationScreenshot),
571
- }));
718
+ .map(r => {
719
+ const entry = {
720
+ name: r.name,
721
+ expect: r.expect,
722
+ success: r.success,
723
+ screenshotHash: 'ss:' + computeScreenshotHash(r.verificationScreenshot),
724
+ };
725
+ if (r.baselineScreenshot) {
726
+ entry.baselineScreenshotHash = 'ss:' + computeScreenshotHash(r.baselineScreenshot);
727
+ }
728
+ if (Array.isArray(r.expect)) {
729
+ entry.isChecklist = true;
730
+ }
731
+ return entry;
732
+ });
572
733
 
573
734
  if (flaky.length > 0) summary.flaky = flaky;
574
735
  if (failures.length > 0) summary.failures = failures;
@@ -590,7 +751,9 @@ async function handleRun(args) {
590
751
  }
591
752
  if (verifications.length > 0) {
592
753
  summary.verifications = verifications;
593
- summary.verificationInstructions = 'For each verification, call e2e_screenshot with the screenshotHash to view the screenshot. Then compare what you see against the "expect" description. Report any mismatches as FAIL.';
754
+ const hasBaselines = verifications.some(v => v.baselineScreenshotHash);
755
+ const hasChecklists = verifications.some(v => v.isChecklist);
756
+ summary.verificationInstructions = buildVerificationInstructions(config.verificationStrictness || 'moderate', hasBaselines, hasChecklists);
594
757
  }
595
758
 
596
759
  // Build per-test narrative: a step-by-step human-readable story of what happened
@@ -601,10 +764,19 @@ async function handleRun(args) {
601
764
  }));
602
765
  if (narratives.length > 0) summary.narratives = narratives;
603
766
 
604
- // Enrich with learning insights (fire-and-forget — never fails the response)
767
+ // Enrich with learning insights + health snapshot (fire-and-forget — never fails the response)
605
768
  if (config.learningsEnabled !== false) {
606
769
  try {
607
770
  const projectId = ensureProject(config._cwd, config.projectName, config.screenshotsDir, config.testsDir);
771
+
772
+ // Always include health snapshot (~200 bytes) for project context
773
+ const health = getHealthSnapshot(projectId);
774
+ if (health) {
775
+ summary.healthSnapshot = health;
776
+ summary.learningsHint = "Use e2e_learnings tool with query 'summary' for full analysis.";
777
+ }
778
+
779
+ // Contextual insights for this specific run
608
780
  const insights = getRunInsights(projectId, report);
609
781
  if (insights.length > 0) {
610
782
  summary.learnings = {
@@ -618,6 +790,12 @@ async function handleRun(args) {
618
790
  : null,
619
791
  };
620
792
  }
793
+
794
+ // Actionable improvements from cross-referencing this run with historical data
795
+ const improvements = generateImprovements(projectId, report);
796
+ if (improvements.length > 0) {
797
+ summary.improvements = improvements;
798
+ }
621
799
  } catch { /* never fail the run response */ }
622
800
  }
623
801
 
@@ -660,6 +838,14 @@ async function handleCreateTest(args) {
660
838
  }
661
839
 
662
840
  const safeName = path.basename(args.name);
841
+
842
+ // Reject generic/ambiguous suite names
843
+ const baseName = safeName.replace(/\.json$/, '').replace(/^\d+-/, '');
844
+ const FORBIDDEN_NAMES = ['all', 'test', 'tests', 'debug', 'new', 'temp', 'tmp', 'main', 'suite', 'run', 'e2e', 'default', 'untitled'];
845
+ if (FORBIDDEN_NAMES.includes(baseName.toLowerCase())) {
846
+ return errorResult(`Suite name "${baseName}" is too generic. Use a descriptive name specific to the feature or issue being tested (e.g. "login-valid-credentials", "issue-1743-auth-redirect").`);
847
+ }
848
+
663
849
  const filename = safeName.endsWith('.json') ? safeName : `${safeName}.json`;
664
850
  const filePath = path.join(config.testsDir, filename);
665
851
 
@@ -676,36 +862,298 @@ async function handleCreateTest(args) {
676
862
 
677
863
  fs.writeFileSync(filePath, JSON.stringify(content, null, 2) + '\n');
678
864
 
679
- // Warn about beforeAll pitfall
680
- let warning = '';
865
+ // ── Collect all actions (tests + hooks) for analysis ──
866
+ const allActions = [];
867
+ for (const test of args.tests) {
868
+ if (test.actions) allActions.push(...test.actions);
869
+ }
870
+ if (args.hooks) {
871
+ for (const hookActions of Object.values(args.hooks)) {
872
+ if (Array.isArray(hookActions)) allActions.push(...hookActions);
873
+ }
874
+ }
875
+
876
+ const warnings = [];
877
+
878
+ // ── Warn about beforeAll pitfall ──
681
879
  const beforeAll = args.hooks?.beforeAll;
682
880
  if (beforeAll?.length) {
683
881
  const stateActions = beforeAll.filter(a =>
684
882
  ['evaluate', 'goto', 'navigate', 'clear_cookies', 'type', 'click', 'select'].includes(a.type)
685
883
  );
686
884
  if (stateActions.length > 0) {
687
- warning = '\n\n⚠️ Warning: beforeAll runs on a separate browser page that is closed before tests start. ' +
688
- 'Actions that set browser state (evaluate, goto, cookies, etc.) will NOT carry over to individual tests. ' +
689
- 'Use beforeEach instead if tests need this setup.';
885
+ warnings.push('⚠️ beforeAll runs on a separate browser page that is closed before tests start. ' +
886
+ 'Actions that set browser state (evaluate, goto, cookies, etc.) will NOT carry over. ' +
887
+ 'Use beforeEach instead if tests need this setup.');
888
+ }
889
+ }
890
+
891
+ // ── Detect evaluate blocks that could use built-in actions ──
892
+ const suggestions = analyzeEvaluateUsage(allActions);
893
+ if (suggestions.length > 0) {
894
+ warnings.push(`💡 ${suggestions.length} evaluate action(s) could potentially use built-in actions instead:\n` +
895
+ suggestions.map(s => ` • ${s}`).join('\n'));
896
+ }
897
+
898
+ // ── Detect suite-level issues: fixed waits, cross-test dependencies ──
899
+ const actionWarnings = analyzeActionPatterns(args.tests);
900
+ if (actionWarnings.length > 0) {
901
+ warnings.push(...actionWarnings);
902
+ }
903
+
904
+ // ── List available modules ──
905
+ let modulesInfo = '';
906
+ try {
907
+ const modules = listModules(config.modulesDir);
908
+ if (modules.length > 0) {
909
+ modulesInfo = '\n\n📦 Available modules: ' + modules.map(m => {
910
+ const params = m.params.filter(p => p.required).map(p => p.name);
911
+ return m.name + (params.length ? `(${params.join(', ')})` : '');
912
+ }).join(', ');
913
+ }
914
+ } catch { /* modules dir may not exist */ }
915
+
916
+ const warningBlock = warnings.length > 0 ? '\n\n' + warnings.join('\n\n') : '';
917
+
918
+ // Enrich with learnings context for smarter test authoring
919
+ let learningsBlock = '';
920
+ try {
921
+ const projectId = ensureProject(config._cwd, config.projectName, config.screenshotsDir, config.testsDir);
922
+ const ctx = getTestCreationContext(projectId);
923
+ if (ctx) {
924
+ const lines = ['\n\n⚠ LEARNINGS FROM PREVIOUS RUNS:'];
925
+
926
+ if (ctx.unstableSelectors?.length) {
927
+ lines.push(' Unstable selectors (avoid these):');
928
+ for (const s of ctx.unstableSelectors) {
929
+ lines.push(` - ${s.selector} (${s.failRate}% fail rate) → ${s.suggestion}`);
930
+ }
931
+ }
932
+
933
+ if (ctx.errorPatterns?.length) {
934
+ lines.push(' Common errors:');
935
+ for (const e of ctx.errorPatterns) {
936
+ lines.push(` - ${e.category || 'unknown'} (${e.count}x) — ${e.pattern}`);
937
+ }
938
+ }
939
+
940
+ if (ctx.slowPages?.length) {
941
+ lines.push(' Slow pages (add extra waits):');
942
+ for (const p of ctx.slowPages) {
943
+ lines.push(` - ${p.page} (avg ${(p.avgLoadMs / 1000).toFixed(1)}s load)`);
944
+ }
945
+ }
946
+
947
+ if (ctx.stableSelectors?.length) {
948
+ lines.push(' Reliable selectors (safe to use):');
949
+ for (const s of ctx.stableSelectors) {
950
+ lines.push(` - ${s.selector} (100% success, ${s.uses} uses)`);
951
+ }
952
+ }
953
+
954
+ if (ctx.flakyTests?.length) {
955
+ lines.push(' Flaky tests (consider retries):');
956
+ for (const f of ctx.flakyTests) {
957
+ lines.push(` - ${f.name} (${f.flakyCount} flaky runs out of ${f.totalRuns})`);
958
+ }
959
+ }
960
+
961
+ if (ctx.apiIssues?.length) {
962
+ lines.push(' Unreliable API endpoints:');
963
+ for (const a of ctx.apiIssues) {
964
+ lines.push(` - ${a.endpoint} (${a.errorRate}% error rate)`);
965
+ }
966
+ }
967
+
968
+ if (ctx.passRate !== undefined) {
969
+ lines.push(` Overall project pass rate: ${ctx.passRate}%`);
970
+ }
971
+
972
+ learningsBlock = lines.join('\n');
973
+ }
974
+ } catch { /* never fail test creation */ }
975
+
976
+ return textResult(`Created test file: ${filePath}\n\n${args.tests.length} test(s) defined.${warningBlock}${modulesInfo}${learningsBlock}`);
977
+ }
978
+
979
+ /**
980
+ * Analyze evaluate actions and suggest built-in replacements.
981
+ * Returns an array of human-readable suggestion strings.
982
+ */
983
+ function analyzeEvaluateUsage(actions) {
984
+ const suggestions = [];
985
+
986
+ for (const action of actions) {
987
+ if (action.type !== 'evaluate' || !action.value) continue;
988
+ const code = action.value;
989
+
990
+ // Pattern: clicking elements by text — .click() after finding by textContent
991
+ if (/\.textContent[^]*\.click\(\)/s.test(code) || /\.find\([^)]*textContent[^)]*\)[^]*\.click/s.test(code)) {
992
+ if (/tab/i.test(code)) {
993
+ suggestions.push('Tab click via evaluate → use { type: "click", text: "Tab Name" } (click searches [role="tab"] natively)');
994
+ } else if (/menu/i.test(code)) {
995
+ suggestions.push('Menu item click via evaluate → use { type: "click_menu_item", text: "Item Name" }');
996
+ } else {
997
+ suggestions.push('Element click via evaluate → use { type: "click", text: "..." } or click_regex/click_in_context');
998
+ }
999
+ }
1000
+
1001
+ // Pattern: body.innerText.includes() for text presence
1002
+ if (/document\.body\.innerText[^]*\.includes\(/s.test(code) || /body\.includes\(/s.test(code)) {
1003
+ // Detect negation patterns (!includes) that should use assert_no_text
1004
+ const hasNegation = /!\s*body\.includes\(|!\s*\w+\.includes\(|!body\.includes\(/s.test(code)
1005
+ || /=\s*!.*\.includes\(/s.test(code);
1006
+ const includeCount = (code.match(/\.includes\(/g) || []).length;
1007
+
1008
+ if (hasNegation) {
1009
+ suggestions.push(`🚨 Text negation check (!includes) → use { type: "assert_no_text", text: "..." } for absent text, and { type: "assert_text", text: "..." } for present text`);
1010
+ } else if (includeCount <= 3) {
1011
+ suggestions.push(`Text presence check (${includeCount} includes) → use ${includeCount}x { type: "assert_text", text: "..." }`);
1012
+ } else {
1013
+ suggestions.push(`Text presence check (${includeCount} includes) → use assert_text for each, or assert_text_in with regex: { type: "assert_text_in", selector: "body", text: "word1|word2" }`);
1014
+ }
1015
+ }
1016
+
1017
+ // Pattern: querySelectorAll(...).length checks
1018
+ if (/querySelectorAll\([^)]+\)\.length/s.test(code) && !/getComputedStyle/.test(code)) {
1019
+ suggestions.push('Element counting via evaluate → use { type: "assert_count", selector: "...", value: ">= N" }');
1020
+ }
1021
+
1022
+ // Pattern: checking element visibility/existence without computed styles
1023
+ if (/querySelector\([^)]+\)\s*;?\s*(if\s*\(!\s*\w+\)|===?\s*null)/s.test(code) && !/getComputedStyle/.test(code)) {
1024
+ suggestions.push('Element existence check via evaluate → use { type: "assert_visible", selector: "..." }');
1025
+ }
1026
+
1027
+ // Pattern: return JSON.stringify for debug info (no throw/Error)
1028
+ if (/return\s+JSON\.stringify/s.test(code) && !/throw\s+new\s+Error/s.test(code) && !/FAIL/s.test(code)) {
1029
+ suggestions.push('Informational evaluate (returns JSON, never throws) → remove or replace with specific assertions');
1030
+ }
1031
+
1032
+ // Pattern: setTimeout polling loop
1033
+ if (/setTimeout|setInterval/s.test(code) && /while|Date\.now/s.test(code)) {
1034
+ suggestions.push('Polling loop in evaluate → use { type: "wait", text: "..." } or { type: "wait", selector: "..." } with timeout');
1035
+ }
1036
+
1037
+ // Pattern: return static string with no checks
1038
+ if (/^\(\(\)\s*=>\s*\{\s*return\s+['"`][^]*['"`];\s*\}\)\(\)$/.test(code.trim())) {
1039
+ suggestions.push('No-op evaluate (returns static string) → remove entirely');
1040
+ }
1041
+
1042
+ // 🚨 Pattern: evaluate returns template string interpolating booleans but never throws/fails
1043
+ // e.g. return `Foo: ${hasFoo}, Bar: ${hasBar}` — always truthy, never fails
1044
+ if (!(/throw\s+new\s+Error/s.test(code) || /\bFAIL[:\s]/s.test(code) || /\bERROR[:\s]/s.test(code)
1045
+ || /return\s+false\b/s.test(code) || /return\s+'FAIL/s.test(code) || /return\s+`FAIL/s.test(code))) {
1046
+ // Check for template returns with ${var} interpolation (informational, never fails)
1047
+ if (/return\s+`[^`]*\$\{[^}]+\}[^`]*`/s.test(code)) {
1048
+ // Heuristic: does the template interpolate boolean-like variables?
1049
+ const hasConditionInterpolation = /\$\{(has\w+|is\w+|no\w+|found|exists|present|visible|loaded)\}/i.test(code);
1050
+ const hasComparisonInterpolation = /\$\{[^}]*(===|!==|>|<|&&|\|\|)[^}]*\}/s.test(code);
1051
+ if (hasConditionInterpolation || hasComparisonInterpolation) {
1052
+ suggestions.push(
1053
+ '🚨 Evaluate returns informational template string with boolean/condition values but NEVER throws or returns false — ' +
1054
+ 'this test will ALWAYS PASS. Either throw new Error("FAIL: ...") when conditions are not met, or replace with built-in assert actions'
1055
+ );
1056
+ }
1057
+ }
1058
+ }
1059
+
1060
+ // 🚨 Pattern: sets window.__e2e_* globals for cross-test state sharing
1061
+ if (/window\.__e2e_\w+\s*=/.test(code) && !/window\.__e2e\./.test(code.replace(/window\.__e2e_\w+\s*=/g, ''))) {
1062
+ suggestions.push(
1063
+ '⚠️ Cross-test state via window.__e2e_* — if test retries are enabled, retried tests get a fresh page and lose this state. ' +
1064
+ 'Make each test self-contained by re-querying data, or disable retries for this suite'
1065
+ );
1066
+ }
1067
+ }
1068
+
1069
+ return suggestions;
1070
+ }
1071
+
1072
/**
 * Analyze all actions in a suite for non-evaluate issues:
 * fixed numeric waits and cross-test dependencies created through
 * window.__e2e_* globals.
 *
 * @param {Array<{name: string, actions?: Array<object>}>} tests - Tests of one suite.
 * @returns {string[]} Human-readable warnings; empty when nothing was detected.
 */
function analyzeActionPatterns(tests) {
  const warnings = [];

  // Detect fixed numeric waits (could be text/selector-based)
  for (const test of tests) {
    if (!test.actions) continue;
    for (const action of test.actions) {
      if (action.type === 'wait' && /^\d+$/.test(String(action.value))) {
        const ms = parseInt(action.value, 10);
        if (ms >= 3000) {
          warnings.push(
            `⏱️ Fixed ${ms}ms wait in "${test.name}" — prefer { type: "wait", text: "..." } or { type: "wait", selector: "..." } ` +
            `which retries until the condition is met. Fixed waits are either too short (flaky) or too long (slow).`
          );
          break; // one warning per test is enough
        }
      }
    }
  }

  // Assignment: `window.__e2e_foo =` but NOT `==` / `===` (those are comparisons, i.e. reads).
  const WRITE_RE = /window\.(__e2e_\w+)\s*=(?!=)/g;
  // Read: the full identifier (the `(?!\w)` stops the engine from backtracking into a
  // truncated name) that is not immediately followed by a plain assignment.
  const READ_RE = /window\.(__e2e_\w+)(?!\w)(?!\s*=(?!=))/g;

  // Detect cross-test state: test N writes window.__e2e_*, test M reads it
  const writers = new Map(); // varName → name of the first test that writes it
  const readers = new Map(); // varName → [test names]
  for (const test of tests) {
    if (!test.actions) continue;
    for (const action of test.actions) {
      // Only string code can be scanned; anything else would make matchAll throw.
      if (action.type !== 'evaluate' || typeof action.value !== 'string' || !action.value) continue;
      const code = action.value;

      for (const match of code.matchAll(WRITE_RE)) {
        if (!writers.has(match[1])) writers.set(match[1], test.name);
      }

      for (const match of code.matchAll(READ_RE)) {
        if (!readers.has(match[1])) readers.set(match[1], []);
        const names = readers.get(match[1]);
        if (!names.includes(test.name)) names.push(test.name);
      }
    }
  }

  for (const [varName, writerTest] of writers) {
    // A test reading its own global is not a cross-test dependency.
    const readerTests = (readers.get(varName) || []).filter(t => t !== writerTest);
    if (readerTests.length > 0) {
      warnings.push(
        `🔗 Cross-test dependency: "${writerTest}" sets ${varName}, read by: ${readerTests.map(t => `"${t}"`).join(', ')}. ` +
        `If "${writerTest}" fails, dependent tests will cascade-fail with confusing errors. ` +
        `Consider re-querying data in each test or combining them into a single test.`
      );
    }
  }

  return warnings;
}
695
1131
 
696
1132
  async function handlePoolStatus(args) {
697
1133
  const config = await loadConfig({}, args.cwd);
698
- const status = await getPoolStatus(config.poolUrl);
1134
+ const poolUrls = getPoolUrls(config);
1135
+ const aggregated = await getAggregatedPoolStatus(poolUrls);
699
1136
 
700
- const lines = [
701
- `Available: ${status.available ? 'yes' : 'no'}`,
702
- `Running: ${status.running}/${status.maxConcurrent}`,
703
- `Queued: ${status.queued}`,
704
- `Sessions: ${status.sessions.length}`,
705
- ];
1137
+ const lines = [];
706
1138
 
707
- if (status.error) {
708
- lines.push(`Error: ${status.error}`);
1139
+ if (poolUrls.length > 1) {
1140
+ lines.push(`Pools: ${aggregated.totalPools} (${aggregated.availableCount} available)`);
1141
+ lines.push(`Running: ${aggregated.totalRunning}/${aggregated.totalMaxConcurrent}`);
1142
+ lines.push(`Queued: ${aggregated.totalQueued}`);
1143
+ lines.push('');
1144
+ for (const pool of aggregated.pools) {
1145
+ const status = pool.available ? 'available' : pool.error ? `offline (${pool.error})` : 'busy';
1146
+ lines.push(` ${pool.url}: ${status} (${pool.running}/${pool.maxConcurrent}, ${pool.queued} queued)`);
1147
+ }
1148
+ } else {
1149
+ const pool = aggregated.pools[0];
1150
+ lines.push(`Available: ${pool.available ? 'yes' : 'no'}`);
1151
+ lines.push(`Running: ${pool.running}/${pool.maxConcurrent}`);
1152
+ lines.push(`Queued: ${pool.queued}`);
1153
+ lines.push(`Sessions: ${pool.sessions?.length ?? 0}`);
1154
+ if (pool.error) {
1155
+ lines.push(`Error: ${pool.error}`);
1156
+ }
709
1157
  }
710
1158
 
711
1159
  return textResult(lines.join('\n'));
@@ -831,16 +1279,521 @@ async function handleCreateModule(args) {
831
1279
  return textResult(`Created module: ${filePath}\n\nName: ${args.name}\nParams: ${paramNames.length ? paramNames.join(', ') : 'none'}\nActions: ${args.actions.length}\n\nUsage in tests: { "$use": "${args.name}", "params": { ... } }`);
832
1280
  }
833
1281
 
834
- async function handleCapture(args) {
1282
+ // ── Page analysis helpers ─────────────────────────────────────────────────────
1283
+
1284
/**
 * Browser-side function passed to page.evaluate().
 * Extracts the interactive structure of a page (forms, inputs, buttons, links,
 * navigation, tabs, headings, tables, modals, menus, alerts, images).
 *
 * It runs inside the page, so it relies only on browser globals
 * (document, CSS, getComputedStyle) and defines every helper inline.
 *
 * @param {string|null} scopeSelector - CSS selector of the subtree to analyze; falsy means document.body.
 * @param {number} [maxElements=50] - Cap applied to every collected list; non-positive or
 *   non-numeric values fall back to 50.
 * @returns {object} Either `{ error }` when the scope is not found, or the structure with a `stats` summary.
 */
function extractPageStructure(scopeSelector, maxElements) {
  // Normalize the cap so the loop limit and the per-list limit always agree.
  const requestedMax = Math.floor(Number(maxElements));
  const MAX = requestedMax > 0 ? requestedMax : 50;
  const root = scopeSelector ? document.querySelector(scopeSelector) : document.body;
  if (!root) return { error: `Scope selector not found: ${scopeSelector}` };

  // ── bestSelector: generate the most reliable CSS selector for an element ──
  const FRAMEWORK_CLASS_RE = /^(css-|sc-|jss\d|Mui|emotion-|chakra-|ant-|el-|v-|ng-|_|svelte-|tw-)/;

  const isUnique = (selector) => document.querySelectorAll(selector).length === 1;

  function bestSelector(el) {
    // 1. ID (if unique)
    if (el.id && isUnique(`#${CSS.escape(el.id)}`)) {
      return `#${CSS.escape(el.id)}`;
    }
    // 2. data-testid (escaped so quotes/backslashes cannot break the selector)
    const testId = el.getAttribute('data-testid');
    if (testId) return `[data-testid="${CSS.escape(testId)}"]`;
    // 3. aria-label
    const ariaLabel = el.getAttribute('aria-label');
    if (ariaLabel && isUnique(`[aria-label="${CSS.escape(ariaLabel)}"]`)) {
      return `[aria-label="${CSS.escape(ariaLabel)}"]`;
    }
    // 4. name attribute
    const name = el.getAttribute('name');
    if (name && isUnique(`[name="${CSS.escape(name)}"]`)) {
      return `[name="${CSS.escape(name)}"]`;
    }
    // 5. Unique CSS class (filter framework-generated)
    const tag = el.tagName.toLowerCase();
    const classes = [...el.classList].filter(c => !FRAMEWORK_CLASS_RE.test(c));
    for (const cls of classes) {
      const sel = `${tag}.${CSS.escape(cls)}`;
      if (isUnique(sel)) return sel;
    }
    // 6. Two-class combination
    for (let i = 0; i < classes.length; i++) {
      for (let j = i + 1; j < classes.length; j++) {
        const sel = `${tag}.${CSS.escape(classes[i])}.${CSS.escape(classes[j])}`;
        if (isUnique(sel)) return sel;
      }
    }
    // 7. Nearest ancestor with an ID + tag:nth-of-type (only works for its direct children)
    let parent = el.parentElement;
    while (parent && parent !== document.body) {
      if (parent.id) {
        const siblings = [...parent.querySelectorAll(`:scope > ${tag}`)];
        const idx = siblings.indexOf(el);
        if (idx !== -1) {
          const sel = `#${CSS.escape(parent.id)} > ${tag}:nth-of-type(${idx + 1})`;
          if (isUnique(sel)) return sel;
        }
        break;
      }
      parent = parent.parentElement;
    }
    // 8. Fallback: tag:nth-of-type within parent (not guaranteed to be unique page-wide)
    if (el.parentElement) {
      const siblings = [...el.parentElement.querySelectorAll(`:scope > ${tag}`)];
      const idx = siblings.indexOf(el);
      if (idx !== -1) return `${tag}:nth-of-type(${idx + 1})`;
    }
    return tag;
  }

  // Human-readable label for a form control: <label for>, wrapping <label>, aria-label, placeholder.
  function getLabel(el) {
    if (el.id) {
      const label = root.querySelector(`label[for="${CSS.escape(el.id)}"]`);
      if (label) return label.textContent.trim();
    }
    const parentLabel = el.closest('label');
    if (parentLabel) return parentLabel.textContent.trim();
    if (el.getAttribute('aria-label')) return el.getAttribute('aria-label');
    if (el.placeholder) return el.placeholder;
    return '';
  }

  // Style-based visibility check on the element itself (ancestors are not inspected).
  function isVisible(el) {
    const style = getComputedStyle(el);
    return style.display !== 'none' && style.visibility !== 'hidden' && style.opacity !== '0';
  }

  // Collect up to MAX described entries for the visible matches of `selector` inside
  // `container`. `describe` may return null/undefined to skip an element.
  function collect(container, selector, describe) {
    const found = [];
    for (const el of container.querySelectorAll(selector)) {
      if (!isVisible(el)) continue;
      const entry = describe(el);
      if (!entry) continue;
      found.push(entry);
      if (found.length >= MAX) break;
    }
    return found;
  }

  // ── Extract forms ──
  const forms = collect(root, 'form', (form) => {
    const fields = collect(form, 'input, select, textarea', (input) => {
      if (input.type === 'hidden') return null;
      return {
        selector: bestSelector(input),
        tag: input.tagName.toLowerCase(),
        type: input.type || input.tagName.toLowerCase(),
        name: input.name || undefined,
        label: getLabel(input) || undefined,
        required: input.required || undefined,
        placeholder: input.placeholder || undefined,
      };
    });
    const submitBtn = form.querySelector('button[type="submit"], input[type="submit"]');
    return {
      selector: bestSelector(form),
      action: form.action || undefined,
      method: form.method || undefined,
      fields,
      submitButton: submitBtn
        ? { selector: bestSelector(submitBtn), text: submitBtn.textContent?.trim() || submitBtn.value }
        : undefined,
    };
  });

  // ── Standalone inputs (outside forms) ──
  const standaloneInputs = collect(root, 'input, select, textarea', (input) => {
    if (input.type === 'hidden' || input.closest('form')) return null;
    return {
      selector: bestSelector(input),
      tag: input.tagName.toLowerCase(),
      type: input.type || input.tagName.toLowerCase(),
      name: input.name || undefined,
      label: getLabel(input) || undefined,
      placeholder: input.placeholder || undefined,
    };
  });

  // ── Buttons ──
  const buttons = collect(root, 'button, [role="button"], input[type="button"], input[type="submit"]', (btn) => ({
    selector: bestSelector(btn),
    text: btn.textContent?.trim() || btn.value || '',
    type: btn.type || undefined,
    disabled: btn.disabled || undefined,
    ariaLabel: btn.getAttribute('aria-label') || undefined,
  }));

  // ── Links ──
  const links = collect(root, 'a[href]', (a) => ({
    selector: bestSelector(a),
    text: a.textContent?.trim() || '',
    href: a.getAttribute('href'),
  }));

  // ── Navigation regions ──
  const navigation = collect(root, 'nav, [role="navigation"]', (nav) => ({
    selector: bestSelector(nav),
    ariaLabel: nav.getAttribute('aria-label') || undefined,
    items: collect(nav, 'a, button, [role="tab"], [role="menuitem"]', (link) => ({
      selector: bestSelector(link),
      text: link.textContent?.trim() || '',
      href: link.getAttribute('href') || undefined,
      active: link.classList.contains('active') || link.getAttribute('aria-current') === 'page' || undefined,
    })),
  }));

  // ── Tabs ──
  const tabs = collect(root, '[role="tab"]', (tab) => ({
    selector: bestSelector(tab),
    text: tab.textContent?.trim() || '',
    selected: tab.getAttribute('aria-selected') === 'true' || undefined,
  }));

  // ── Headings ──
  const headings = collect(root, 'h1, h2, h3, h4, h5, h6', (h) => ({
    level: parseInt(h.tagName[1], 10),
    text: h.textContent?.trim() || '',
    selector: bestSelector(h),
  }));

  // ── Tables ──
  const tables = collect(root, 'table', (table) => {
    const headerCells = [...table.querySelectorAll('th')];
    return {
      selector: bestSelector(table),
      headers: headerCells.slice(0, MAX).map(th => th.textContent?.trim()),
      rowCount: table.querySelectorAll('tbody tr, tr').length,
      hasHeader: headerCells.length > 0,
    };
  });

  // ── Modals/Dialogs ──
  const modals = collect(root, '[role="dialog"], dialog, .modal, [class*="modal"], [class*="Modal"]', (modal) => {
    const title = modal.querySelector('[class*="title"], [class*="Title"], h1, h2, h3, [role="heading"]');
    const closeBtn = modal.querySelector('[aria-label="close"], [aria-label="Close"], button.close, [class*="close"]');
    return {
      selector: bestSelector(modal),
      title: title?.textContent?.trim() || undefined,
      hasCloseButton: !!closeBtn,
      closeSelector: closeBtn ? bestSelector(closeBtn) : undefined,
    };
  });

  // ── Menus/Dropdowns ──
  const menus = collect(root, '[role="menu"], .dropdown-menu, [class*="dropdown"]', (menu) => ({
    selector: bestSelector(menu),
    items: collect(
      menu,
      '[role="menuitem"], [role="menuitemradio"], [role="menuitemcheckbox"], .dropdown-item, [class*="MenuItem"]',
      (item) => ({ text: item.textContent?.trim() || '', selector: bestSelector(item) })
    ),
  }));

  // ── Alerts/Banners ──
  const alerts = collect(
    root,
    '[role="alert"], [role="status"], .alert, [class*="banner"], [class*="Banner"], [class*="toast"], [class*="Toast"], [class*="notification"], [class*="Notification"]',
    (alert) => ({
      selector: bestSelector(alert),
      text: alert.textContent?.trim().slice(0, 200) || '',
      role: alert.getAttribute('role') || undefined,
    })
  );

  // ── Significant images (skipped only when BOTH dimensions are under 50px) ──
  const images = collect(root, 'img, svg[role="img"], [role="img"]', (img) => {
    const rect = img.getBoundingClientRect();
    if (rect.width < 50 && rect.height < 50) return null;
    return {
      selector: bestSelector(img),
      alt: img.alt || img.getAttribute('aria-label') || undefined,
      width: Math.round(rect.width),
      height: Math.round(rect.height),
      src: img.src ? img.src.slice(0, 200) : undefined,
    };
  });

  // Optional sections are reported as undefined when empty.
  const orUndefined = (list) => (list.length > 0 ? list : undefined);

  return {
    forms,
    standaloneInputs: orUndefined(standaloneInputs),
    buttons,
    links,
    navigation: orUndefined(navigation),
    tabs: orUndefined(tabs),
    headings,
    tables: orUndefined(tables),
    modals: orUndefined(modals),
    menus: orUndefined(menus),
    alerts: orUndefined(alerts),
    images: orUndefined(images),
    stats: {
      totalForms: forms.length,
      totalButtons: buttons.length,
      totalLinks: links.length,
      totalInputs: forms.reduce((n, f) => n + f.fields.length, 0) + standaloneInputs.length,
      totalHeadings: headings.length,
      totalTables: tables.length,
      totalNavRegions: navigation.length,
      totalTabs: tabs.length,
      totalModals: modals.length,
      totalImages: images.length,
    },
  };
}
1591
+
1592
/**
 * Analyzes extracted page structure and generates ready-to-use test scaffolds.
 * Runs on the Node.js side after page.evaluate returns.
 *
 * @param {object} structure - Result of extractPageStructure (forms, navigation, tables, tabs,
 *   headings, buttons). Missing sections are treated as empty.
 * @param {string} pageUrl - URL that was analyzed; only its pathname is used ("/" when unparsable).
 * @returns {Array<{name: string, actions: object[]}>} Suggested tests; always ends with
 *   "page-structure-verification".
 */
function buildSuggestedTests(structure, pageUrl) {
  const tests = [];

  let urlPath = '/';
  try {
    urlPath = new URL(pageUrl).pathname;
  } catch {
    // Not an absolute URL: keep the root path as a safe default.
  }

  const labelIncludes = (field, needle) => (field.label || '').toLowerCase().includes(needle);
  const isEmailField = (f) => f.type === 'email' || f.name === 'email' || labelIncludes(f, 'email');
  const isUsernameField = (f) => f.name === 'username' || labelIncludes(f, 'user');
  const isPasswordField = (f) => f.type === 'password';

  // Input types that cannot receive typed text; a "type" action on them would be meaningless.
  const NON_TYPABLE = new Set([
    'checkbox', 'radio', 'file', 'submit', 'button', 'reset', 'image', 'range', 'color', 'hidden',
  ]);
  const SAMPLE_BY_TYPE = { email: 'test@example.com', number: '42', tel: '555-0100', date: '2025-01-15' };

  let loginForms = 0;

  for (const form of structure.forms || []) {
    const fields = form.fields || [];

    // Login form detection: a password plus an email/username credential.
    const passwordField = fields.find(isPasswordField);
    const credential = fields.find(isEmailField) || fields.find(isUsernameField);
    if (passwordField && credential) {
      const actions = [
        { type: 'goto', value: urlPath },
        { type: 'type', selector: credential.selector, value: 'test@example.com' },
        { type: 'type', selector: passwordField.selector, value: 'password123' },
      ];
      if (form.submitButton) actions.push({ type: 'click', selector: form.submitButton.selector });
      actions.push({ type: 'wait', value: '2000' });
      loginForms += 1;
      // Keep the historical name for the first login form; suffix later ones so names stay unique.
      const name = loginForms === 1 ? 'login-form-submission' : `login-form-submission-${loginForms}`;
      tests.push({ name, actions });
      continue;
    }

    // Generic form fill + submit
    if (fields.length > 0) {
      const actions = [{ type: 'goto', value: urlPath }];
      for (const field of fields.slice(0, 10)) {
        if (field.tag === 'select' || NON_TYPABLE.has(field.type)) continue;
        const value = SAMPLE_BY_TYPE[field.type]
          ?? (field.tag === 'textarea' ? 'Sample text input' : 'Test value');
        actions.push({ type: 'type', selector: field.selector, value });
      }
      if (form.submitButton) actions.push({ type: 'click', selector: form.submitButton.selector });
      actions.push({ type: 'wait', value: '1000' });
      tests.push({ name: `form-submission-${tests.length + 1}`, actions });
    }
  }

  // Navigation test
  const navItems = (structure.navigation || []).flatMap(n => n.items || []).filter(i => i.href && i.text);
  if (navItems.length > 0) {
    const actions = [{ type: 'goto', value: urlPath }];
    for (const item of navItems.slice(0, 5)) {
      actions.push({ type: 'click', selector: item.selector });
      actions.push({ type: 'wait', value: '1000' });
      if (item.href !== '#' && !item.href.startsWith('javascript:')) {
        actions.push({ type: 'assert_url', value: item.href });
      }
      // Return to the analyzed page so the next selector is resolved on it.
      actions.push({ type: 'goto', value: urlPath });
    }
    tests.push({ name: 'navigation-links', actions });
  }

  // Table data assertion (only for the first table that has rows)
  const tableWithRows = (structure.tables || []).find(t => t.rowCount > 0);
  if (tableWithRows) {
    tests.push({
      name: `table-has-data`,
      actions: [
        { type: 'goto', value: urlPath },
        { type: 'wait', selector: tableWithRows.selector },
        { type: 'assert_count', selector: `${tableWithRows.selector} tbody tr`, value: '>=1' },
      ],
    });
  }

  // Tab switching test
  const tabs = structure.tabs || [];
  if (tabs.length >= 2) {
    const actions = [{ type: 'goto', value: urlPath }];
    for (const tab of tabs.slice(0, 5)) {
      actions.push({ type: 'click', selector: tab.selector });
      actions.push({ type: 'wait', value: '500' });
    }
    tests.push({ name: 'tab-switching', actions });
  }

  // Page structure verification (always generated)
  const verifyActions = [{ type: 'goto', value: urlPath }];
  for (const h of (structure.headings || []).filter(h => h.level <= 2).slice(0, 3)) {
    verifyActions.push({ type: 'assert_text', text: h.text });
  }
  // Derived from the buttons list itself so a structure without `stats` does not throw.
  for (const btn of (structure.buttons || []).filter(b => b.text).slice(0, 3)) {
    verifyActions.push({ type: 'assert_visible', selector: btn.selector });
  }
  tests.push({ name: 'page-structure-verification', actions: verifyActions });

  return tests;
}
1697
+
1698
/**
 * Tool handler: open `args.url` in a pooled browser, extract its interactive
 * structure, derive suggested tests and (unless disabled) attach a screenshot.
 *
 * Reads from args: url (required), cwd, authToken, authStorageKey, selector,
 * delay, scope, maxElements, includeScreenshot.
 *
 * @param {object} args - Tool arguments.
 * @returns {Promise<object>} An error result, or `{ content }` holding the JSON
 *   report and, when a screenshot was taken, a base64 PNG image entry.
 */
async function handleAnalyze(args) {
  if (!args.url) return errorResult('Missing required parameter: url');

  const config = await loadConfig({}, args.cwd);
  const candidatePools = getPoolUrls(config);
  const poolUrl = await selectPool(candidatePools);

  let browser;
  try {
    browser = await connectToPool(poolUrl);
    const page = await browser.newPage();
    await page.setViewport(config.viewport);

    // localStorage is per-origin, so visit the origin first to seed the token
    // before loading the real URL.
    const token = args.authToken || config.authToken;
    if (token) {
      const tokenKey = args.authStorageKey || config.authStorageKey || 'accessToken';
      const { origin } = new URL(args.url);
      await page.goto(origin, { waitUntil: 'domcontentloaded', timeout: 15000 });
      await page.evaluate((key, value) => { localStorage.setItem(key, value); }, tokenKey, token);
    }

    await page.goto(args.url, { waitUntil: 'networkidle2', timeout: 30000 });

    if (args.selector) {
      await page.waitForSelector(args.selector, { timeout: 10000 });
    }

    // Optional extra settle time requested by the caller.
    if (args.delay && args.delay > 0) {
      await new Promise(resolve => setTimeout(resolve, args.delay));
    }

    const structure = await page.evaluate(extractPageStructure, args.scope || null, args.maxElements || 50);
    if (structure.error) {
      return errorResult(structure.error);
    }

    const meta = {
      url: args.url,
      title: await page.title(),
      viewport: config.viewport,
      scope: args.scope || undefined,
    };

    const suggestedTests = buildSuggestedTests(structure, args.url);

    // Screenshot is on by default; only an explicit `false` disables it.
    let imageBase64;
    if (args.includeScreenshot !== false) {
      const fileName = `analyze-${Date.now()}.png`;
      if (!fs.existsSync(config.screenshotsDir)) {
        fs.mkdirSync(config.screenshotsDir, { recursive: true });
      }
      const shotPath = path.join(config.screenshotsDir, fileName);
      await page.screenshot({ path: shotPath, fullPage: false });

      // Register the screenshot under the project so it can be referenced by hash.
      const projectDir = args.cwd || process.cwd();
      const projectName = config.projectName || path.basename(projectDir);
      const projectId = ensureProject(projectDir, projectName, config.screenshotsDir, config.testsDir);
      const hash = computeScreenshotHash(shotPath);
      registerScreenshotHash(hash, shotPath, projectId, null);
      meta.screenshotHash = `ss:${hash}`;

      imageBase64 = fs.readFileSync(shotPath).toString('base64');
    }

    const report = { meta, ...structure, suggestedTests };
    const content = [{ type: 'text', text: JSON.stringify(report, null, 2) }];
    if (imageBase64) {
      content.push({ type: 'image', data: imageBase64, mimeType: 'image/png' });
    }

    return { content };
  } finally {
    if (browser) browser.disconnect();
  }
}
1786
+
1787
+ async function handleCapture(args) {
1788
+ if (!args.url) return errorResult('Missing required parameter: url');
1789
+
1790
+ const config = await loadConfig({}, args.cwd);
1791
+ const capturePoolUrls = getPoolUrls(config);
1792
+ const capturePool = await selectPool(capturePoolUrls);
840
1793
 
841
1794
  let browser;
842
1795
  try {
843
- browser = await connectToPool(config.poolUrl);
1796
+ browser = await connectToPool(capturePool);
844
1797
  const page = await browser.newPage();
845
1798
  await page.setViewport(config.viewport);
846
1799
 
@@ -1039,6 +1992,127 @@ async function handleNetworkLogs(args) {
1039
1992
  return textResult(JSON.stringify(results, null, 2));
1040
1993
  }
1041
1994
 
1995
/**
 * Tool handler for project variables: set, get, list or delete.
 *
 * Reads from args: action (required), key, value, scope (defaults to "project"), cwd.
 * A "get" in a non-project scope falls back to the project scope when the key
 * is not found in the requested scope.
 *
 * @param {object} args - Tool arguments.
 * @returns {Promise<object>} A text result on success, an error result otherwise.
 */
async function handleVars(args) {
  const action = args.action;
  if (!action) return errorResult('Missing required parameter: action');

  const cwd = args.cwd || process.cwd();
  const config = await loadConfig({}, cwd);
  // path.basename matches how handleAnalyze names the project and, unlike
  // split('/').pop(), copes with trailing slashes and platform separators.
  const projectName = config.projectName || path.basename(cwd) || 'default';
  const projectId = ensureProject(cwd, projectName, config.screenshotsDir, config.testsDir);
  const scope = args.scope || 'project';

  switch (action) {
    case 'set': {
      if (!args.key) return errorResult('Missing required parameter: key');
      if (args.value === undefined) return errorResult('Missing required parameter: value');
      setVariable(projectId, scope, args.key, args.value);
      return textResult(`Variable set: ${args.key} (scope: ${scope})`);
    }
    case 'get': {
      if (!args.key) return errorResult('Missing required parameter: key');
      const vars = getVariables(projectId, scope);
      if (vars[args.key] !== undefined) {
        return textResult(JSON.stringify({ key: args.key, value: vars[args.key], scope }));
      }
      // Fall back to project scope if not found in specific scope
      if (scope !== 'project') {
        const projectVars = getVariables(projectId, 'project');
        if (projectVars[args.key] !== undefined) {
          return textResult(JSON.stringify({ key: args.key, value: projectVars[args.key], scope: 'project' }));
        }
      }
      return errorResult(`Variable not found: ${args.key} (scope: ${scope})`);
    }
    case 'list': {
      const all = listVariables(projectId);
      if (Object.keys(all).length === 0) {
        return textResult('No variables set for this project.');
      }
      return textResult(JSON.stringify(all, null, 2));
    }
    case 'delete': {
      if (!args.key) return errorResult('Missing required parameter: key');
      const deleted = deleteVariable(projectId, scope, args.key);
      if (deleted) {
        return textResult(`Variable deleted: ${args.key} (scope: ${scope})`);
      }
      return errorResult(`Variable not found: ${args.key} (scope: ${scope})`);
    }
    default:
      return errorResult(`Unknown action: ${action}. Use set, get, list, or delete.`);
  }
}
2046
+
2047
+ // ── Verification instructions builder ─────────────────────────────────────────
2048
+
2049
/**
 * Build the plain-text instructions for verifying test screenshots.
 *
 * @param {string} strictness - "strict", "moderate" or "lenient"; anything else uses the moderate text.
 * @param {boolean} hasBaselines - Adds the baseline retrieval line, the before/after
 *   comparison step and the STATE CHANGE report line.
 * @param {boolean} hasChecklists - Adds per-criterion evaluation guidance and the CRITERIA report section.
 * @returns {string} Newline-joined instructions.
 */
function buildVerificationInstructions(strictness, hasBaselines, hasChecklists) {
  const strictnessText = {
    strict: 'STRICT — No ambiguity allowed. If ANY criterion is unclear, not fully visible, or doubtful, verdict is FAIL. Err on the side of failing.',
    moderate: 'MODERATE — Use reasonable judgment. Minor cosmetic differences are acceptable, but functional mismatches or missing elements are FAIL.',
    lenient: 'LENIENT — Only fail on clear, obvious contradictions. Partial matches and minor discrepancies are acceptable.',
  };
  const chosenText = strictnessText[strictness] || strictnessText.moderate;

  // The verdict step is numbered after the optional before/after comparison step.
  const verdictStep = hasBaselines ? '4' : '3';

  const retrieveBaseline = hasBaselines
    ? [' - If baselineScreenshotHash is present, also call e2e_screenshot with it (before-state).']
    : [];

  const evaluate = hasChecklists
    ? [
        ' - If isChecklist is true, evaluate EACH item in the expect array independently as PASS or FAIL.',
        ' - If isChecklist is false (or absent), evaluate the single expect description as a whole.',
      ]
    : [' - Compare the screenshot against the expect description.'];

  const compare = hasBaselines
    ? [
        '',
        '3. COMPARE BEFORE/AFTER',
        ' - If a baseline screenshot was retrieved, describe the state change between baseline and after screenshots.',
        ' - Verify the state change is consistent with what the test actions intended.',
      ]
    : [];

  const stateChange = hasBaselines
    ? [' STATE CHANGE: <one-line description of what changed from baseline to after>']
    : [];

  const criteria = hasChecklists
    ? [
        ' CRITERIA:',
        ' - "<criterion text>": PASS | FAIL (reason if FAIL)',
      ]
    : [];

  return [
    `Verification strictness: ${chosenText}`,
    '',
    'For each entry in the verifications array:',
    '',
    '1. RETRIEVE SCREENSHOTS',
    ' - Call e2e_screenshot with the screenshotHash (after-state).',
    ...retrieveBaseline,
    '',
    '2. EVALUATE',
    ...evaluate,
    ...compare,
    '',
    `${verdictStep}. REPORT VERDICT — use this exact format for each test:`,
    '',
    ' TEST: <test-name>',
    ' VERDICT: PASS | FAIL',
    ...stateChange,
    ...criteria,
    ' REASON: <brief explanation of the verdict>',
  ].join('\n');
}
2115
+
1042
2116
  // ── Helpers ───────────────────────────────────────────────────────────────────
1043
2117
 
1044
2118
  export function textResult(text) {
@@ -1074,12 +2148,16 @@ export async function dispatchTool(name, args = {}) {
1074
2148
  return await handleCreateModule(args);
1075
2149
  case 'e2e_capture':
1076
2150
  return await handleCapture(args);
2151
+ case 'e2e_analyze':
2152
+ return await handleAnalyze(args);
1077
2153
  case 'e2e_learnings':
1078
2154
  return await handleLearnings(args);
1079
2155
  case 'e2e_neo4j':
1080
2156
  return await handleNeo4j(args);
1081
2157
  case 'e2e_network_logs':
1082
2158
  return await handleNetworkLogs(args);
2159
+ case 'e2e_vars':
2160
+ return await handleVars(args);
1083
2161
  default:
1084
2162
  return errorResult(`Unknown tool: ${name}`);
1085
2163
  }