@matware/e2e-runner 1.1.1 → 1.3.0

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (89)
  1. package/.claude-plugin/marketplace.json +21 -0
  2. package/.claude-plugin/plugin.json +9 -0
  3. package/.mcp.json +9 -0
  4. package/.opencode/commands/create-test.md +63 -0
  5. package/.opencode/commands/run.md +50 -0
  6. package/.opencode/commands/verify-issue.md +62 -0
  7. package/.opencode/skills/e2e-testing/SKILL.md +181 -0
  8. package/.opencode/skills/e2e-testing/references/action-types.md +143 -0
  9. package/.opencode/skills/e2e-testing/references/auth-strategies.md +91 -0
  10. package/.opencode/skills/e2e-testing/references/graphql.md +59 -0
  11. package/.opencode/skills/e2e-testing/references/issue-verification.md +59 -0
  12. package/.opencode/skills/e2e-testing/references/multi-pool.md +60 -0
  13. package/.opencode/skills/e2e-testing/references/network-debugging.md +62 -0
  14. package/.opencode/skills/e2e-testing/references/test-json-format.md +163 -0
  15. package/.opencode/skills/e2e-testing/references/troubleshooting.md +224 -0
  16. package/.opencode/skills/e2e-testing/references/variables.md +41 -0
  17. package/.opencode/skills/e2e-testing/references/visual-verification.md +89 -0
  18. package/OPENCODE.md +166 -0
  19. package/README.md +990 -296
  20. package/agents/test-analyzer.md +81 -0
  21. package/agents/test-creator.md +155 -0
  22. package/agents/test-improver.md +177 -0
  23. package/bin/cli.js +602 -22
  24. package/commands/create-test.md +65 -0
  25. package/commands/run.md +49 -0
  26. package/commands/verify-issue.md +63 -0
  27. package/opencode.json +11 -0
  28. package/package.json +15 -2
  29. package/scripts/setup-opencode.sh +113 -0
  30. package/skills/e2e-testing/SKILL.md +173 -0
  31. package/skills/e2e-testing/references/action-types.md +143 -0
  32. package/skills/e2e-testing/references/auth-strategies.md +91 -0
  33. package/skills/e2e-testing/references/graphql.md +59 -0
  34. package/skills/e2e-testing/references/issue-verification.md +59 -0
  35. package/skills/e2e-testing/references/multi-pool.md +60 -0
  36. package/skills/e2e-testing/references/network-debugging.md +62 -0
  37. package/skills/e2e-testing/references/test-json-format.md +163 -0
  38. package/skills/e2e-testing/references/troubleshooting.md +224 -0
  39. package/skills/e2e-testing/references/variables.md +41 -0
  40. package/skills/e2e-testing/references/visual-verification.md +89 -0
  41. package/src/actions.js +597 -20
  42. package/src/ai-generate.js +142 -12
  43. package/src/config.js +171 -0
  44. package/src/dashboard.js +299 -17
  45. package/src/db.js +335 -13
  46. package/src/index.js +15 -8
  47. package/src/learner-markdown.js +177 -0
  48. package/src/learner-neo4j.js +255 -0
  49. package/src/learner-sqlite.js +658 -0
  50. package/src/learner.js +418 -0
  51. package/src/mcp-tools.js +1558 -50
  52. package/src/module-resolver.js +310 -0
  53. package/src/narrate.js +262 -0
  54. package/src/neo4j-pool.js +124 -0
  55. package/src/pool-manager.js +223 -0
  56. package/src/reporter.js +117 -3
  57. package/src/runner.js +274 -71
  58. package/src/sync/auth.js +354 -0
  59. package/src/sync/client.js +572 -0
  60. package/src/sync/hub-routes.js +816 -0
  61. package/src/sync/index.js +68 -0
  62. package/src/sync/middleware.js +347 -0
  63. package/src/sync/queue.js +209 -0
  64. package/src/sync/schema.js +540 -0
  65. package/src/verify.js +14 -9
  66. package/src/watch.js +384 -0
  67. package/templates/build-dashboard.js +69 -0
  68. package/templates/dashboard/js/api.js +60 -0
  69. package/templates/dashboard/js/init.js +13 -0
  70. package/templates/dashboard/js/keyboard.js +46 -0
  71. package/templates/dashboard/js/state.js +40 -0
  72. package/templates/dashboard/js/toast.js +41 -0
  73. package/templates/dashboard/js/utils.js +196 -0
  74. package/templates/dashboard/js/view-live.js +143 -0
  75. package/templates/dashboard/js/view-runs.js +572 -0
  76. package/templates/dashboard/js/view-tests.js +294 -0
  77. package/templates/dashboard/js/view-watch.js +242 -0
  78. package/templates/dashboard/js/websocket.js +110 -0
  79. package/templates/dashboard/styles/base.css +69 -0
  80. package/templates/dashboard/styles/components.css +110 -0
  81. package/templates/dashboard/styles/view-live.css +74 -0
  82. package/templates/dashboard/styles/view-runs.css +207 -0
  83. package/templates/dashboard/styles/view-tests.css +96 -0
  84. package/templates/dashboard/styles/view-watch.css +53 -0
  85. package/templates/dashboard/template.html +267 -0
  86. package/templates/dashboard.html +2171 -530
  87. package/templates/docker-compose-neo4j.yml +19 -0
  88. package/templates/e2e.config.js +3 -0
  89. package/templates/sample-test.json +0 -8
package/src/mcp-tools.js CHANGED
@@ -13,14 +13,20 @@ import path from 'path';
13
13
  import http from 'http';
14
14
 
15
15
  import { loadConfig } from './config.js';
16
- import { waitForPool, getPoolStatus, connectToPool } from './pool.js';
16
+ import { connectToPool } from './pool.js';
17
+ import { waitForAnyPool, getPoolUrls, getAggregatedPoolStatus, selectPool } from './pool-manager.js';
17
18
  import { runTestsParallel, loadTestFile, loadTestSuite, loadAllSuites, listSuites } from './runner.js';
18
19
  import { generateReport, saveReport, persistRun } from './reporter.js';
20
+ import { narrateTest } from './narrate.js';
19
21
  import { startDashboard, stopDashboard } from './dashboard.js';
20
- import { lookupScreenshotHash, ensureProject, computeScreenshotHash, registerScreenshotHash } from './db.js';
22
+ import { lookupScreenshotHash, ensureProject, computeScreenshotHash, registerScreenshotHash, getNetworkLogs, setVariable, getVariables, deleteVariable, listVariables } from './db.js';
21
23
  import { fetchIssue, checkCliAuth, detectProvider } from './issues.js';
22
24
  import { buildPrompt, hasApiKey } from './ai-generate.js';
23
25
  import { verifyIssue } from './verify.js';
26
+ import { listModules } from './module-resolver.js';
27
+ import { getLearningsSummary, getFlakySummary, getSelectorStability, getPageHealth, getApiHealth, getErrorPatterns, getTestTrends, getRunInsights, getTestHistory, getPageHistory, getSelectorHistory, getHealthSnapshot, getTestCreationContext, generateImprovements } from './learner-sqlite.js';
28
+ import { queryGraph } from './learner-neo4j.js';
29
+ import { startNeo4j, stopNeo4j, getNeo4jStatus } from './neo4j-pool.js';
24
30
 
25
31
  // ── Tool definitions ──────────────────────────────────────────────────────────
26
32
 
@@ -60,6 +66,11 @@ export const TOOLS = [
60
66
  type: 'boolean',
61
67
  description: 'Fail tests when network requests fail (e.g. ERR_CONNECTION_REFUSED). Default: false.',
62
68
  },
69
+ verificationStrictness: {
70
+ type: 'string',
71
+ enum: ['strict', 'moderate', 'lenient'],
72
+ description: 'Visual verification strictness. strict: no ambiguity allowed, any doubt = FAIL. moderate: reasonable judgment (default). lenient: only fail on clear contradictions.',
73
+ },
63
74
  cwd: {
64
75
  type: 'string',
65
76
  description: 'Absolute path to the project root directory. Claude Code should pass its current working directory.',
@@ -84,35 +95,94 @@ export const TOOLS = [
84
95
  {
85
96
  name: 'e2e_create_test',
86
97
  description:
87
- 'Create a new E2E test JSON file. Provide the suite name and an array of test objects, each with a name and actions array.',
98
+ `Create a new E2E test JSON file. IMPORTANT: prefer built-in actions over evaluate blocks.
99
+
100
+ ## Action selection guide (use instead of evaluate)
101
+
102
+ **Clicking elements by text** — DON'T write evaluate to find+click elements:
103
+ click: { type: "click", text: "Submit" } — searches button, a, [role=tab], span, etc.
104
+ click_regex: { type: "click_regex", text: "save|guardar" } — regex match, case-insensitive
105
+ click_menu_item: { type: "click_menu_item", text: "Delete" } — [role=menuitem], .MenuItem, etc.
106
+ click_option: { type: "click_option", text: "Option A" } — [role=option] in dropdowns
107
+ click_chip: { type: "click_chip", text: "Active" } — MUI Chip / tag elements
108
+ click_icon: { type: "click_icon", value: "edit" } — SVG/icon by data-testid, aria-label, class
109
+ click_in_context:{ type: "click_in_context", text: "Row text", selector: "button" } — child within container
110
+
111
+ **Asserting text presence/absence** — DON'T write evaluate with body.includes():
112
+ assert_text: { type: "assert_text", text: "Welcome" } — text IS on page (case-sensitive). Uses: text
113
+ assert_no_text: { type: "assert_no_text", text: "Error" } — text is NOT on page. Uses: text
114
+ assert_text_in: { type: "assert_text_in", selector: "[class*='Drawer']", text: "profesional|doctor" }
115
+ — scoped regex in container (case-insensitive default). Uses: selector + text (+ value:"exact")
116
+
117
+ **Asserting elements** — DON'T write evaluate to count or check visibility:
118
+ assert_visible: { type: "assert_visible", selector: ".modal" } — Uses: selector (NOT text)
119
+ assert_not_visible: { type: "assert_not_visible", selector: ".loader" } — Uses: selector (NOT text)
120
+ assert_count: { type: "assert_count", selector: "input", value: ">= 2" } — Uses: selector + value
121
+ assert_element_text: { type: "assert_element_text", selector: "h1", text: "Dashboard" } — Uses: selector + text
122
+ assert_matches: { type: "assert_matches", selector: ".date", value: "\\\\d{2}/\\\\d{2}" } — Uses: selector + value (regex)
123
+ assert_attribute: { type: "assert_attribute", selector: "button", value: "disabled" } — Uses: selector + value
124
+ assert_url: { type: "assert_url", value: "/dashboard" } — Uses: value
125
+ assert_input_value: { type: "assert_input_value", selector: "#email", value: "@" } — Uses: selector + value
126
+
127
+ IMPORTANT field rules:
128
+ - assert_text / assert_no_text: use "text" field only (checks full page body)
129
+ - assert_visible / assert_not_visible: use "selector" field only (CSS selector, NOT text)
130
+ - To verify text absence: use assert_no_text (NOT assert_not_visible with text)
131
+
132
+ **Navigation & waiting** — DON'T write evaluate with setTimeout polling:
133
+ goto: { type: "goto", value: "/login" } — full page navigation
134
+ navigate: { type: "navigate", value: "/settings" } — SPA-friendly (won't fail if no page load)
135
+ wait: { type: "wait", text: "Loading complete" } — wait for text to appear in body
136
+ wait: { type: "wait", selector: ".results" } — wait for element to appear
137
+ wait: { type: "wait", value: "2000" } — fixed delay (avoid when possible)
138
+ wait_network_idle: { type: "wait_network_idle", value: "500" } — wait until no network for N ms
139
+
140
+ **Form interaction** — DON'T write evaluate with native value setters (unless React):
141
+ type: { type: "type", selector: "#email", value: "a@b.com" } — clears + types
142
+ type_react: { type: "type_react", selector: "#email", value: "a@b.com" } — for React controlled inputs
143
+ select: { type: "select", selector: "select#country", value: "US" }
144
+ clear: { type: "clear", selector: "#search" }
145
+ press: { type: "press", value: "Enter" }
146
+ focus_autocomplete: { type: "focus_autocomplete", text: "City" } — focus MUI Autocomplete by label
147
+
148
+ **When evaluate IS appropriate**: computed styles, complex conditional logic, GraphQL via window.__e2eGql, math calculations, reading window/app state.
149
+
150
+ ## Modules
151
+ Use { "$use": "module-name", "params": {...} } to reference reusable modules from e2e/modules/. Modules compose — a module can $use other modules. Check e2e_list to see available modules for the project.`,
88
152
  inputSchema: {
89
153
  type: 'object',
90
154
  properties: {
91
155
  name: {
92
156
  type: 'string',
93
- description: 'Suite file name without .json extension (e.g. "login", "05-checkout")',
157
+ description: 'Suite file name without .json extension (e.g. "login-flow", "issue-1743-sidebar")',
94
158
  },
95
159
  tests: {
96
160
  type: 'array',
97
- description: 'Array of test objects with { name, actions }',
161
+ description: 'Array of test objects with { name, actions, expect }',
98
162
  items: {
99
163
  type: 'object',
100
164
  properties: {
101
- name: { type: 'string', description: 'Test name' },
102
- expect: { type: 'string', description: 'Human-readable description of the expected visual outcome. After the test runs, a verification screenshot is captured and Claude Code judges pass/fail against this description.' },
165
+ name: { type: 'string', description: 'Test name — descriptive of what is being verified' },
166
+ expect: {
167
+ oneOf: [
168
+ { type: 'string', description: 'Single description of expected visual outcome.' },
169
+ { type: 'array', items: { type: 'string' }, description: 'Checklist of criteria — each evaluated independently as PASS/FAIL.' },
170
+ ],
171
+ description: 'Expected visual outcome. String for free-form, array for per-criterion checklist.',
172
+ },
103
173
  actions: {
104
174
  type: 'array',
105
- description: 'Sequential browser actions',
175
+ description: 'Sequential browser actions. Prefer built-in action types over evaluate — see tool description for the full guide.',
106
176
  items: {
107
177
  type: 'object',
108
178
  properties: {
109
179
  type: {
110
180
  type: 'string',
111
- description: 'Action type: goto, click, type, wait, assert_text, assert_url, assert_visible, assert_count, screenshot, select, clear, press, scroll, hover, evaluate, navigate',
181
+ description: 'Action type. Prefer declarative actions (assert_text, assert_no_text, click, assert_visible, assert_count, assert_text_in, click_menu_item, etc.) over evaluate.',
112
182
  },
113
- selector: { type: 'string', description: 'CSS selector' },
114
- value: { type: 'string', description: 'Value for the action' },
115
- text: { type: 'string', description: 'Text content to match' },
183
+ selector: { type: 'string', description: 'CSS selector (supports compound selectors like "[class*=\'Drawer\'], [role=\'presentation\']")' },
184
+ value: { type: 'string', description: 'Value — varies by action type (URL for goto, ms for wait, regex for assert_matches, ">= N" for assert_count)' },
185
+ text: { type: 'string', description: 'Text to match — used by click (substring), assert_text/assert_no_text (substring on body), assert_text_in (regex), click_regex (regex). NOT used by assert_visible/assert_not_visible (use selector instead).' },
116
186
  },
117
187
  required: ['type'],
118
188
  },
@@ -123,7 +193,7 @@ export const TOOLS = [
123
193
  },
124
194
  hooks: {
125
195
  type: 'object',
126
- description: 'Optional hooks: beforeAll, afterAll, beforeEach, afterEach (each an array of actions)',
196
+ description: 'Optional hooks: beforeAll, afterAll, beforeEach, afterEach (each an array of actions). Note: beforeAll runs on a SEPARATE page that is closed before tests — use beforeEach for auth/setup.',
127
197
  properties: {
128
198
  beforeAll: { type: 'array', items: { type: 'object' } },
129
199
  afterAll: { type: 'array', items: { type: 'object' } },
@@ -211,6 +281,11 @@ export const TOOLS = [
211
281
  description:
212
282
  'prompt = return issue + prompt for Claude Code to create tests (default). verify = auto-generate tests via Claude API and run them.',
213
283
  },
284
+ testType: {
285
+ type: 'string',
286
+ enum: ['e2e', 'api'],
287
+ description: "Test category: 'e2e' (default) for UI-driven tests, 'api' for backend API tests",
288
+ },
214
289
  authToken: {
215
290
  type: 'string',
216
291
  description: 'JWT or auth token to inject into localStorage before running tests (for authenticated apps)',
@@ -254,6 +329,14 @@ export const TOOLS = [
254
329
  type: 'number',
255
330
  description: 'Wait N milliseconds after page load before capturing (default: 0)',
256
331
  },
332
+ authToken: {
333
+ type: 'string',
334
+ description: 'JWT or auth token to inject into localStorage before navigating (for authenticated pages)',
335
+ },
336
+ authStorageKey: {
337
+ type: 'string',
338
+ description: 'localStorage key name for the auth token (default: "accessToken")',
339
+ },
257
340
  cwd: {
258
341
  type: 'string',
259
342
  description: 'Absolute path to the project root directory. Claude Code should pass its current working directory.',
@@ -262,6 +345,227 @@ export const TOOLS = [
262
345
  required: ['url'],
263
346
  },
264
347
  },
348
+ {
349
+ name: 'e2e_analyze',
350
+ description:
351
+ 'Analyze a page\'s structure and return all interactive elements (forms, buttons, links, navigation, tables, modals, etc.) with their CSS selectors, plus suggested test scaffolds. One call replaces the entire screenshot→guess-selectors→retry cycle.',
352
+ inputSchema: {
353
+ type: 'object',
354
+ properties: {
355
+ url: {
356
+ type: 'string',
357
+ description: 'Full URL to analyze (e.g. "https://example.com" or "http://host.docker.internal:3000/dashboard")',
358
+ },
359
+ scope: {
360
+ type: 'string',
361
+ description: 'CSS selector to limit analysis to a section (e.g. "#sidebar", ".modal-content")',
362
+ },
363
+ maxElements: {
364
+ type: 'number',
365
+ description: 'Max elements per category (default: 50). Lower values produce smaller responses.',
366
+ },
367
+ includeScreenshot: {
368
+ type: 'boolean',
369
+ description: 'Include a screenshot alongside the JSON analysis (default: true)',
370
+ },
371
+ selector: {
372
+ type: 'string',
373
+ description: 'Wait for this CSS selector before analyzing',
374
+ },
375
+ delay: {
376
+ type: 'number',
377
+ description: 'Wait N milliseconds after page load before analyzing (default: 0)',
378
+ },
379
+ authToken: {
380
+ type: 'string',
381
+ description: 'JWT or auth token to inject into localStorage before navigating (for authenticated pages)',
382
+ },
383
+ authStorageKey: {
384
+ type: 'string',
385
+ description: 'localStorage key name for the auth token (default: "accessToken")',
386
+ },
387
+ cwd: {
388
+ type: 'string',
389
+ description: 'Absolute path to the project root directory. Claude Code should pass its current working directory.',
390
+ },
391
+ },
392
+ required: ['url'],
393
+ },
394
+ },
395
+ {
396
+ name: 'e2e_create_module',
397
+ description:
398
+ `Create a reusable module for E2E tests. Modules encapsulate repeated action sequences referenced via { "$use": "module-name", "params": {...} }.
399
+
400
+ Good module candidates: auth setup, page navigation, tab clicking, opening sidebars/drawers, form fill sequences, cleanup routines. Modules can compose — a module can $use other modules. Params use {{paramName}} mustache syntax in action fields. Extract a module when you see the same 2+ action sequence in multiple tests.`,
401
+ inputSchema: {
402
+ type: 'object',
403
+ properties: {
404
+ name: {
405
+ type: 'string',
406
+ description: 'Module name (used in $use references, e.g. "auth-jwt", "navigate-patient")',
407
+ },
408
+ description: {
409
+ type: 'string',
410
+ description: 'Human-readable description of what this module does',
411
+ },
412
+ params: {
413
+ type: 'object',
414
+ description: 'Parameter definitions. Each key is a param name, value is { required: boolean, default?: string, description?: string }',
415
+ additionalProperties: {
416
+ type: 'object',
417
+ properties: {
418
+ required: { type: 'boolean' },
419
+ default: { type: 'string' },
420
+ description: { type: 'string' },
421
+ },
422
+ },
423
+ },
424
+ actions: {
425
+ type: 'array',
426
+ description: 'Sequential actions with {{param}} placeholders for substitution',
427
+ items: {
428
+ type: 'object',
429
+ properties: {
430
+ type: { type: 'string', description: 'Action type (goto, click, evaluate, wait, etc.) or omit for $use references' },
431
+ selector: { type: 'string' },
432
+ value: { type: 'string' },
433
+ text: { type: 'string' },
434
+ $use: { type: 'string', description: 'Reference another module by name' },
435
+ params: { type: 'object', description: 'Parameters for nested $use' },
436
+ },
437
+ },
438
+ },
439
+ cwd: {
440
+ type: 'string',
441
+ description: 'Absolute path to the project root directory.',
442
+ },
443
+ },
444
+ required: ['name', 'actions'],
445
+ },
446
+ },
447
+ {
448
+ name: 'e2e_learnings',
449
+ description:
450
+ 'Query the E2E learning system for insights about test stability, flaky tests, selector health, page health, API health, error patterns, and trends. Builds knowledge across runs.',
451
+ inputSchema: {
452
+ type: 'object',
453
+ properties: {
454
+ query: {
455
+ type: 'string',
456
+ description: 'What to query: "summary" (full overview), "flaky" (flaky tests), "selectors" (selector stability), "pages" (page health), "apis" (API health), "errors" (error patterns), "trends" (7-day trend). Drill-down: "test:<name>", "page:<path>", "selector:<value>".',
457
+ },
458
+ days: {
459
+ type: 'number',
460
+ description: 'Analysis window in days (default: 30)',
461
+ },
462
+ cwd: {
463
+ type: 'string',
464
+ description: 'Absolute path to the project root directory. Claude Code should pass its current working directory.',
465
+ },
466
+ },
467
+ required: ['query'],
468
+ },
469
+ },
470
+ {
471
+ name: 'e2e_neo4j',
472
+ description:
473
+ 'Manage the Neo4j knowledge graph container for E2E learnings. Requires Docker.',
474
+ inputSchema: {
475
+ type: 'object',
476
+ properties: {
477
+ action: {
478
+ type: 'string',
479
+ enum: ['start', 'stop', 'status'],
480
+ description: 'Container lifecycle action',
481
+ },
482
+ cwd: {
483
+ type: 'string',
484
+ description: 'Absolute path to the project root directory.',
485
+ },
486
+ },
487
+ required: ['action'],
488
+ },
489
+ },
490
+ {
491
+ name: 'e2e_network_logs',
492
+ description:
493
+ 'Query network request/response logs for a specific test run. Returns filtered logs from SQLite. Use the runDbId from e2e_run results to drill down into network details on demand.',
494
+ inputSchema: {
495
+ type: 'object',
496
+ properties: {
497
+ runDbId: {
498
+ type: 'number',
499
+ description: 'The run database ID (returned by e2e_run in the summary)',
500
+ },
501
+ testName: {
502
+ type: 'string',
503
+ description: 'Filter by test name',
504
+ },
505
+ method: {
506
+ type: 'string',
507
+ description: 'Filter by HTTP method (GET, POST, etc.)',
508
+ },
509
+ statusMin: {
510
+ type: 'number',
511
+ description: 'Minimum HTTP status code (e.g. 400 for errors only)',
512
+ },
513
+ statusMax: {
514
+ type: 'number',
515
+ description: 'Maximum HTTP status code',
516
+ },
517
+ urlPattern: {
518
+ type: 'string',
519
+ description: 'Regex pattern to match against request URLs',
520
+ },
521
+ errorsOnly: {
522
+ type: 'boolean',
523
+ description: 'Only return requests with status >= 400',
524
+ },
525
+ includeHeaders: {
526
+ type: 'boolean',
527
+ description: 'Include request/response headers (default: false)',
528
+ },
529
+ includeBodies: {
530
+ type: 'boolean',
531
+ description: 'Include request/response bodies (default: false, implies includeHeaders)',
532
+ },
533
+ },
534
+ required: ['runDbId'],
535
+ },
536
+ },
537
+ {
538
+ name: 'e2e_vars',
539
+ description:
540
+ 'Manage project variables stored in SQLite. Variables can be referenced in test JSON as {{var.KEY}}. Supports project-wide and per-suite scoping.',
541
+ inputSchema: {
542
+ type: 'object',
543
+ properties: {
544
+ action: {
545
+ type: 'string',
546
+ enum: ['set', 'get', 'list', 'delete'],
547
+ description: 'Action to perform: set (upsert), get (one key), list (all), delete',
548
+ },
549
+ key: {
550
+ type: 'string',
551
+ description: 'Variable name (required for set, get, delete)',
552
+ },
553
+ value: {
554
+ type: 'string',
555
+ description: 'Variable value (required for set)',
556
+ },
557
+ scope: {
558
+ type: 'string',
559
+ description: 'Scope: "project" (default) or a suite name for suite-specific override',
560
+ },
561
+ cwd: {
562
+ type: 'string',
563
+ description: 'Absolute path to the project root directory.',
564
+ },
565
+ },
566
+ required: ['action'],
567
+ },
568
+ },
265
569
  ];
266
570
 
267
571
  /** Tools exposed on the dashboard — excludes dashboard start/stop (already running). */
@@ -311,22 +615,23 @@ async function handleRun(args) {
311
615
  if (args.baseUrl) configOverrides.baseUrl = args.baseUrl;
312
616
  if (args.retries !== undefined) configOverrides.retries = args.retries;
313
617
  if (args.failOnNetworkError !== undefined) configOverrides.failOnNetworkError = args.failOnNetworkError;
618
+ if (args.verificationStrictness) configOverrides.verificationStrictness = args.verificationStrictness;
314
619
 
315
620
  const config = await loadConfig(configOverrides, args.cwd);
316
621
  config.triggeredBy = 'mcp';
317
622
 
318
- await waitForPool(config.poolUrl);
623
+ await waitForAnyPool(getPoolUrls(config));
319
624
 
320
625
  let tests, hooks;
321
626
 
322
627
  if (args.all) {
323
- ({ tests, hooks } = loadAllSuites(config.testsDir));
628
+ ({ tests, hooks } = loadAllSuites(config.testsDir, config.modulesDir, config.exclude));
324
629
  } else if (args.suite) {
325
- ({ tests, hooks } = loadTestSuite(args.suite, config.testsDir));
630
+ ({ tests, hooks } = loadTestSuite(args.suite, config.testsDir, config.modulesDir));
326
631
  } else if (args.file) {
327
632
  const cwd = args.cwd || process.cwd();
328
633
  const filePath = path.isAbsolute(args.file) ? args.file : path.resolve(cwd, args.file);
329
- ({ tests, hooks } = loadTestFile(filePath));
634
+ ({ tests, hooks } = loadTestFile(filePath, config.modulesDir));
330
635
  } else {
331
636
  return errorResult('Provide one of: all (true), suite (name), or file (path)');
332
637
  }
@@ -348,7 +653,12 @@ async function handleRun(args) {
348
653
 
349
654
  const report = generateReport(results);
350
655
  saveReport(report, config.screenshotsDir, config);
351
- persistRun(report, config, args.suite || null);
656
+ // Derive suite name: explicit suite > file basename > null (for "all")
657
+ let suiteName = args.suite || null;
658
+ if (!suiteName && args.file) {
659
+ suiteName = path.basename(args.file, '.json');
660
+ }
661
+ const { runDbId } = await persistRun(report, config, suiteName);
352
662
 
353
663
  const failures = report.results
354
664
  .filter(r => !r.success)
@@ -366,6 +676,7 @@ async function handleRun(args) {
366
676
  ...report.summary,
367
677
  reportPath: path.join(config.screenshotsDir, 'report.json'),
368
678
  };
679
+ if (runDbId) summary.runDbId = runDbId;
369
680
 
370
681
  const consoleErrors = report.results
371
682
  .filter(r => r.consoleLogs?.some(l => l.type === 'error' || l.type === 'warning'))
@@ -374,27 +685,118 @@ async function handleRun(args) {
374
685
  .filter(r => r.networkErrors?.length > 0)
375
686
  .map(r => ({ name: r.name, errors: r.networkErrors }));
376
687
 
377
- const networkLogs = report.results
688
+ // Compact network summary — full logs available on-demand via e2e_network_logs
689
+ const networkSummary = report.results
378
690
  .filter(r => r.networkLogs?.length > 0)
379
- .map(r => ({ name: r.name, requests: r.networkLogs }));
691
+ .map(r => {
692
+ const logs = r.networkLogs;
693
+ const statusDist = { '2xx': 0, '3xx': 0, '4xx': 0, '5xx': 0, other: 0 };
694
+ let totalDuration = 0;
695
+ for (const l of logs) {
696
+ const s = l.status;
697
+ if (s >= 200 && s < 300) statusDist['2xx']++;
698
+ else if (s >= 300 && s < 400) statusDist['3xx']++;
699
+ else if (s >= 400 && s < 500) statusDist['4xx']++;
700
+ else if (s >= 500 && s < 600) statusDist['5xx']++;
701
+ else statusDist.other++;
702
+ totalDuration += l.duration || 0;
703
+ }
704
+ const failed = logs.filter(l => l.status >= 400).map(l => ({ url: l.url, method: l.method, status: l.status }));
705
+ const slowest = [...logs].sort((a, b) => (b.duration || 0) - (a.duration || 0)).slice(0, 3).map(l => ({ url: l.url, method: l.method, status: l.status, duration: l.duration }));
706
+ return {
707
+ name: r.name,
708
+ totalRequests: logs.length,
709
+ statusDistribution: statusDist,
710
+ avgDurationMs: logs.length > 0 ? Math.round(totalDuration / logs.length) : 0,
711
+ failedRequests: failed,
712
+ slowestRequests: slowest,
713
+ };
714
+ });
380
715
 
381
716
  const verifications = report.results
382
717
  .filter(r => r.expect && r.verificationScreenshot)
383
- .map(r => ({
384
- name: r.name,
385
- expect: r.expect,
386
- success: r.success,
387
- screenshotHash: 'ss:' + computeScreenshotHash(r.verificationScreenshot),
388
- }));
718
+ .map(r => {
719
+ const entry = {
720
+ name: r.name,
721
+ expect: r.expect,
722
+ success: r.success,
723
+ screenshotHash: 'ss:' + computeScreenshotHash(r.verificationScreenshot),
724
+ };
725
+ if (r.baselineScreenshot) {
726
+ entry.baselineScreenshotHash = 'ss:' + computeScreenshotHash(r.baselineScreenshot);
727
+ }
728
+ if (Array.isArray(r.expect)) {
729
+ entry.isChecklist = true;
730
+ }
731
+ return entry;
732
+ });
389
733
 
390
734
  if (flaky.length > 0) summary.flaky = flaky;
391
735
  if (failures.length > 0) summary.failures = failures;
392
736
  if (consoleErrors.length > 0) summary.consoleErrors = consoleErrors;
393
- if (networkErrors.length > 0) summary.networkErrors = networkErrors;
394
- if (networkLogs.length > 0) summary.networkLogs = networkLogs;
737
+ if (networkErrors.length > 0) {
738
+ summary.networkErrors = networkErrors;
739
+ // Warn when tests pass but have network errors and failOnNetworkError is off
740
+ if (!config.failOnNetworkError) {
741
+ const totalNetErrors = networkErrors.reduce((sum, r) => sum + r.errors.length, 0);
742
+ const passingWithErrors = networkErrors.filter(r => report.results.find(rr => rr.name === r.name)?.success).length;
743
+ if (passingWithErrors > 0) {
744
+ summary.networkWarning = `⚠️ ${passingWithErrors} test(s) PASSED but had ${totalNetErrors} network error(s). Set failOnNetworkError: true to fail these tests.`;
745
+ }
746
+ }
747
+ }
748
+ if (networkSummary.length > 0) {
749
+ summary.networkSummary = networkSummary;
750
+ if (runDbId) summary.networkLogsHint = 'Full network logs available via e2e_network_logs tool using the runDbId above.';
751
+ }
395
752
  if (verifications.length > 0) {
396
753
  summary.verifications = verifications;
397
- summary.verificationInstructions = 'For each verification, call e2e_screenshot with the screenshotHash to view the screenshot. Then compare what you see against the "expect" description. Report any mismatches as FAIL.';
754
+ const hasBaselines = verifications.some(v => v.baselineScreenshotHash);
755
+ const hasChecklists = verifications.some(v => v.isChecklist);
756
+ summary.verificationInstructions = buildVerificationInstructions(config.verificationStrictness || 'moderate', hasBaselines, hasChecklists);
757
+ }
758
+
759
+ // Build per-test narrative: a step-by-step human-readable story of what happened
760
+ const narratives = report.results.map(r => ({
761
+ name: r.name,
762
+ status: r.success ? 'PASSED' : 'FAILED',
763
+ steps: narrateTest(r),
764
+ }));
765
+ if (narratives.length > 0) summary.narratives = narratives;
766
+
767
+ // Enrich with learning insights + health snapshot (fire-and-forget — never fails the response)
768
+ if (config.learningsEnabled !== false) {
769
+ try {
770
+ const projectId = ensureProject(config._cwd, config.projectName, config.screenshotsDir, config.testsDir);
771
+
772
+ // Always include health snapshot (~200 bytes) for project context
773
+ const health = getHealthSnapshot(projectId);
774
+ if (health) {
775
+ summary.healthSnapshot = health;
776
+ summary.learningsHint = "Use e2e_learnings tool with query 'summary' for full analysis.";
777
+ }
778
+
779
+ // Contextual insights for this specific run
780
+ const insights = getRunInsights(projectId, report);
781
+ if (insights.length > 0) {
782
+ summary.learnings = {
783
+ insights,
784
+ tip: insights.find(i => i.type === 'new-failure')
785
+ ? 'New test failure detected — this test was previously stable. Check recent code changes.'
786
+ : insights.find(i => i.type === 'unstable-selectors')
787
+ ? 'Unstable selectors detected in this run. Consider using more specific selectors or data-testid attributes.'
788
+ : insights.find(i => i.type === 'flaky')
789
+ ? 'Known flaky tests in this run. Consider increasing timeouts or adding waits.'
790
+ : null,
791
+ };
792
+ }
793
+
794
+ // Actionable improvements from cross-referencing this run with historical data
795
+ const improvements = generateImprovements(projectId, report);
796
+ if (improvements.length > 0) {
797
+ summary.improvements = improvements;
798
+ }
799
+ } catch { /* never fail the run response */ }
398
800
  }
399
801
 
400
802
  return textResult(JSON.stringify(summary, null, 2));
@@ -404,13 +806,26 @@ async function handleList(args) {
404
806
  const config = await loadConfig({}, args.cwd);
405
807
  const suites = listSuites(config.testsDir);
406
808
 
809
+ const lines = [];
810
+
407
811
  if (suites.length === 0) {
408
- return textResult('No test suites found in ' + config.testsDir);
812
+ lines.push('No test suites found in ' + config.testsDir);
813
+ } else {
814
+ lines.push(...suites.map(s =>
815
+ `${s.name} (${s.testCount} tests): ${s.tests.join(', ')}`
816
+ ));
409
817
  }
410
818
 
411
- const lines = suites.map(s =>
412
- `${s.name} (${s.testCount} tests): ${s.tests.join(', ')}`
413
- );
819
+ // List available modules
820
+ const modules = listModules(config.modulesDir);
821
+ if (modules.length > 0) {
822
+ lines.push('');
823
+ lines.push('Available modules:');
824
+ for (const mod of modules) {
825
+ const paramNames = mod.params.map(p => p.required ? p.name : `${p.name}?`).join(', ');
826
+ lines.push(` ${mod.name} (${paramNames}) — ${mod.description || mod.file}`);
827
+ }
828
+ }
414
829
 
415
830
  return textResult(lines.join('\n'));
416
831
  }
@@ -423,6 +838,14 @@ async function handleCreateTest(args) {
423
838
  }
424
839
 
425
840
  const safeName = path.basename(args.name);
841
+
842
+ // Reject generic/ambiguous suite names
843
+ const baseName = safeName.replace(/\.json$/, '').replace(/^\d+-/, '');
844
+ const FORBIDDEN_NAMES = ['all', 'test', 'tests', 'debug', 'new', 'temp', 'tmp', 'main', 'suite', 'run', 'e2e', 'default', 'untitled'];
845
+ if (FORBIDDEN_NAMES.includes(baseName.toLowerCase())) {
846
+ return errorResult(`Suite name "${baseName}" is too generic. Use a descriptive name specific to the feature or issue being tested (e.g. "login-valid-credentials", "issue-1743-auth-redirect").`);
847
+ }
848
+
426
849
  const filename = safeName.endsWith('.json') ? safeName : `${safeName}.json`;
427
850
  const filePath = path.join(config.testsDir, filename);
428
851
 
@@ -438,22 +861,299 @@ async function handleCreateTest(args) {
438
861
  }
439
862
 
440
863
  fs.writeFileSync(filePath, JSON.stringify(content, null, 2) + '\n');
441
- return textResult(`Created test file: ${filePath}\n\n${args.tests.length} test(s) defined.`);
864
+
865
+ // ── Collect all actions (tests + hooks) for analysis ──
866
+ const allActions = [];
867
+ for (const test of args.tests) {
868
+ if (test.actions) allActions.push(...test.actions);
869
+ }
870
+ if (args.hooks) {
871
+ for (const hookActions of Object.values(args.hooks)) {
872
+ if (Array.isArray(hookActions)) allActions.push(...hookActions);
873
+ }
874
+ }
875
+
876
+ const warnings = [];
877
+
878
+ // ── Warn about beforeAll pitfall ──
879
+ const beforeAll = args.hooks?.beforeAll;
880
+ if (beforeAll?.length) {
881
+ const stateActions = beforeAll.filter(a =>
882
+ ['evaluate', 'goto', 'navigate', 'clear_cookies', 'type', 'click', 'select'].includes(a.type)
883
+ );
884
+ if (stateActions.length > 0) {
885
+ warnings.push('⚠️ beforeAll runs on a separate browser page that is closed before tests start. ' +
886
+ 'Actions that set browser state (evaluate, goto, cookies, etc.) will NOT carry over. ' +
887
+ 'Use beforeEach instead if tests need this setup.');
888
+ }
889
+ }
890
+
891
+ // ── Detect evaluate blocks that could use built-in actions ──
892
+ const suggestions = analyzeEvaluateUsage(allActions);
893
+ if (suggestions.length > 0) {
894
+ warnings.push(`💡 ${suggestions.length} evaluate action(s) could potentially use built-in actions instead:\n` +
895
+ suggestions.map(s => ` • ${s}`).join('\n'));
896
+ }
897
+
898
+ // ── Detect suite-level issues: fixed waits, cross-test dependencies ──
899
+ const actionWarnings = analyzeActionPatterns(args.tests);
900
+ if (actionWarnings.length > 0) {
901
+ warnings.push(...actionWarnings);
902
+ }
903
+
904
+ // ── List available modules ──
905
+ let modulesInfo = '';
906
+ try {
907
+ const modules = listModules(config.modulesDir);
908
+ if (modules.length > 0) {
909
+ modulesInfo = '\n\n📦 Available modules: ' + modules.map(m => {
910
+ const params = m.params.filter(p => p.required).map(p => p.name);
911
+ return m.name + (params.length ? `(${params.join(', ')})` : '');
912
+ }).join(', ');
913
+ }
914
+ } catch { /* modules dir may not exist */ }
915
+
916
+ const warningBlock = warnings.length > 0 ? '\n\n' + warnings.join('\n\n') : '';
917
+
918
+ // Enrich with learnings context for smarter test authoring
919
+ let learningsBlock = '';
920
+ try {
921
+ const projectId = ensureProject(config._cwd, config.projectName, config.screenshotsDir, config.testsDir);
922
+ const ctx = getTestCreationContext(projectId);
923
+ if (ctx) {
924
+ const lines = ['\n\n⚠ LEARNINGS FROM PREVIOUS RUNS:'];
925
+
926
+ if (ctx.unstableSelectors?.length) {
927
+ lines.push(' Unstable selectors (avoid these):');
928
+ for (const s of ctx.unstableSelectors) {
929
+ lines.push(` - ${s.selector} (${s.failRate}% fail rate) → ${s.suggestion}`);
930
+ }
931
+ }
932
+
933
+ if (ctx.errorPatterns?.length) {
934
+ lines.push(' Common errors:');
935
+ for (const e of ctx.errorPatterns) {
936
+ lines.push(` - ${e.category || 'unknown'} (${e.count}x) — ${e.pattern}`);
937
+ }
938
+ }
939
+
940
+ if (ctx.slowPages?.length) {
941
+ lines.push(' Slow pages (add extra waits):');
942
+ for (const p of ctx.slowPages) {
943
+ lines.push(` - ${p.page} (avg ${(p.avgLoadMs / 1000).toFixed(1)}s load)`);
944
+ }
945
+ }
946
+
947
+ if (ctx.stableSelectors?.length) {
948
+ lines.push(' Reliable selectors (safe to use):');
949
+ for (const s of ctx.stableSelectors) {
950
+ lines.push(` - ${s.selector} (100% success, ${s.uses} uses)`);
951
+ }
952
+ }
953
+
954
+ if (ctx.flakyTests?.length) {
955
+ lines.push(' Flaky tests (consider retries):');
956
+ for (const f of ctx.flakyTests) {
957
+ lines.push(` - ${f.name} (${f.flakyCount} flaky runs out of ${f.totalRuns})`);
958
+ }
959
+ }
960
+
961
+ if (ctx.apiIssues?.length) {
962
+ lines.push(' Unreliable API endpoints:');
963
+ for (const a of ctx.apiIssues) {
964
+ lines.push(` - ${a.endpoint} (${a.errorRate}% error rate)`);
965
+ }
966
+ }
967
+
968
+ if (ctx.passRate !== undefined) {
969
+ lines.push(` Overall project pass rate: ${ctx.passRate}%`);
970
+ }
971
+
972
+ learningsBlock = lines.join('\n');
973
+ }
974
+ } catch { /* never fail test creation */ }
975
+
976
+ return textResult(`Created test file: ${filePath}\n\n${args.tests.length} test(s) defined.${warningBlock}${modulesInfo}${learningsBlock}`);
442
977
  }
443
978
 
444
- async function handlePoolStatus(args) {
445
- const config = await loadConfig({}, args.cwd);
446
- const status = await getPoolStatus(config.poolUrl);
979
/**
 * Analyze evaluate actions and suggest built-in replacements.
 *
 * Each `evaluate` action whose `value` is a non-empty string is matched against
 * a set of regex heuristics (click-by-text, text presence checks, element
 * counting, existence checks, informational returns, polling loops, no-op
 * returns, always-passing template returns, window.__e2e_* writes). Every
 * heuristic that matches contributes one human-readable suggestion.
 *
 * @param {Array<{type?: string, value?: unknown}>} actions - Flat list of actions (tests + hooks).
 * @returns {string[]} Suggestion strings, in the order the patterns were detected.
 */
function analyzeEvaluateUsage(actions) {
  const suggestions = [];

  for (const action of actions) {
    // Only string code can be analyzed; a non-string value would make
    // code.trim()/code.match() throw further down.
    if (action.type !== 'evaluate' || typeof action.value !== 'string' || !action.value) continue;
    const code = action.value;

    // Pattern: clicking elements by text — .click() after finding by textContent
    if (/\.textContent[^]*\.click\(\)/s.test(code) || /\.find\([^)]*textContent[^)]*\)[^]*\.click/s.test(code)) {
      if (/tab/i.test(code)) {
        suggestions.push('Tab click via evaluate → use { type: "click", text: "Tab Name" } (click searches [role="tab"] natively)');
      } else if (/menu/i.test(code)) {
        suggestions.push('Menu item click via evaluate → use { type: "click_menu_item", text: "Item Name" }');
      } else {
        suggestions.push('Element click via evaluate → use { type: "click", text: "..." } or click_regex/click_in_context');
      }
    }

    // Pattern: body.innerText.includes() for text presence
    if (/document\.body\.innerText[^]*\.includes\(/s.test(code) || /body\.includes\(/s.test(code)) {
      // Detect negation patterns (!includes) that should use assert_no_text.
      // `!\s*\w+\.includes\(` already covers the `!body.includes(` spellings.
      const hasNegation = /!\s*\w+\.includes\(/.test(code) || /=\s*!.*\.includes\(/s.test(code);
      const includeCount = (code.match(/\.includes\(/g) || []).length;

      if (hasNegation) {
        suggestions.push(`🚨 Text negation check (!includes) → use { type: "assert_no_text", text: "..." } for absent text, and { type: "assert_text", text: "..." } for present text`);
      } else if (includeCount <= 3) {
        suggestions.push(`Text presence check (${includeCount} includes) → use ${includeCount}x { type: "assert_text", text: "..." }`);
      } else {
        suggestions.push(`Text presence check (${includeCount} includes) → use assert_text for each, or assert_text_in with regex: { type: "assert_text_in", selector: "body", text: "word1|word2" }`);
      }
    }

    // Pattern: querySelectorAll(...).length checks
    if (/querySelectorAll\([^)]+\)\.length/s.test(code) && !/getComputedStyle/.test(code)) {
      suggestions.push('Element counting via evaluate → use { type: "assert_count", selector: "...", value: ">= N" }');
    }

    // Pattern: checking element visibility/existence without computed styles
    if (/querySelector\([^)]+\)\s*;?\s*(if\s*\(!\s*\w+\)|===?\s*null)/s.test(code) && !/getComputedStyle/.test(code)) {
      suggestions.push('Element existence check via evaluate → use { type: "assert_visible", selector: "..." }');
    }

    // Pattern: return JSON.stringify for debug info (no throw/Error)
    if (/return\s+JSON\.stringify/s.test(code) && !/throw\s+new\s+Error/s.test(code) && !/FAIL/s.test(code)) {
      suggestions.push('Informational evaluate (returns JSON, never throws) → remove or replace with specific assertions');
    }

    // Pattern: setTimeout polling loop
    if (/setTimeout|setInterval/s.test(code) && /while|Date\.now/s.test(code)) {
      suggestions.push('Polling loop in evaluate → use { type: "wait", text: "..." } or { type: "wait", selector: "..." } with timeout');
    }

    // Pattern: return static string with no checks
    if (/^\(\(\)\s*=>\s*\{\s*return\s+['"`][^]*['"`];\s*\}\)\(\)$/.test(code.trim())) {
      suggestions.push('No-op evaluate (returns static string) → remove entirely');
    }

    // 🚨 Pattern: evaluate returns template string interpolating booleans but never throws/fails
    // e.g. return `Foo: ${hasFoo}, Bar: ${hasBar}` — always truthy, never fails
    const canFail = /throw\s+new\s+Error/s.test(code)
      || /\bFAIL[:\s]/s.test(code)
      || /\bERROR[:\s]/s.test(code)
      || /return\s+false\b/s.test(code)
      || /return\s+'FAIL/s.test(code)
      || /return\s+`FAIL/s.test(code);
    // Check for template returns with ${var} interpolation (informational, never fails)
    if (!canFail && /return\s+`[^`]*\$\{[^}]+\}[^`]*`/s.test(code)) {
      // Heuristic: does the template interpolate boolean-like variables?
      const hasConditionInterpolation = /\$\{(has\w+|is\w+|no\w+|found|exists|present|visible|loaded)\}/i.test(code);
      const hasComparisonInterpolation = /\$\{[^}]*(===|!==|>|<|&&|\|\|)[^}]*\}/s.test(code);
      if (hasConditionInterpolation || hasComparisonInterpolation) {
        suggestions.push(
          '🚨 Evaluate returns informational template string with boolean/condition values but NEVER throws or returns false — ' +
          'this test will ALWAYS PASS. Either throw new Error("FAIL: ...") when conditions are not met, or replace with built-in assert actions'
        );
      }
    }

    // 🚨 Pattern: sets window.__e2e_* globals for cross-test state sharing.
    // `=(?!=)` matches a real assignment only, so comparisons such as
    // `window.__e2e_x === 1` are not mistaken for writes.
    if (/window\.__e2e_\w+\s*=(?!=)/.test(code)
      && !/window\.__e2e\./.test(code.replace(/window\.__e2e_\w+\s*=(?!=)/g, ''))) {
      suggestions.push(
        '⚠️ Cross-test state via window.__e2e_* — if test retries are enabled, retried tests get a fresh page and lose this state. ' +
        'Make each test self-contained by re-querying data, or disable retries for this suite'
      );
    }
  }

  return suggestions;
}
1071
+
1072
/**
 * Analyze all actions in a suite for non-evaluate issues:
 * fixed numeric waits, cross-test dependencies, etc.
 *
 * Two independent passes are made over `tests`:
 *  1. Fixed waits — a `wait` action whose value is all digits and >= 3000
 *     produces one warning per test (the first such wait found).
 *  2. Cross-test state — for every `window.__e2e_*` global, the first test that
 *     assigns it is recorded as the writer; any other test that references it
 *     without assigning is a reader. One warning is produced per global that has
 *     at least one reader other than its writer.
 *
 * @param {Array<{name: string, actions?: Array<{type?: string, value?: unknown}>}>} tests
 * @returns {string[]} Warning strings (fixed-wait warnings first, then dependency warnings).
 */
function analyzeActionPatterns(tests) {
  const warnings = [];

  // Detect fixed numeric waits (could be text/selector-based)
  for (const test of tests) {
    if (!test.actions) continue;
    for (const action of test.actions) {
      if (action.type !== 'wait' || !/^\d+$/.test(String(action.value))) continue;
      const ms = Number.parseInt(String(action.value), 10);
      if (ms < 3000) continue;
      warnings.push(
        `⏱️ Fixed ${ms}ms wait in "${test.name}" — prefer { type: "wait", text: "..." } or { type: "wait", selector: "..." } ` +
        `which retries until the condition is met. Fixed waits are either too short (flaky) or too long (slow).`
      );
      break; // one warning per test is enough
    }
  }

  // Detect cross-test state: test N writes window.__e2e_*, test M reads it
  const writers = new Map(); // varName → name of the first test that assigns it
  const readers = new Map(); // varName → Set of test names that read it
  for (const test of tests) {
    if (!test.actions) continue;
    for (const action of test.actions) {
      // matchAll needs a string; skip anything else rather than throwing.
      if (action.type !== 'evaluate' || typeof action.value !== 'string' || !action.value) continue;
      const code = action.value;

      // Writes: `window.__e2e_foo = ...`. `=(?!=)` excludes `==` / `===`
      // so comparisons are not recorded as assignments.
      for (const m of code.matchAll(/window\.(__e2e_\w+)\s*=(?!=)/g)) {
        if (!writers.has(m[1])) writers.set(m[1], test.name);
      }

      // Reads: `window.__e2e_foo` not followed by a plain assignment. The `\b`
      // pins the end of the identifier so the lookahead cannot succeed by
      // backtracking onto a truncated name.
      for (const m of code.matchAll(/window\.(__e2e_\w+)\b(?!\s*=(?!=))/g)) {
        if (!readers.has(m[1])) readers.set(m[1], new Set());
        readers.get(m[1]).add(test.name);
      }
    }
  }

  for (const [varName, writerTest] of writers) {
    const readerTests = [...(readers.get(varName) ?? [])].filter(t => t !== writerTest);
    if (readerTests.length === 0) continue;
    warnings.push(
      `🔗 Cross-test dependency: "${writerTest}" sets ${varName}, read by: ${readerTests.map(t => `"${t}"`).join(', ')}. ` +
      `If "${writerTest}" fails, dependent tests will cascade-fail with confusing errors. ` +
      `Consider re-querying data in each test or combining them into a single test.`
    );
  }

  return warnings;
}
454
1131
 
455
- if (status.error) {
456
- lines.push(`Error: ${status.error}`);
1132
+ async function handlePoolStatus(args) {
1133
+ const config = await loadConfig({}, args.cwd);
1134
+ const poolUrls = getPoolUrls(config);
1135
+ const aggregated = await getAggregatedPoolStatus(poolUrls);
1136
+
1137
+ const lines = [];
1138
+
1139
+ if (poolUrls.length > 1) {
1140
+ lines.push(`Pools: ${aggregated.totalPools} (${aggregated.availableCount} available)`);
1141
+ lines.push(`Running: ${aggregated.totalRunning}/${aggregated.totalMaxConcurrent}`);
1142
+ lines.push(`Queued: ${aggregated.totalQueued}`);
1143
+ lines.push('');
1144
+ for (const pool of aggregated.pools) {
1145
+ const status = pool.available ? 'available' : pool.error ? `offline (${pool.error})` : 'busy';
1146
+ lines.push(` ${pool.url}: ${status} (${pool.running}/${pool.maxConcurrent}, ${pool.queued} queued)`);
1147
+ }
1148
+ } else {
1149
+ const pool = aggregated.pools[0];
1150
+ lines.push(`Available: ${pool.available ? 'yes' : 'no'}`);
1151
+ lines.push(`Running: ${pool.running}/${pool.maxConcurrent}`);
1152
+ lines.push(`Queued: ${pool.queued}`);
1153
+ lines.push(`Sessions: ${pool.sessions?.length ?? 0}`);
1154
+ if (pool.error) {
1155
+ lines.push(`Error: ${pool.error}`);
1156
+ }
457
1157
  }
458
1158
 
459
1159
  return textResult(lines.join('\n'));
@@ -477,9 +1177,16 @@ async function handleScreenshot(args) {
477
1177
  const filename = path.basename(row.file_path);
478
1178
  const hash = row.hash;
479
1179
 
1180
+ // Build description with metadata if available
1181
+ const metaParts = [`Screenshot ss:${hash} (${filename})`];
1182
+ if (row.test_name) metaParts.push(`Test: ${row.test_name}`);
1183
+ if (row.screenshot_type) metaParts.push(`Type: ${row.screenshot_type}`);
1184
+ if (row.step_index != null) metaParts.push(`Step: ${row.step_index}`);
1185
+ if (row.page_url) metaParts.push(`URL: ${row.page_url}`);
1186
+
480
1187
  return {
481
1188
  content: [
482
- { type: 'text', text: `Screenshot ss:${hash} (${filename})` },
1189
+ { type: 'text', text: metaParts.join('\n') },
483
1190
  { type: 'image', data: base64, mimeType },
484
1191
  ],
485
1192
  };
@@ -489,6 +1196,7 @@ async function handleIssue(args) {
489
1196
  if (!args.url) return errorResult('Missing required parameter: url');
490
1197
 
491
1198
  const mode = args.mode || 'prompt';
1199
+ const testType = args.testType || 'e2e';
492
1200
  const config = await loadConfig({}, args.cwd);
493
1201
 
494
1202
  // Check provider and auth
@@ -511,6 +1219,7 @@ async function handleIssue(args) {
511
1219
 
512
1220
  if (args.authToken) config.authToken = args.authToken;
513
1221
  if (args.authStorageKey) config.authStorageKey = args.authStorageKey;
1222
+ config.testType = testType;
514
1223
 
515
1224
  const result = await verifyIssue(args.url, config);
516
1225
  const status = result.bugConfirmed ? 'BUG CONFIRMED' : 'NOT REPRODUCIBLE';
@@ -533,23 +1242,571 @@ async function handleIssue(args) {
533
1242
 
534
1243
  // Default: prompt mode
535
1244
  const issue = fetchIssue(args.url);
536
- const promptData = buildPrompt(issue, config);
1245
+ const promptData = buildPrompt(issue, config, testType);
537
1246
 
538
1247
  return textResult(promptData.prompt);
539
1248
  }
540
1249
 
541
- async function handleCapture(args) {
1250
/**
 * Tool handler: create a reusable module file in the configured modules directory.
 *
 * Arguments are validated before anything touches the filesystem, so a bad
 * request cannot leave a half-defined module file behind.
 *
 * @param {object} args
 * @param {string} args.name - Module name; its basename (plus `.json` if absent) is the file name.
 * @param {Array} args.actions - Actions stored in the module.
 * @param {string} [args.description] - Stored as the module description (defaults to '').
 * @param {object} [args.params] - Parameter definitions (defaults to {}).
 * @param {string} [args.cwd] - Forwarded to loadConfig.
 * @returns {Promise<object>} textResult on success, errorResult on invalid input,
 *   missing modulesDir configuration, or an already-existing file.
 */
async function handleCreateModule(args) {
  if (typeof args.name !== 'string' || args.name.trim() === '') {
    return errorResult('Missing required parameter: name');
  }
  if (!Array.isArray(args.actions)) {
    return errorResult('Missing required parameter: actions (must be an array)');
  }

  // basename() strips directory components so the file always lands inside modulesDir.
  const safeName = path.basename(args.name);
  if (!safeName || safeName === '.' || safeName === '..') {
    return errorResult(`Invalid module name: "${args.name}"`);
  }

  const config = await loadConfig({}, args.cwd);

  if (!config.modulesDir) {
    return errorResult('modulesDir not configured');
  }

  if (!fs.existsSync(config.modulesDir)) {
    fs.mkdirSync(config.modulesDir, { recursive: true });
  }

  const filename = safeName.endsWith('.json') ? safeName : `${safeName}.json`;
  const filePath = path.join(config.modulesDir, filename);

  if (fs.existsSync(filePath)) {
    return errorResult(`Module file already exists: ${filePath}`);
  }

  const params = args.params || {};
  const moduleDefinition = {
    $module: args.name,
    description: args.description || '',
    params,
    actions: args.actions,
  };

  fs.writeFileSync(filePath, JSON.stringify(moduleDefinition, null, 2) + '\n');

  const paramNames = Object.keys(params);
  return textResult(`Created module: ${filePath}\n\nName: ${args.name}\nParams: ${paramNames.length ? paramNames.join(', ') : 'none'}\nActions: ${args.actions.length}\n\nUsage in tests: { "$use": "${args.name}", "params": { ... } }`);
}
1281
+
1282
+ // ── Page analysis helpers ─────────────────────────────────────────────────────
1283
+
1284
/**
 * Browser-side function passed to page.evaluate().
 * Extracts the complete interactive structure of a page in a single DOM pass.
 *
 * Because it is evaluated in the page, everything it needs is declared inside
 * the function body; it relies only on browser globals (document, CSS,
 * getComputedStyle).
 *
 * @param {string} [scopeSelector] - CSS selector of the subtree to inspect; document.body when falsy.
 * @param {number} [maxElements] - Per-category cap on collected entries (defaults to 50).
 * @returns {object} `{ error }` when the scope root cannot be found; otherwise an object with
 *   forms, buttons, links, headings (always arrays), the optional categories
 *   (standaloneInputs, navigation, tabs, tables, modals, menus, alerts, images — `undefined`
 *   when empty) and a `stats` object with per-category totals.
 */
function extractPageStructure(scopeSelector, maxElements) {
  const MAX = maxElements || 50;
  const root = scopeSelector ? document.querySelector(scopeSelector) : document.body;
  if (!root) return { error: `Scope selector not found: ${scopeSelector}` };

  // ── bestSelector: generate the most reliable CSS selector for an element ──
  // Class names matching these prefixes are skipped when building class selectors.
  const FRAMEWORK_CLASS_RE = /^(css-|sc-|jss\d|Mui|emotion-|chakra-|ant-|el-|v-|ng-|_|svelte-|tw-)/;

  function bestSelector(el) {
    // 1. ID (if unique)
    if (el.id && document.querySelectorAll(`#${CSS.escape(el.id)}`).length === 1) {
      return `#${CSS.escape(el.id)}`;
    }
    // 2. data-testid — escaped like the other attribute selectors below so a
    //    quote or backslash in the value cannot produce an invalid selector.
    const testId = el.getAttribute('data-testid');
    if (testId) return `[data-testid="${CSS.escape(testId)}"]`;
    // 3. aria-label
    const ariaLabel = el.getAttribute('aria-label');
    if (ariaLabel && document.querySelectorAll(`[aria-label="${CSS.escape(ariaLabel)}"]`).length === 1) {
      return `[aria-label="${CSS.escape(ariaLabel)}"]`;
    }
    // 4. name attribute
    const name = el.getAttribute('name');
    if (name && document.querySelectorAll(`[name="${CSS.escape(name)}"]`).length === 1) {
      return `[name="${CSS.escape(name)}"]`;
    }
    // 5. Unique CSS class (filter framework-generated)
    const tag = el.tagName.toLowerCase();
    const classes = [...el.classList].filter(c => !FRAMEWORK_CLASS_RE.test(c));
    for (const cls of classes) {
      const sel = `${tag}.${CSS.escape(cls)}`;
      if (document.querySelectorAll(sel).length === 1) return sel;
    }
    // 6. Two-class combination
    for (let i = 0; i < classes.length; i++) {
      for (let j = i + 1; j < classes.length; j++) {
        const sel = `${tag}.${CSS.escape(classes[i])}.${CSS.escape(classes[j])}`;
        if (document.querySelectorAll(sel).length === 1) return sel;
      }
    }
    // 7. Parent with ID + tag:nth-of-type
    //    Only the nearest ancestor that has an id is tried, and only when el is
    //    its direct child (otherwise indexOf returns -1).
    let parent = el.parentElement;
    while (parent && parent !== document.body) {
      if (parent.id) {
        const siblings = [...parent.querySelectorAll(`:scope > ${tag}`)];
        const idx = siblings.indexOf(el);
        if (idx !== -1) {
          const sel = `#${CSS.escape(parent.id)} > ${tag}:nth-of-type(${idx + 1})`;
          if (document.querySelectorAll(sel).length === 1) return sel;
        }
        break;
      }
      parent = parent.parentElement;
    }
    // 8. Fallback: tag:nth-of-type within parent (not checked for uniqueness)
    if (el.parentElement) {
      const siblings = [...el.parentElement.querySelectorAll(`:scope > ${tag}`)];
      const idx = siblings.indexOf(el);
      if (idx !== -1) return `${tag}:nth-of-type(${idx + 1})`;
    }
    return tag;
  }

  // Resolve a human-readable label for a form control; '' when none is found.
  function getLabel(el) {
    // Check for associated label
    if (el.id) {
      const label = root.querySelector(`label[for="${CSS.escape(el.id)}"]`);
      if (label) return label.textContent.trim();
    }
    // Check for wrapping label
    const parentLabel = el.closest('label');
    if (parentLabel) return parentLabel.textContent.trim();
    // aria-label
    if (el.getAttribute('aria-label')) return el.getAttribute('aria-label');
    // placeholder
    if (el.placeholder) return el.placeholder;
    return '';
  }

  // Visibility is judged from the element's own computed style only.
  function isVisible(el) {
    const style = getComputedStyle(el);
    return style.display !== 'none' && style.visibility !== 'hidden' && style.opacity !== '0';
  }

  // Cap a nested list (form fields, nav items, …) at MAX entries.
  function truncate(arr) {
    return arr.slice(0, MAX);
  }

  // ── Extract forms ──
  const forms = [];
  for (const form of root.querySelectorAll('form')) {
    if (!isVisible(form)) continue;
    const fields = [];
    for (const input of form.querySelectorAll('input, select, textarea')) {
      if (!isVisible(input) || input.type === 'hidden') continue;
      fields.push({
        selector: bestSelector(input),
        tag: input.tagName.toLowerCase(),
        type: input.type || input.tagName.toLowerCase(),
        name: input.name || undefined,
        label: getLabel(input) || undefined,
        required: input.required || undefined,
        placeholder: input.placeholder || undefined,
      });
    }
    const submitBtn = form.querySelector('button[type="submit"], input[type="submit"]');
    forms.push({
      selector: bestSelector(form),
      action: form.action || undefined,
      method: form.method || undefined,
      fields: truncate(fields),
      submitButton: submitBtn ? { selector: bestSelector(submitBtn), text: submitBtn.textContent?.trim() || submitBtn.value } : undefined,
    });
    if (forms.length >= MAX) break;
  }

  // ── Standalone inputs (outside forms) ──
  const standaloneInputs = [];
  for (const input of root.querySelectorAll('input, select, textarea')) {
    if (!isVisible(input) || input.type === 'hidden' || input.closest('form')) continue;
    standaloneInputs.push({
      selector: bestSelector(input),
      tag: input.tagName.toLowerCase(),
      type: input.type || input.tagName.toLowerCase(),
      name: input.name || undefined,
      label: getLabel(input) || undefined,
      placeholder: input.placeholder || undefined,
    });
    if (standaloneInputs.length >= MAX) break;
  }

  // ── Buttons ──
  const buttons = [];
  for (const btn of root.querySelectorAll('button, [role="button"], input[type="button"], input[type="submit"]')) {
    if (!isVisible(btn)) continue;
    buttons.push({
      selector: bestSelector(btn),
      text: btn.textContent?.trim() || btn.value || '',
      type: btn.type || undefined,
      disabled: btn.disabled || undefined,
      ariaLabel: btn.getAttribute('aria-label') || undefined,
    });
    if (buttons.length >= MAX) break;
  }

  // ── Links ──
  const links = [];
  for (const a of root.querySelectorAll('a[href]')) {
    if (!isVisible(a)) continue;
    links.push({
      selector: bestSelector(a),
      text: a.textContent?.trim() || '',
      href: a.getAttribute('href'),
    });
    if (links.length >= MAX) break;
  }

  // ── Navigation regions ──
  const navigation = [];
  for (const nav of root.querySelectorAll('nav, [role="navigation"]')) {
    if (!isVisible(nav)) continue;
    const items = [];
    for (const link of nav.querySelectorAll('a, button, [role="tab"], [role="menuitem"]')) {
      if (!isVisible(link)) continue;
      items.push({
        selector: bestSelector(link),
        text: link.textContent?.trim() || '',
        href: link.getAttribute('href') || undefined,
        active: link.classList.contains('active') || link.getAttribute('aria-current') === 'page' || undefined,
      });
    }
    navigation.push({
      selector: bestSelector(nav),
      ariaLabel: nav.getAttribute('aria-label') || undefined,
      items: truncate(items),
    });
    if (navigation.length >= MAX) break;
  }

  // ── Tabs ──
  const tabs = [];
  for (const tab of root.querySelectorAll('[role="tab"]')) {
    if (!isVisible(tab)) continue;
    tabs.push({
      selector: bestSelector(tab),
      text: tab.textContent?.trim() || '',
      selected: tab.getAttribute('aria-selected') === 'true' || undefined,
    });
    if (tabs.length >= MAX) break;
  }

  // ── Headings ──
  const headings = [];
  for (const h of root.querySelectorAll('h1, h2, h3, h4, h5, h6')) {
    if (!isVisible(h)) continue;
    headings.push({
      // tagName is "H1".."H6"; the second character is the level digit.
      level: Number.parseInt(h.tagName[1], 10),
      text: h.textContent?.trim() || '',
      selector: bestSelector(h),
    });
    if (headings.length >= MAX) break;
  }

  // ── Tables ──
  const tables = [];
  for (const table of root.querySelectorAll('table')) {
    if (!isVisible(table)) continue;
    const headers = [...table.querySelectorAll('th')].map(th => th.textContent?.trim());
    tables.push({
      selector: bestSelector(table),
      headers: truncate(headers),
      // Matches every <tr> descendant, header rows included.
      rowCount: table.querySelectorAll('tbody tr, tr').length,
      hasHeader: headers.length > 0,
    });
    if (tables.length >= MAX) break;
  }

  // ── Modals/Dialogs ──
  const modals = [];
  for (const modal of root.querySelectorAll('[role="dialog"], dialog, .modal, [class*="modal"], [class*="Modal"]')) {
    if (!isVisible(modal)) continue;
    const title = modal.querySelector('[class*="title"], [class*="Title"], h1, h2, h3, [role="heading"]');
    const closeBtn = modal.querySelector('[aria-label="close"], [aria-label="Close"], button.close, [class*="close"]');
    modals.push({
      selector: bestSelector(modal),
      title: title?.textContent?.trim() || undefined,
      hasCloseButton: !!closeBtn,
      closeSelector: closeBtn ? bestSelector(closeBtn) : undefined,
    });
    if (modals.length >= MAX) break;
  }

  // ── Menus/Dropdowns ──
  const menus = [];
  for (const menu of root.querySelectorAll('[role="menu"], .dropdown-menu, [class*="dropdown"]')) {
    if (!isVisible(menu)) continue;
    const items = [];
    for (const item of menu.querySelectorAll('[role="menuitem"], [role="menuitemradio"], [role="menuitemcheckbox"], .dropdown-item, [class*="MenuItem"]')) {
      if (!isVisible(item)) continue;
      items.push({ text: item.textContent?.trim() || '', selector: bestSelector(item) });
    }
    menus.push({
      selector: bestSelector(menu),
      items: truncate(items),
    });
    if (menus.length >= MAX) break;
  }

  // ── Alerts/Banners ──
  const alerts = [];
  for (const alert of root.querySelectorAll('[role="alert"], [role="status"], .alert, [class*="banner"], [class*="Banner"], [class*="toast"], [class*="Toast"], [class*="notification"], [class*="Notification"]')) {
    if (!isVisible(alert)) continue;
    alerts.push({
      selector: bestSelector(alert),
      text: alert.textContent?.trim().slice(0, 200) || '',
      role: alert.getAttribute('role') || undefined,
    });
    if (alerts.length >= MAX) break;
  }

  // ── Significant images (>50px) ──
  const images = [];
  for (const img of root.querySelectorAll('img, svg[role="img"], [role="img"]')) {
    if (!isVisible(img)) continue;
    const rect = img.getBoundingClientRect();
    // Skipped only when BOTH dimensions are under 50px.
    if (rect.width < 50 && rect.height < 50) continue;
    images.push({
      selector: bestSelector(img),
      alt: img.alt || img.getAttribute('aria-label') || undefined,
      width: Math.round(rect.width),
      height: Math.round(rect.height),
      src: img.src ? img.src.slice(0, 200) : undefined,
    });
    if (images.length >= MAX) break;
  }

  return {
    forms,
    standaloneInputs: standaloneInputs.length > 0 ? standaloneInputs : undefined,
    buttons,
    links,
    navigation: navigation.length > 0 ? navigation : undefined,
    tabs: tabs.length > 0 ? tabs : undefined,
    headings,
    tables: tables.length > 0 ? tables : undefined,
    modals: modals.length > 0 ? modals : undefined,
    menus: menus.length > 0 ? menus : undefined,
    alerts: alerts.length > 0 ? alerts : undefined,
    images: images.length > 0 ? images : undefined,
    stats: {
      totalForms: forms.length,
      totalButtons: buttons.length,
      totalLinks: links.length,
      totalInputs: forms.reduce((n, f) => n + f.fields.length, 0) + standaloneInputs.length,
      totalHeadings: headings.length,
      totalTables: tables.length,
      totalNavRegions: navigation.length,
      totalTabs: tabs.length,
      totalModals: modals.length,
      totalImages: images.length,
    },
  };
}
1591
+
1592
/**
 * Analyzes extracted page structure and generates ready-to-use test scaffolds.
 * Runs on the Node.js side after page.evaluate returns.
 *
 * Scaffolds produced (in order):
 *  - one test per form: a login flow when the form has a password field plus an
 *    email/username field, otherwise a generic fill-and-submit test
 *  - a navigation test clicking up to 5 navigation items
 *  - a "table has data" test for the first table with rows
 *  - a tab-switching test when at least two tabs exist
 *  - a page-structure verification test (always generated)
 *
 * @param {object} structure - Extracted page structure (forms, navigation, tables,
 *   tabs, headings, buttons). Missing sections are treated as empty.
 * @param {string} pageUrl - URL of the analyzed page; only its pathname is used.
 *   Falls back to '/' when the URL cannot be parsed.
 * @returns {Array<{name: string, actions: object[]}>} Suggested tests.
 */
function buildSuggestedTests(structure, pageUrl) {
  // Tolerate a missing structure the same way missing sections are tolerated.
  const page = structure || {};
  const tests = [];
  const urlPath = (() => { try { return new URL(pageUrl).pathname; } catch { return '/'; } })();
  const gotoPage = () => ({ type: 'goto', value: urlPath });

  const labelIncludes = (field, needle) => (field.label || '').toLowerCase().includes(needle);
  const isEmailField = (f) => f.type === 'email' || f.name === 'email' || labelIncludes(f, 'email');
  const isUsernameField = (f) => f.name === 'username' || labelIncludes(f, 'user');
  const isPasswordField = (f) => f.type === 'password';

  // Controls that cannot receive typed text; a `type` action on them is meaningless.
  const nonTypableTypes = new Set([
    'checkbox', 'radio', 'file', 'hidden', 'submit', 'button', 'reset', 'image', 'range', 'color',
  ]);

  // Picks a sample value for a field, or undefined when the field should not be typed into.
  const sampleValueFor = (field) => {
    if (field.tag === 'select' || nonTypableTypes.has(field.type)) return undefined;
    switch (field.type) {
      case 'email': return 'test@example.com';
      case 'number': return '42';
      case 'tel': return '555-0100';
      case 'date': return '2025-01-15';
      default: return field.tag === 'textarea' ? 'Sample text input' : 'Test value';
    }
  };

  // Form tests: login detection first, generic fill + submit otherwise.
  for (const form of page.forms || []) {
    const fields = form.fields || [];
    const passwordField = fields.find(isPasswordField);
    const credential = fields.find(isEmailField) || fields.find(isUsernameField);

    if (passwordField && credential) {
      const actions = [
        gotoPage(),
        { type: 'type', selector: credential.selector, value: 'test@example.com' },
        { type: 'type', selector: passwordField.selector, value: 'password123' },
      ];
      if (form.submitButton) actions.push({ type: 'click', selector: form.submitButton.selector });
      actions.push({ type: 'wait', value: '2000' });
      tests.push({ name: 'login-form-submission', actions });
      continue;
    }

    if (fields.length > 0) {
      const actions = [gotoPage()];
      for (const field of fields.slice(0, 10)) {
        const value = sampleValueFor(field);
        if (value !== undefined) {
          actions.push({ type: 'type', selector: field.selector, value });
        }
      }
      if (form.submitButton) actions.push({ type: 'click', selector: form.submitButton.selector });
      actions.push({ type: 'wait', value: '1000' });
      tests.push({ name: `form-submission-${tests.length + 1}`, actions });
    }
  }

  // Navigation test
  const navItems = (page.navigation || []).flatMap(n => n.items || []).filter(i => i.href && i.text);
  if (navItems.length > 0) {
    const actions = [gotoPage()];
    for (const item of navItems.slice(0, 5)) {
      actions.push({ type: 'click', selector: item.selector });
      actions.push({ type: 'wait', value: '1000' });
      // Only assert the URL for links that actually navigate somewhere.
      if (item.href !== '#' && !item.href.startsWith('javascript:')) {
        actions.push({ type: 'assert_url', value: item.href });
      }
      // Return to the analyzed page before trying the next item.
      actions.push(gotoPage());
    }
    tests.push({ name: 'navigation-links', actions });
  }

  // Table data assertion (only the first table that has rows)
  const populatedTable = (page.tables || []).find(t => t.rowCount > 0);
  if (populatedTable) {
    tests.push({
      name: 'table-has-data',
      actions: [
        gotoPage(),
        { type: 'wait', selector: populatedTable.selector },
        { type: 'assert_count', selector: `${populatedTable.selector} tbody tr`, value: '>=1' },
      ],
    });
  }

  // Tab switching test
  const tabs = page.tabs || [];
  if (tabs.length >= 2) {
    const actions = [gotoPage()];
    for (const tab of tabs.slice(0, 5)) {
      actions.push({ type: 'click', selector: tab.selector });
      actions.push({ type: 'wait', value: '500' });
    }
    tests.push({ name: 'tab-switching', actions });
  }

  // Page structure verification (always generated)
  const verifyActions = [gotoPage()];
  for (const h of (page.headings || []).filter(h => h.level <= 2).slice(0, 3)) {
    verifyActions.push({ type: 'assert_text', text: h.text });
  }
  // Derived from the buttons list itself so a missing `stats` object cannot throw.
  for (const btn of (page.buttons || []).filter(b => b.text).slice(0, 3)) {
    verifyActions.push({ type: 'assert_visible', selector: btn.selector });
  }
  tests.push({ name: 'page-structure-verification', actions: verifyActions });

  return tests;
}
1697
+
1698
/**
 * Handles the `e2e_analyze` tool: loads a page in a pooled browser, extracts its
 * structure and returns it together with suggested test scaffolds.
 *
 * Steps: pick a pool, open a page with the configured viewport, optionally seed
 * localStorage with an auth token, navigate to `args.url`, optionally wait for a
 * selector and/or a fixed delay, run `extractPageStructure` in the page, build
 * suggested tests, and (unless `args.includeScreenshot === false`) capture and
 * register a viewport screenshot.
 *
 * @param {object} args - Tool arguments. Reads: url (required), cwd, authToken,
 *   authStorageKey, selector, delay, scope, maxElements, includeScreenshot.
 * @returns {Promise<object>} An error result, or `{ content }` holding the JSON
 *   report as text and, when captured, the screenshot as a base64 PNG image.
 */
async function handleAnalyze(args) {
  if (!args.url) {
    return errorResult('Missing required parameter: url');
  }

  const config = await loadConfig({}, args.cwd);
  const pool = await selectPool(getPoolUrls(config));

  let browser;
  try {
    browser = await connectToPool(pool);
    const page = await browser.newPage();
    await page.setViewport(config.viewport);

    // Inject auth token into localStorage before navigation.
    const token = args.authToken || config.authToken;
    if (token) {
      const tokenKey = args.authStorageKey || config.authStorageKey || 'accessToken';
      // localStorage is per-origin, so the origin has to be loaded first.
      const { origin } = new URL(args.url);
      await page.goto(origin, { waitUntil: 'domcontentloaded', timeout: 15000 });
      await page.evaluate((key, value) => { localStorage.setItem(key, value); }, tokenKey, token);
    }

    await page.goto(args.url, { waitUntil: 'networkidle2', timeout: 30000 });

    if (args.selector) {
      await page.waitForSelector(args.selector, { timeout: 10000 });
    }

    if (args.delay && args.delay > 0) {
      await new Promise((resolve) => setTimeout(resolve, args.delay));
    }

    // Extract page structure inside the browser context.
    const structure = await page.evaluate(extractPageStructure, args.scope || null, args.maxElements || 50);
    if (structure.error) {
      return errorResult(structure.error);
    }

    const meta = {
      url: args.url,
      title: await page.title(),
      viewport: config.viewport,
      scope: args.scope || undefined,
    };

    const suggestedTests = buildSuggestedTests(structure, args.url);

    // Screenshot is taken unless explicitly disabled.
    let screenshotBase64;
    if (args.includeScreenshot !== false) {
      if (!fs.existsSync(config.screenshotsDir)) {
        fs.mkdirSync(config.screenshotsDir, { recursive: true });
      }
      const screenshotPath = path.join(config.screenshotsDir, `analyze-${Date.now()}.png`);
      await page.screenshot({ path: screenshotPath, fullPage: false });

      // Register the screenshot under the project so it can be looked up by hash.
      const cwd = args.cwd || process.cwd();
      const projectName = config.projectName || path.basename(cwd);
      const projectId = ensureProject(cwd, projectName, config.screenshotsDir, config.testsDir);
      const hash = computeScreenshotHash(screenshotPath);
      registerScreenshotHash(hash, screenshotPath, projectId, null);
      meta.screenshotHash = `ss:${hash}`;

      screenshotBase64 = fs.readFileSync(screenshotPath).toString('base64');
    }

    const report = { meta, ...structure, suggestedTests };
    const content = [{ type: 'text', text: JSON.stringify(report, null, 2) }];
    if (screenshotBase64) {
      content.push({ type: 'image', data: screenshotBase64, mimeType: 'image/png' });
    }

    return { content };
  } finally {
    if (browser) browser.disconnect();
  }
}
1786
+
1787
+ async function handleCapture(args) {
1788
+ if (!args.url) return errorResult('Missing required parameter: url');
1789
+
1790
+ const config = await loadConfig({}, args.cwd);
1791
+ const capturePoolUrls = getPoolUrls(config);
1792
+ const capturePool = await selectPool(capturePoolUrls);
547
1793
 
548
1794
  let browser;
549
1795
  try {
550
- browser = await connectToPool(config.poolUrl);
1796
+ browser = await connectToPool(capturePool);
551
1797
  const page = await browser.newPage();
552
1798
  await page.setViewport(config.viewport);
1799
+
1800
+ // Inject auth token into localStorage before navigation
1801
+ const authToken = args.authToken || config.authToken;
1802
+ if (authToken) {
1803
+ const storageKey = args.authStorageKey || config.authStorageKey || 'accessToken';
1804
+ // Navigate to origin first so localStorage is accessible
1805
+ const origin = new URL(args.url).origin;
1806
+ await page.goto(origin, { waitUntil: 'domcontentloaded', timeout: 15000 });
1807
+ await page.evaluate((key, token) => { localStorage.setItem(key, token); }, storageKey, authToken);
1808
+ }
1809
+
553
1810
  await page.goto(args.url, { waitUntil: 'networkidle2', timeout: 30000 });
554
1811
 
555
1812
  if (args.selector) {
@@ -617,6 +1874,245 @@ async function handleDashboardStop() {
617
1874
  return textResult('Dashboard stopped');
618
1875
  }
619
1876
 
1877
/**
 * Handles the `e2e_neo4j` tool: starts, stops, or reports the status of Neo4j.
 *
 * @param {object} args - Tool arguments. Reads: action ('start' | 'stop' | 'status'), cwd.
 * @returns {Promise<object>} A text result on success, or an error result when the
 *   action is missing/unknown or the start/stop helper throws.
 */
async function handleNeo4j(args) {
  const { action } = args;
  if (!action) return errorResult('Missing required parameter: action');

  const config = await loadConfig({}, args.cwd);

  if (action === 'start') {
    try {
      startNeo4j(config, args.cwd);
      const bolt = config.neo4jBoltPort || 7687;
      const http = config.neo4jHttpPort || 7474;
      return textResult(`Neo4j started. Bolt: bolt://localhost:${bolt}, Browser: http://localhost:${http}`);
    } catch (err) {
      return errorResult(`Failed to start Neo4j: ${err.message}`);
    }
  }

  if (action === 'stop') {
    try {
      stopNeo4j(config, args.cwd);
      return textResult('Neo4j stopped');
    } catch (err) {
      return errorResult(`Failed to stop Neo4j: ${err.message}`);
    }
  }

  if (action === 'status') {
    const status = getNeo4jStatus(config, args.cwd);
    const report = [`Running: ${status.running ? 'yes' : 'no'}`];
    // Connection endpoints are only reported while the service is running.
    if (status.running) {
      report.push(`Bolt: bolt://localhost:${status.boltPort}`, `Browser: http://localhost:${status.httpPort}`);
    }
    if (status.error) report.push(`Error: ${status.error}`);
    return textResult(report.join('\n'));
  }

  return errorResult('Unknown action. Use: start, stop, status');
}
1913
+
1914
/**
 * Handles the `e2e_learnings` tool: answers a learnings query for the project.
 *
 * Supported queries (matched case-insensitively, surrounding whitespace ignored):
 *  - drill-downs: `test:<name>`, `page:<path>`, `selector:<value>`
 *  - categories: summary, flaky, selectors, pages, apis, errors, trends
 *
 * Drill-down results are enriched with graph data when `queryGraph` resolves to a
 * truthy value; graph failures are deliberately ignored (best-effort enrichment).
 *
 * @param {object} args - Tool arguments. Reads: query (required), days, cwd.
 * @returns {Promise<object>} A text result holding JSON, or an error result when the
 *   query is missing or unknown.
 */
async function handleLearnings(args) {
  if (!args.query) return errorResult('Missing required parameter: query');

  const config = await loadConfig({}, args.cwd);
  // Look-back window: args.days, then config.learningsDays, then 30; clamped to 1..365.
  const days = Math.min(Math.max(parseInt(args.days || config.learningsDays || 30, 10) || 30, 1), 365);
  const projectId = ensureProject(config._cwd, config.projectName, config.screenshotsDir, config.testsDir);

  // Prefix matching and value extraction must use the SAME trimmed string; slicing
  // the untrimmed input would shift the offset whenever the query has leading spaces.
  const trimmedQuery = String(args.query).trim();
  const query = trimmedQuery.toLowerCase();

  // Drill-down queries (enriched with graph data when Neo4j is available)
  if (query.startsWith('test:')) {
    const testName = trimmedQuery.slice('test:'.length).trim();
    const history = getTestHistory(projectId, testName, days);
    const result = { query: args.query, testName, history };
    const graphDeps = await queryGraph(config, 'test-dependencies', { testName }).catch(() => null);
    if (graphDeps) result.relatedTests = graphDeps;
    return textResult(JSON.stringify(result, null, 2));
  }
  if (query.startsWith('page:')) {
    const urlPath = trimmedQuery.slice('page:'.length).trim();
    const history = getPageHistory(projectId, urlPath, days);
    const result = { query: args.query, urlPath, history };
    const graphImpact = await queryGraph(config, 'page-impact', { path: urlPath }).catch(() => null);
    if (graphImpact) result.affectedTests = graphImpact;
    return textResult(JSON.stringify(result, null, 2));
  }
  if (query.startsWith('selector:')) {
    const selector = trimmedQuery.slice('selector:'.length).trim();
    const history = getSelectorHistory(projectId, selector, days);
    const result = { query: args.query, selector, history };
    const graphUsage = await queryGraph(config, 'selector-usage', { selector }).catch(() => null);
    if (graphUsage) result.usage = graphUsage;
    return textResult(JSON.stringify(result, null, 2));
  }

  // Category queries
  switch (query) {
    case 'summary': {
      const summary = getLearningsSummary(projectId);
      // The summary always reports the trend over the last 7 days, independent of `days`.
      const trendsResult = getTestTrends(projectId, 7);
      return textResult(JSON.stringify({ ...summary, recentTrend: trendsResult }, null, 2));
    }
    case 'flaky':
      return textResult(JSON.stringify(getFlakySummary(projectId, days), null, 2));
    case 'selectors':
      return textResult(JSON.stringify(getSelectorStability(projectId, days), null, 2));
    case 'pages':
      return textResult(JSON.stringify(getPageHealth(projectId, days), null, 2));
    case 'apis':
      return textResult(JSON.stringify(getApiHealth(projectId, days), null, 2));
    case 'errors':
      return textResult(JSON.stringify(getErrorPatterns(projectId), null, 2));
    case 'trends':
      return textResult(JSON.stringify(getTestTrends(projectId, days), null, 2));
    default:
      return errorResult(`Unknown query: "${args.query}". Use: summary, flaky, selectors, pages, apis, errors, trends, test:<name>, page:<path>, selector:<value>`);
  }
}
1972
+
1973
/**
 * Handles the `e2e_network_logs` tool: returns network logs recorded for a run.
 *
 * Only filters that were actually supplied are forwarded to `getNetworkLogs`:
 * string filters when truthy, status bounds when defined (so 0 is kept), and
 * boolean flags normalized to `true`.
 *
 * @param {object} args - Tool arguments. Reads: runDbId (required), testName, method,
 *   statusMin, statusMax, urlPattern, errorsOnly, includeHeaders, includeBodies.
 * @returns {Promise<object>} A text result with the logs as JSON (or a "no logs"
 *   message), or an error result when runDbId is missing.
 */
async function handleNetworkLogs(args) {
  if (!args.runDbId) {
    return errorResult('Missing required parameter: runDbId');
  }

  const filters = {
    ...(args.testName ? { testName: args.testName } : {}),
    ...(args.method ? { method: args.method } : {}),
    ...(args.statusMin !== undefined ? { statusMin: args.statusMin } : {}),
    ...(args.statusMax !== undefined ? { statusMax: args.statusMax } : {}),
    ...(args.urlPattern ? { urlPattern: args.urlPattern } : {}),
    ...(args.errorsOnly ? { errorsOnly: true } : {}),
    ...(args.includeHeaders ? { includeHeaders: true } : {}),
    ...(args.includeBodies ? { includeBodies: true } : {}),
  };

  const logs = getNetworkLogs(args.runDbId, filters);

  return logs.length === 0
    ? textResult('No network logs found for the given filters.')
    : textResult(JSON.stringify(logs, null, 2));
}
1994
+
1995
/**
 * Handles the `e2e_vars` tool: set, get, list, or delete project variables.
 *
 * @param {object} args - Tool arguments. Reads: action (required: 'set' | 'get' |
 *   'list' | 'delete'), key, value, scope (defaults to 'project'), cwd (defaults to
 *   the process working directory).
 * @returns {Promise<object>} A text result on success, or an error result when a
 *   required parameter is missing, the action is unknown, or the variable is not found.
 */
async function handleVars(args) {
  const action = args.action;
  if (!action) return errorResult('Missing required parameter: action');

  const cwd = args.cwd || process.cwd();
  const config = await loadConfig({}, cwd);
  // path.basename handles trailing separators and platform-specific separators,
  // matching how the project name is derived elsewhere in this file.
  const projectName = config.projectName || path.basename(cwd) || 'default';
  const projectId = ensureProject(cwd, projectName, config.screenshotsDir, config.testsDir);
  const scope = args.scope || 'project';

  switch (action) {
    case 'set': {
      if (!args.key) return errorResult('Missing required parameter: key');
      // `value` may legitimately be falsy (e.g. '' or 0); only undefined is rejected.
      if (args.value === undefined) return errorResult('Missing required parameter: value');
      setVariable(projectId, scope, args.key, args.value);
      return textResult(`Variable set: ${args.key} (scope: ${scope})`);
    }
    case 'get': {
      if (!args.key) return errorResult('Missing required parameter: key');
      const vars = getVariables(projectId, scope);
      if (vars[args.key] !== undefined) {
        return textResult(JSON.stringify({ key: args.key, value: vars[args.key], scope }));
      }
      // Fall back to project scope if not found in specific scope
      if (scope !== 'project') {
        const projectVars = getVariables(projectId, 'project');
        if (projectVars[args.key] !== undefined) {
          return textResult(JSON.stringify({ key: args.key, value: projectVars[args.key], scope: 'project' }));
        }
      }
      return errorResult(`Variable not found: ${args.key} (scope: ${scope})`);
    }
    case 'list': {
      const all = listVariables(projectId);
      if (Object.keys(all).length === 0) {
        return textResult('No variables set for this project.');
      }
      return textResult(JSON.stringify(all, null, 2));
    }
    case 'delete': {
      if (!args.key) return errorResult('Missing required parameter: key');
      const deleted = deleteVariable(projectId, scope, args.key);
      if (deleted) {
        return textResult(`Variable deleted: ${args.key} (scope: ${scope})`);
      }
      return errorResult(`Variable not found: ${args.key} (scope: ${scope})`);
    }
    default:
      return errorResult(`Unknown action: ${action}. Use set, get, list, or delete.`);
  }
}
2046
+
2047
+ // ── Verification instructions builder ─────────────────────────────────────────
2048
+
2049
/**
 * Builds the plain-text instructions describing how verification entries are to
 * be evaluated and reported.
 *
 * @param {string} strictness - 'strict', 'moderate' or 'lenient'; any other value
 *   falls back to the moderate wording.
 * @param {boolean} hasBaselines - When truthy, adds the baseline-screenshot step,
 *   the before/after comparison section and the STATE CHANGE report line.
 * @param {boolean} hasChecklists - When truthy, adds per-item checklist evaluation
 *   and the CRITERIA report lines.
 * @returns {string} The instructions, one line per entry, joined with '\n'.
 */
function buildVerificationInstructions(strictness, hasBaselines, hasChecklists) {
  const strictnessText = {
    strict: 'STRICT — No ambiguity allowed. If ANY criterion is unclear, not fully visible, or doubtful, verdict is FAIL. Err on the side of failing.',
    moderate: 'MODERATE — Use reasonable judgment. Minor cosmetic differences are acceptable, but functional mismatches or missing elements are FAIL.',
    lenient: 'LENIENT — Only fail on clear, obvious contradictions. Partial matches and minor discrepancies are acceptable.',
  };
  const chosenLevel = strictnessText[strictness] || strictnessText.moderate;
  // The verdict step is numbered after the optional before/after comparison step.
  const verdictStep = hasBaselines ? '4' : '3';

  const baselineRetrieval = hasBaselines
    ? [' - If baselineScreenshotHash is present, also call e2e_screenshot with it (before-state).']
    : [];

  const evaluation = hasChecklists
    ? [
      ' - If isChecklist is true, evaluate EACH item in the expect array independently as PASS or FAIL.',
      ' - If isChecklist is false (or absent), evaluate the single expect description as a whole.',
    ]
    : [' - Compare the screenshot against the expect description.'];

  const comparison = hasBaselines
    ? [
      '',
      '3. COMPARE BEFORE/AFTER',
      ' - If a baseline screenshot was retrieved, describe the state change between baseline and after screenshots.',
      ' - Verify the state change is consistent with what the test actions intended.',
    ]
    : [];

  const stateChangeLine = hasBaselines
    ? [' STATE CHANGE: <one-line description of what changed from baseline to after>']
    : [];

  const criteriaLines = hasChecklists
    ? [
      ' CRITERIA:',
      ' - "<criterion text>": PASS | FAIL (reason if FAIL)',
    ]
    : [];

  return [
    `Verification strictness: ${chosenLevel}`,
    '',
    'For each entry in the verifications array:',
    '',
    '1. RETRIEVE SCREENSHOTS',
    ' - Call e2e_screenshot with the screenshotHash (after-state).',
    ...baselineRetrieval,
    '',
    '2. EVALUATE',
    ...evaluation,
    ...comparison,
    '',
    `${verdictStep}. REPORT VERDICT — use this exact format for each test:`,
    '',
    ' TEST: <test-name>',
    ' VERDICT: PASS | FAIL',
    ...stateChangeLine,
    ...criteriaLines,
    ' REASON: <brief explanation of the verdict>',
  ].join('\n');
}
2115
+
620
2116
  // ── Helpers ───────────────────────────────────────────────────────────────────
621
2117
 
622
2118
  export function textResult(text) {
@@ -648,8 +2144,20 @@ export async function dispatchTool(name, args = {}) {
648
2144
  return await handleDashboardStop();
649
2145
  case 'e2e_issue':
650
2146
  return await handleIssue(args);
2147
+ case 'e2e_create_module':
2148
+ return await handleCreateModule(args);
651
2149
  case 'e2e_capture':
652
2150
  return await handleCapture(args);
2151
+ case 'e2e_analyze':
2152
+ return await handleAnalyze(args);
2153
+ case 'e2e_learnings':
2154
+ return await handleLearnings(args);
2155
+ case 'e2e_neo4j':
2156
+ return await handleNeo4j(args);
2157
+ case 'e2e_network_logs':
2158
+ return await handleNetworkLogs(args);
2159
+ case 'e2e_vars':
2160
+ return await handleVars(args);
653
2161
  default:
654
2162
  return errorResult(`Unknown tool: ${name}`);
655
2163
  }