@matware/e2e-runner 1.1.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/.claude-plugin/plugin.json +9 -0
  2. package/.mcp.json +9 -0
  3. package/README.md +505 -279
  4. package/agents/test-analyzer.md +81 -0
  5. package/agents/test-creator.md +102 -0
  6. package/agents/test-improver.md +140 -0
  7. package/bin/cli.js +275 -7
  8. package/commands/create-test.md +50 -0
  9. package/commands/run.md +49 -0
  10. package/commands/verify-issue.md +63 -0
  11. package/package.json +11 -3
  12. package/skills/e2e-testing/SKILL.md +166 -0
  13. package/skills/e2e-testing/references/action-types.md +100 -0
  14. package/skills/e2e-testing/references/test-json-format.md +159 -0
  15. package/skills/e2e-testing/references/troubleshooting.md +182 -0
  16. package/src/actions.js +280 -17
  17. package/src/ai-generate.js +122 -11
  18. package/src/config.js +58 -0
  19. package/src/dashboard.js +173 -10
  20. package/src/db.js +232 -17
  21. package/src/index.js +9 -3
  22. package/src/learner-markdown.js +177 -0
  23. package/src/learner-neo4j.js +255 -0
  24. package/src/learner-sqlite.js +354 -0
  25. package/src/learner.js +413 -0
  26. package/src/mcp-tools.js +575 -16
  27. package/src/module-resolver.js +273 -0
  28. package/src/narrate.js +225 -0
  29. package/src/neo4j-pool.js +124 -0
  30. package/src/reporter.js +47 -2
  31. package/src/runner.js +180 -40
  32. package/src/verify.js +19 -5
  33. package/templates/build-dashboard.js +28 -0
  34. package/templates/dashboard/app.js +1152 -0
  35. package/templates/dashboard/styles.css +413 -0
  36. package/templates/dashboard/template.html +201 -0
  37. package/templates/dashboard.html +1091 -268
  38. package/templates/docker-compose-neo4j.yml +19 -0
  39. package/templates/e2e.config.js +3 -0
package/src/mcp-tools.js CHANGED
@@ -13,14 +13,19 @@ import path from 'path';
13
13
  import http from 'http';
14
14
 
15
15
  import { loadConfig } from './config.js';
16
- import { waitForPool, getPoolStatus } from './pool.js';
16
+ import { waitForPool, getPoolStatus, connectToPool } from './pool.js';
17
17
  import { runTestsParallel, loadTestFile, loadTestSuite, loadAllSuites, listSuites } from './runner.js';
18
18
  import { generateReport, saveReport, persistRun } from './reporter.js';
19
+ import { narrateTest } from './narrate.js';
19
20
  import { startDashboard, stopDashboard } from './dashboard.js';
20
- import { lookupScreenshotHash } from './db.js';
21
+ import { lookupScreenshotHash, ensureProject, computeScreenshotHash, registerScreenshotHash, getNetworkLogs } from './db.js';
21
22
  import { fetchIssue, checkCliAuth, detectProvider } from './issues.js';
22
23
  import { buildPrompt, hasApiKey } from './ai-generate.js';
23
24
  import { verifyIssue } from './verify.js';
25
+ import { listModules } from './module-resolver.js';
26
+ import { getLearningsSummary, getFlakySummary, getSelectorStability, getPageHealth, getApiHealth, getErrorPatterns, getTestTrends, getRunInsights, getTestHistory, getPageHistory, getSelectorHistory } from './learner-sqlite.js';
27
+ import { queryGraph } from './learner-neo4j.js';
28
+ import { startNeo4j, stopNeo4j, getNeo4jStatus } from './neo4j-pool.js';
24
29
 
25
30
  // ── Tool definitions ──────────────────────────────────────────────────────────
26
31
 
@@ -56,6 +61,10 @@ export const TOOLS = [
56
61
  type: 'number',
57
62
  description: 'Number of retries for failed tests',
58
63
  },
64
+ failOnNetworkError: {
65
+ type: 'boolean',
66
+ description: 'Fail tests when network requests fail (e.g. ERR_CONNECTION_REFUSED). Default: false.',
67
+ },
59
68
  cwd: {
60
69
  type: 'string',
61
70
  description: 'Absolute path to the project root directory. Claude Code should pass its current working directory.',
@@ -80,7 +89,7 @@ export const TOOLS = [
80
89
  {
81
90
  name: 'e2e_create_test',
82
91
  description:
83
- 'Create a new E2E test JSON file. Provide the suite name and an array of test objects, each with a name and actions array.',
92
+ 'Create a new E2E test JSON file. Provide the suite name and an array of test objects, each with a name and actions array. Actions can include { "$use": "module-name", "params": {...} } to reference reusable modules.',
84
93
  inputSchema: {
85
94
  type: 'object',
86
95
  properties: {
@@ -95,6 +104,7 @@ export const TOOLS = [
95
104
  type: 'object',
96
105
  properties: {
97
106
  name: { type: 'string', description: 'Test name' },
107
+ expect: { type: 'string', description: 'Human-readable description of the expected visual outcome. After the test runs, a verification screenshot is captured and Claude Code judges pass/fail against this description.' },
98
108
  actions: {
99
109
  type: 'array',
100
110
  description: 'Sequential browser actions',
@@ -103,7 +113,7 @@ export const TOOLS = [
103
113
  properties: {
104
114
  type: {
105
115
  type: 'string',
106
- description: 'Action type: goto, click, type, wait, assert_text, assert_url, assert_visible, assert_count, screenshot, select, clear, press, scroll, hover, evaluate, navigate',
116
+ description: 'Action type: goto, click, click_regex, click_option, click_chip, type, type_react, focus_autocomplete, wait, assert_text, assert_element_text, assert_attribute, assert_class, assert_visible, assert_not_visible, assert_input_value, assert_matches, assert_url, assert_count, assert_no_network_errors, get_text, screenshot, select, clear, clear_cookies, press, scroll, hover, evaluate, navigate',
107
117
  },
108
118
  selector: { type: 'string', description: 'CSS selector' },
109
119
  value: { type: 'string', description: 'Value for the action' },
@@ -206,6 +216,62 @@ export const TOOLS = [
206
216
  description:
207
217
  'prompt = return issue + prompt for Claude Code to create tests (default). verify = auto-generate tests via Claude API and run them.',
208
218
  },
219
+ testType: {
220
+ type: 'string',
221
+ enum: ['e2e', 'api'],
222
+ description: "Test category: 'e2e' (default) for UI-driven tests, 'api' for backend API tests",
223
+ },
224
+ authToken: {
225
+ type: 'string',
226
+ description: 'JWT or auth token to inject into localStorage before running tests (for authenticated apps)',
227
+ },
228
+ authStorageKey: {
229
+ type: 'string',
230
+ description: 'localStorage key name for the auth token (default: "accessToken")',
231
+ },
232
+ cwd: {
233
+ type: 'string',
234
+ description: 'Absolute path to the project root directory. Claude Code should pass its current working directory.',
235
+ },
236
+ },
237
+ required: ['url'],
238
+ },
239
+ },
240
+ {
241
+ name: 'e2e_capture',
242
+ description:
243
+ 'Capture a screenshot of any URL on demand. Connects to the Chrome pool, navigates to the URL, takes a screenshot, and returns the image with its ss:HASH.',
244
+ inputSchema: {
245
+ type: 'object',
246
+ properties: {
247
+ url: {
248
+ type: 'string',
249
+ description: 'Full URL to capture (e.g. "https://example.com" or "http://host.docker.internal:3000/dashboard")',
250
+ },
251
+ filename: {
252
+ type: 'string',
253
+ description: 'Output filename (default: capture-<timestamp>.png)',
254
+ },
255
+ fullPage: {
256
+ type: 'boolean',
257
+ description: 'Capture full scrollable page (default: false)',
258
+ },
259
+ selector: {
260
+ type: 'string',
261
+ description: 'Wait for this CSS selector before capturing',
262
+ },
263
+ delay: {
264
+ type: 'number',
265
+ description: 'Wait N milliseconds after page load before capturing (default: 0)',
266
+ },
267
+ authToken: {
268
+ type: 'string',
269
+ description: 'JWT or auth token to inject into localStorage before navigating (for authenticated pages)',
270
+ },
271
+ authStorageKey: {
272
+ type: 'string',
273
+ description: 'localStorage key name for the auth token (default: "accessToken")',
274
+ },
209
275
  cwd: {
210
276
  type: 'string',
211
277
  description: 'Absolute path to the project root directory. Claude Code should pass its current working directory.',
@@ -214,6 +280,146 @@ export const TOOLS = [
214
280
  required: ['url'],
215
281
  },
216
282
  },
283
+ {
284
+ name: 'e2e_create_module',
285
+ description:
286
+ 'Create a reusable module for E2E tests. Modules define action sequences that can be referenced from tests via { "$use": "module-name", "params": {...} }. Useful for auth setup, navigation patterns, and other repeated sequences.',
287
+ inputSchema: {
288
+ type: 'object',
289
+ properties: {
290
+ name: {
291
+ type: 'string',
292
+ description: 'Module name (used in $use references, e.g. "auth-jwt", "navigate-patient")',
293
+ },
294
+ description: {
295
+ type: 'string',
296
+ description: 'Human-readable description of what this module does',
297
+ },
298
+ params: {
299
+ type: 'object',
300
+ description: 'Parameter definitions. Each key is a param name, value is { required: boolean, default?: string, description?: string }',
301
+ additionalProperties: {
302
+ type: 'object',
303
+ properties: {
304
+ required: { type: 'boolean' },
305
+ default: { type: 'string' },
306
+ description: { type: 'string' },
307
+ },
308
+ },
309
+ },
310
+ actions: {
311
+ type: 'array',
312
+ description: 'Sequential actions with {{param}} placeholders for substitution',
313
+ items: {
314
+ type: 'object',
315
+ properties: {
316
+ type: { type: 'string', description: 'Action type (goto, click, evaluate, wait, etc.) or omit for $use references' },
317
+ selector: { type: 'string' },
318
+ value: { type: 'string' },
319
+ text: { type: 'string' },
320
+ $use: { type: 'string', description: 'Reference another module by name' },
321
+ params: { type: 'object', description: 'Parameters for nested $use' },
322
+ },
323
+ },
324
+ },
325
+ cwd: {
326
+ type: 'string',
327
+ description: 'Absolute path to the project root directory.',
328
+ },
329
+ },
330
+ required: ['name', 'actions'],
331
+ },
332
+ },
333
+ {
334
+ name: 'e2e_learnings',
335
+ description:
336
+ 'Query the E2E learning system for insights about test stability, flaky tests, selector health, page health, API health, error patterns, and trends. Builds knowledge across runs.',
337
+ inputSchema: {
338
+ type: 'object',
339
+ properties: {
340
+ query: {
341
+ type: 'string',
342
+ description: 'What to query: "summary" (full overview), "flaky" (flaky tests), "selectors" (selector stability), "pages" (page health), "apis" (API health), "errors" (error patterns), "trends" (7-day trend). Drill-down: "test:<name>", "page:<path>", "selector:<value>".',
343
+ },
344
+ days: {
345
+ type: 'number',
346
+ description: 'Analysis window in days (default: 30)',
347
+ },
348
+ cwd: {
349
+ type: 'string',
350
+ description: 'Absolute path to the project root directory. Claude Code should pass its current working directory.',
351
+ },
352
+ },
353
+ required: ['query'],
354
+ },
355
+ },
356
+ {
357
+ name: 'e2e_neo4j',
358
+ description:
359
+ 'Manage the Neo4j knowledge graph container for E2E learnings. Requires Docker.',
360
+ inputSchema: {
361
+ type: 'object',
362
+ properties: {
363
+ action: {
364
+ type: 'string',
365
+ enum: ['start', 'stop', 'status'],
366
+ description: 'Container lifecycle action',
367
+ },
368
+ cwd: {
369
+ type: 'string',
370
+ description: 'Absolute path to the project root directory.',
371
+ },
372
+ },
373
+ required: ['action'],
374
+ },
375
+ },
376
+ {
377
+ name: 'e2e_network_logs',
378
+ description:
379
+ 'Query network request/response logs for a specific test run. Returns filtered logs from SQLite. Use the runDbId from e2e_run results to drill down into network details on demand.',
380
+ inputSchema: {
381
+ type: 'object',
382
+ properties: {
383
+ runDbId: {
384
+ type: 'number',
385
+ description: 'The run database ID (returned by e2e_run in the summary)',
386
+ },
387
+ testName: {
388
+ type: 'string',
389
+ description: 'Filter by test name',
390
+ },
391
+ method: {
392
+ type: 'string',
393
+ description: 'Filter by HTTP method (GET, POST, etc.)',
394
+ },
395
+ statusMin: {
396
+ type: 'number',
397
+ description: 'Minimum HTTP status code (e.g. 400 for errors only)',
398
+ },
399
+ statusMax: {
400
+ type: 'number',
401
+ description: 'Maximum HTTP status code',
402
+ },
403
+ urlPattern: {
404
+ type: 'string',
405
+ description: 'Regex pattern to match against request URLs',
406
+ },
407
+ errorsOnly: {
408
+ type: 'boolean',
409
+ description: 'Only return requests with status >= 400',
410
+ },
411
+ includeHeaders: {
412
+ type: 'boolean',
413
+ description: 'Include request/response headers (default: false)',
414
+ },
415
+ includeBodies: {
416
+ type: 'boolean',
417
+ description: 'Include request/response bodies (default: false, implies includeHeaders)',
418
+ },
419
+ },
420
+ required: ['runDbId'],
421
+ },
422
+ },
217
423
  ];
218
424
 
219
425
  /** Tools exposed on the dashboard — excludes dashboard start/stop (already running). */
@@ -262,21 +468,23 @@ async function handleRun(args) {
262
468
  if (args.concurrency) configOverrides.concurrency = args.concurrency;
263
469
  if (args.baseUrl) configOverrides.baseUrl = args.baseUrl;
264
470
  if (args.retries !== undefined) configOverrides.retries = args.retries;
471
+ if (args.failOnNetworkError !== undefined) configOverrides.failOnNetworkError = args.failOnNetworkError;
265
472
 
266
473
  const config = await loadConfig(configOverrides, args.cwd);
474
+ config.triggeredBy = 'mcp';
267
475
 
268
476
  await waitForPool(config.poolUrl);
269
477
 
270
478
  let tests, hooks;
271
479
 
272
480
  if (args.all) {
273
- ({ tests, hooks } = loadAllSuites(config.testsDir));
481
+ ({ tests, hooks } = loadAllSuites(config.testsDir, config.modulesDir, config.exclude));
274
482
  } else if (args.suite) {
275
- ({ tests, hooks } = loadTestSuite(args.suite, config.testsDir));
483
+ ({ tests, hooks } = loadTestSuite(args.suite, config.testsDir, config.modulesDir));
276
484
  } else if (args.file) {
277
485
  const cwd = args.cwd || process.cwd();
278
486
  const filePath = path.isAbsolute(args.file) ? args.file : path.resolve(cwd, args.file);
279
- ({ tests, hooks } = loadTestFile(filePath));
487
+ ({ tests, hooks } = loadTestFile(filePath, config.modulesDir));
280
488
  } else {
281
489
  return errorResult('Provide one of: all (true), suite (name), or file (path)');
282
490
  }
@@ -298,7 +506,7 @@ async function handleRun(args) {
298
506
 
299
507
  const report = generateReport(results);
300
508
  saveReport(report, config.screenshotsDir, config);
301
- persistRun(report, config, args.suite || null);
509
+ const { runDbId } = persistRun(report, config, args.suite || null);
302
510
 
303
511
  const failures = report.results
304
512
  .filter(r => !r.success)
@@ -316,6 +524,7 @@ async function handleRun(args) {
316
524
  ...report.summary,
317
525
  reportPath: path.join(config.screenshotsDir, 'report.json'),
318
526
  };
527
+ if (runDbId) summary.runDbId = runDbId;
319
528
 
320
529
  const consoleErrors = report.results
321
530
  .filter(r => r.consoleLogs?.some(l => l.type === 'error' || l.type === 'warning'))
@@ -324,10 +533,93 @@ async function handleRun(args) {
324
533
  .filter(r => r.networkErrors?.length > 0)
325
534
  .map(r => ({ name: r.name, errors: r.networkErrors }));
326
535
 
536
+ // Compact network summary — full logs available on-demand via e2e_network_logs
537
+ const networkSummary = report.results
538
+ .filter(r => r.networkLogs?.length > 0)
539
+ .map(r => {
540
+ const logs = r.networkLogs;
541
+ const statusDist = { '2xx': 0, '3xx': 0, '4xx': 0, '5xx': 0, other: 0 };
542
+ let totalDuration = 0;
543
+ for (const l of logs) {
544
+ const s = l.status;
545
+ if (s >= 200 && s < 300) statusDist['2xx']++;
546
+ else if (s >= 300 && s < 400) statusDist['3xx']++;
547
+ else if (s >= 400 && s < 500) statusDist['4xx']++;
548
+ else if (s >= 500 && s < 600) statusDist['5xx']++;
549
+ else statusDist.other++;
550
+ totalDuration += l.duration || 0;
551
+ }
552
+ const failed = logs.filter(l => l.status >= 400).map(l => ({ url: l.url, method: l.method, status: l.status }));
553
+ const slowest = [...logs].sort((a, b) => (b.duration || 0) - (a.duration || 0)).slice(0, 3).map(l => ({ url: l.url, method: l.method, status: l.status, duration: l.duration }));
554
+ return {
555
+ name: r.name,
556
+ totalRequests: logs.length,
557
+ statusDistribution: statusDist,
558
+ avgDurationMs: logs.length > 0 ? Math.round(totalDuration / logs.length) : 0,
559
+ failedRequests: failed,
560
+ slowestRequests: slowest,
561
+ };
562
+ });
563
+
564
+ const verifications = report.results
565
+ .filter(r => r.expect && r.verificationScreenshot)
566
+ .map(r => ({
567
+ name: r.name,
568
+ expect: r.expect,
569
+ success: r.success,
570
+ screenshotHash: 'ss:' + computeScreenshotHash(r.verificationScreenshot),
571
+ }));
572
+
327
573
  if (flaky.length > 0) summary.flaky = flaky;
328
574
  if (failures.length > 0) summary.failures = failures;
329
575
  if (consoleErrors.length > 0) summary.consoleErrors = consoleErrors;
330
- if (networkErrors.length > 0) summary.networkErrors = networkErrors;
576
+ if (networkErrors.length > 0) {
577
+ summary.networkErrors = networkErrors;
578
+ // Warn when tests pass but have network errors and failOnNetworkError is off
579
+ if (!config.failOnNetworkError) {
580
+ const totalNetErrors = networkErrors.reduce((sum, r) => sum + r.errors.length, 0);
581
+ const passingWithErrors = networkErrors.filter(r => report.results.find(rr => rr.name === r.name)?.success).length;
582
+ if (passingWithErrors > 0) {
583
+ summary.networkWarning = `⚠️ ${passingWithErrors} test(s) PASSED but had ${totalNetErrors} network error(s). Set failOnNetworkError: true to fail these tests.`;
584
+ }
585
+ }
586
+ }
587
+ if (networkSummary.length > 0) {
588
+ summary.networkSummary = networkSummary;
589
+ if (runDbId) summary.networkLogsHint = 'Full network logs available via e2e_network_logs tool using the runDbId above.';
590
+ }
591
+ if (verifications.length > 0) {
592
+ summary.verifications = verifications;
593
+ summary.verificationInstructions = 'For each verification, call e2e_screenshot with the screenshotHash to view the screenshot. Then compare what you see against the "expect" description. Report any mismatches as FAIL.';
594
+ }
595
+
596
+ // Build per-test narrative: a step-by-step human-readable story of what happened
597
+ const narratives = report.results.map(r => ({
598
+ name: r.name,
599
+ status: r.success ? 'PASSED' : 'FAILED',
600
+ steps: narrateTest(r),
601
+ }));
602
+ if (narratives.length > 0) summary.narratives = narratives;
603
+
604
+ // Enrich with learning insights (fire-and-forget — never fails the response)
605
+ if (config.learningsEnabled !== false) {
606
+ try {
607
+ const projectId = ensureProject(config._cwd, config.projectName, config.screenshotsDir, config.testsDir);
608
+ const insights = getRunInsights(projectId, report);
609
+ if (insights.length > 0) {
610
+ summary.learnings = {
611
+ insights,
612
+ tip: insights.find(i => i.type === 'new-failure')
613
+ ? 'New test failure detected — this test was previously stable. Check recent code changes.'
614
+ : insights.find(i => i.type === 'unstable-selectors')
615
+ ? 'Unstable selectors detected in this run. Consider using more specific selectors or data-testid attributes.'
616
+ : insights.find(i => i.type === 'flaky')
617
+ ? 'Known flaky tests in this run. Consider increasing timeouts or adding waits.'
618
+ : null,
619
+ };
620
+ }
621
+ } catch { /* never fail the run response */ }
622
+ }
331
623
 
332
624
  return textResult(JSON.stringify(summary, null, 2));
333
625
  }
@@ -336,13 +628,26 @@ async function handleList(args) {
336
628
  const config = await loadConfig({}, args.cwd);
337
629
  const suites = listSuites(config.testsDir);
338
630
 
631
+ const lines = [];
632
+
339
633
  if (suites.length === 0) {
340
- return textResult('No test suites found in ' + config.testsDir);
634
+ lines.push('No test suites found in ' + config.testsDir);
635
+ } else {
636
+ lines.push(...suites.map(s =>
637
+ `${s.name} (${s.testCount} tests): ${s.tests.join(', ')}`
638
+ ));
341
639
  }
342
640
 
343
- const lines = suites.map(s =>
344
- `${s.name} (${s.testCount} tests): ${s.tests.join(', ')}`
345
- );
641
+ // List available modules
642
+ const modules = listModules(config.modulesDir);
643
+ if (modules.length > 0) {
644
+ lines.push('');
645
+ lines.push('Available modules:');
646
+ for (const mod of modules) {
647
+ const paramNames = mod.params.map(p => p.required ? p.name : `${p.name}?`).join(', ');
648
+ lines.push(` ${mod.name} (${paramNames}) — ${mod.description || mod.file}`);
649
+ }
650
+ }
346
651
 
347
652
  return textResult(lines.join('\n'));
348
653
  }
@@ -370,7 +675,22 @@ async function handleCreateTest(args) {
370
675
  }
371
676
 
372
677
  fs.writeFileSync(filePath, JSON.stringify(content, null, 2) + '\n');
373
- return textResult(`Created test file: ${filePath}\n\n${args.tests.length} test(s) defined.`);
678
+
679
+ // Warn about beforeAll pitfall
680
+ let warning = '';
681
+ const beforeAll = args.hooks?.beforeAll;
682
+ if (beforeAll?.length) {
683
+ const stateActions = beforeAll.filter(a =>
684
+ ['evaluate', 'goto', 'navigate', 'clear_cookies', 'type', 'click', 'select'].includes(a.type)
685
+ );
686
+ if (stateActions.length > 0) {
687
+ warning = '\n\n⚠️ Warning: beforeAll runs on a separate browser page that is closed before tests start. ' +
688
+ 'Actions that set browser state (evaluate, goto, cookies, etc.) will NOT carry over to individual tests. ' +
689
+ 'Use beforeEach instead if tests need this setup.';
690
+ }
691
+ }
692
+
693
+ return textResult(`Created test file: ${filePath}\n\n${args.tests.length} test(s) defined.${warning}`);
374
694
  }
375
695
 
376
696
  async function handlePoolStatus(args) {
@@ -409,9 +729,16 @@ async function handleScreenshot(args) {
409
729
  const filename = path.basename(row.file_path);
410
730
  const hash = row.hash;
411
731
 
732
+ // Build description with metadata if available
733
+ const metaParts = [`Screenshot ss:${hash} (${filename})`];
734
+ if (row.test_name) metaParts.push(`Test: ${row.test_name}`);
735
+ if (row.screenshot_type) metaParts.push(`Type: ${row.screenshot_type}`);
736
+ if (row.step_index != null) metaParts.push(`Step: ${row.step_index}`);
737
+ if (row.page_url) metaParts.push(`URL: ${row.page_url}`);
738
+
412
739
  return {
413
740
  content: [
414
- { type: 'text', text: `Screenshot ss:${hash} (${filename})` },
741
+ { type: 'text', text: metaParts.join('\n') },
415
742
  { type: 'image', data: base64, mimeType },
416
743
  ],
417
744
  };
@@ -421,6 +748,7 @@ async function handleIssue(args) {
421
748
  if (!args.url) return errorResult('Missing required parameter: url');
422
749
 
423
750
  const mode = args.mode || 'prompt';
751
+ const testType = args.testType || 'e2e';
424
752
  const config = await loadConfig({}, args.cwd);
425
753
 
426
754
  // Check provider and auth
@@ -441,6 +769,10 @@ async function handleIssue(args) {
441
769
  return errorResult('ANTHROPIC_API_KEY is required for verify mode. Set it as an environment variable.');
442
770
  }
443
771
 
772
+ if (args.authToken) config.authToken = args.authToken;
773
+ if (args.authStorageKey) config.authStorageKey = args.authStorageKey;
774
+ config.testType = testType;
775
+
444
776
  const result = await verifyIssue(args.url, config);
445
777
  const status = result.bugConfirmed ? 'BUG CONFIRMED' : 'NOT REPRODUCIBLE';
446
778
  const summary = {
@@ -462,11 +794,110 @@ async function handleIssue(args) {
462
794
 
463
795
  // Default: prompt mode
464
796
  const issue = fetchIssue(args.url);
465
- const promptData = buildPrompt(issue, config);
797
+ const promptData = buildPrompt(issue, config, testType);
466
798
 
467
799
  return textResult(promptData.prompt);
468
800
  }
469
801
 
802
/**
 * MCP handler for `e2e_create_module`: writes a reusable module definition
 * as `<name>.json` into the configured modules directory.
 *
 * Required arguments are validated before anything touches the filesystem,
 * so a malformed call yields an error result instead of an uncaught
 * TypeError (or a module file written right before the crash).
 *
 * @param {object} args - Tool arguments.
 * @param {string} args.name - Module name; its basename becomes the file name.
 * @param {Array<object>} args.actions - Action sequence stored in the module.
 * @param {string} [args.description] - Human-readable description.
 * @param {object} [args.params] - Parameter definitions keyed by param name.
 * @param {string} [args.cwd] - Project root forwarded to loadConfig.
 * @returns {Promise<object>} A textResult on success, an errorResult otherwise.
 */
async function handleCreateModule(args) {
  if (typeof args.name !== 'string' || args.name.trim() === '') {
    return errorResult('Missing required parameter: name');
  }
  if (!Array.isArray(args.actions)) {
    return errorResult('Missing required parameter: actions (array)');
  }

  const config = await loadConfig({}, args.cwd);

  if (!config.modulesDir) {
    return errorResult('modulesDir not configured');
  }

  // basename() strips any directory components so the file always lands
  // directly inside modulesDir.
  const safeName = path.basename(args.name);
  if (safeName === '') {
    return errorResult(`Invalid module name: "${args.name}"`);
  }

  if (!fs.existsSync(config.modulesDir)) {
    fs.mkdirSync(config.modulesDir, { recursive: true });
  }

  const filename = safeName.endsWith('.json') ? safeName : `${safeName}.json`;
  const filePath = path.join(config.modulesDir, filename);

  // Never overwrite an existing module.
  if (fs.existsSync(filePath)) {
    return errorResult(`Module file already exists: ${filePath}`);
  }

  const module = {
    $module: args.name,
    description: args.description || '',
    params: args.params || {},
    actions: args.actions,
  };

  fs.writeFileSync(filePath, JSON.stringify(module, null, 2) + '\n');

  const paramNames = Object.keys(args.params || {});
  return textResult(`Created module: ${filePath}\n\nName: ${args.name}\nParams: ${paramNames.length ? paramNames.join(', ') : 'none'}\nActions: ${args.actions.length}\n\nUsage in tests: { "$use": "${args.name}", "params": { ... } }`);
}
833
+
834
/**
 * MCP handler for `e2e_capture`: navigates a pool browser page to a URL,
 * saves a PNG screenshot into the screenshots directory, registers its hash,
 * and returns the image inline together with its `ss:<hash>` reference.
 *
 * The page opened for the capture is always closed before disconnecting.
 * NOTE(review): assumes connectToPool returns a Puppeteer Browser, where
 * disconnect() only detaches from the remote Chrome and leaves pages open —
 * without the explicit close every capture would leave a tab behind in the
 * shared pool.
 *
 * @param {object} args - Tool arguments.
 * @param {string} args.url - Full URL to capture.
 * @param {string} [args.filename] - Output file name (default: capture-<timestamp>.png).
 * @param {boolean} [args.fullPage] - Capture the full scrollable page.
 * @param {string} [args.selector] - CSS selector to wait for before capturing.
 * @param {number} [args.delay] - Extra wait in milliseconds after load.
 * @param {string} [args.authToken] - Token stored in localStorage before navigating.
 * @param {string} [args.authStorageKey] - localStorage key for the token (default "accessToken").
 * @param {string} [args.cwd] - Project root forwarded to loadConfig.
 * @returns {Promise<object>} MCP result with a text part and an image part.
 */
async function handleCapture(args) {
  if (!args.url) return errorResult('Missing required parameter: url');

  const config = await loadConfig({}, args.cwd);

  await waitForPool(config.poolUrl);

  let browser;
  let page;
  try {
    browser = await connectToPool(config.poolUrl);
    page = await browser.newPage();
    await page.setViewport(config.viewport);

    // Inject auth token into localStorage before navigation
    const authToken = args.authToken || config.authToken;
    if (authToken) {
      const storageKey = args.authStorageKey || config.authStorageKey || 'accessToken';
      // Navigate to origin first so localStorage is accessible
      const origin = new URL(args.url).origin;
      await page.goto(origin, { waitUntil: 'domcontentloaded', timeout: 15000 });
      await page.evaluate((key, token) => { localStorage.setItem(key, token); }, storageKey, authToken);
    }

    await page.goto(args.url, { waitUntil: 'networkidle2', timeout: 30000 });

    if (args.selector) {
      await page.waitForSelector(args.selector, { timeout: 10000 });
    }

    if (args.delay && args.delay > 0) {
      await new Promise(r => setTimeout(r, args.delay));
    }

    // Build filename: strip directories and ensure .png
    let filename = args.filename || `capture-${Date.now()}.png`;
    filename = path.basename(filename);
    if (!filename.endsWith('.png')) filename += '.png';

    if (!fs.existsSync(config.screenshotsDir)) {
      fs.mkdirSync(config.screenshotsDir, { recursive: true });
    }

    const screenshotPath = path.join(config.screenshotsDir, filename);
    await page.screenshot({ path: screenshotPath, fullPage: !!args.fullPage });

    // Register hash in SQLite
    const cwd = args.cwd || process.cwd();
    const projectName = config.projectName || path.basename(cwd);
    const projectId = ensureProject(cwd, projectName, config.screenshotsDir, config.testsDir);
    const hash = computeScreenshotHash(screenshotPath);
    registerScreenshotHash(hash, screenshotPath, projectId, null);

    // Read image for response
    const data = fs.readFileSync(screenshotPath);
    const base64 = data.toString('base64');

    return {
      content: [
        { type: 'text', text: `Screenshot saved: ${screenshotPath}\nHash: ss:${hash}` },
        { type: 'image', data: base64, mimeType: 'image/png' },
      ],
    };
  } finally {
    if (page) {
      try {
        await page.close();
      } catch {
        // Best-effort cleanup: a failed close must not mask the capture
        // result or the error that brought us here.
      }
    }
    if (browser) await browser.disconnect();
  }
}
900
+
470
901
  // Module-level state for stdio path only
471
902
  let dashboardHandle = null;
472
903
 
@@ -490,6 +921,124 @@ async function handleDashboardStop() {
490
921
  return textResult('Dashboard stopped');
491
922
  }
492
923
 
924
/**
 * MCP handler for `e2e_neo4j`: starts, stops, or reports the status of the
 * Neo4j container, depending on `args.action`.
 *
 * @param {object} args - Tool arguments.
 * @param {'start'|'stop'|'status'} args.action - Lifecycle action to perform.
 * @param {string} [args.cwd] - Project root forwarded to loadConfig and the Neo4j helpers.
 * @returns {Promise<object>} A textResult describing the outcome, or an errorResult.
 */
async function handleNeo4j(args) {
  const requested = args.action;
  if (!requested) return errorResult('Missing required parameter: action');

  const config = await loadConfig({}, args.cwd);

  if (requested === 'start') {
    try {
      startNeo4j(config, args.cwd);
      const boltPort = config.neo4jBoltPort || 7687;
      const httpPort = config.neo4jHttpPort || 7474;
      return textResult(`Neo4j started. Bolt: bolt://localhost:${boltPort}, Browser: http://localhost:${httpPort}`);
    } catch (err) {
      return errorResult(`Failed to start Neo4j: ${err.message}`);
    }
  }

  if (requested === 'stop') {
    try {
      stopNeo4j(config, args.cwd);
      return textResult('Neo4j stopped');
    } catch (err) {
      return errorResult(`Failed to stop Neo4j: ${err.message}`);
    }
  }

  if (requested === 'status') {
    const info = getNeo4jStatus(config, args.cwd);
    const report = [`Running: ${info.running ? 'yes' : 'no'}`];
    // Connection endpoints are only reported while the container is up.
    if (info.running) {
      report.push(`Bolt: bolt://localhost:${info.boltPort}`, `Browser: http://localhost:${info.httpPort}`);
    }
    if (info.error) report.push(`Error: ${info.error}`);
    return textResult(report.join('\n'));
  }

  return errorResult('Unknown action. Use: start, stop, status');
}
960
+
961
/**
 * MCP handler for `e2e_learnings`: answers category queries ("summary",
 * "flaky", "selectors", "pages", "apis", "errors", "trends") and drill-down
 * queries ("test:<name>", "page:<path>", "selector:<value>") against the
 * learning store.
 *
 * Query keywords and drill-down prefixes are matched case-insensitively on
 * the trimmed query. The drill-down value is taken from the same trimmed
 * string (original casing preserved), so leading whitespace in the query
 * cannot shift the slice and corrupt the extracted name.
 *
 * @param {object} args - Tool arguments.
 * @param {string} args.query - Query keyword or drill-down expression.
 * @param {number} [args.days] - Analysis window in days; clamped to 1..365 (default 30).
 * @param {string} [args.cwd] - Project root forwarded to loadConfig.
 * @returns {Promise<object>} A textResult with JSON output, or an errorResult.
 */
async function handleLearnings(args) {
  if (typeof args.query !== 'string' || args.query.trim() === '') {
    return errorResult('Missing required parameter: query');
  }

  const config = await loadConfig({}, args.cwd);
  const days = Math.min(Math.max(parseInt(args.days || config.learningsDays || 30, 10) || 30, 1), 365);
  const projectId = ensureProject(config._cwd, config.projectName, config.screenshotsDir, config.testsDir);

  const rawQuery = args.query.trim();
  const query = rawQuery.toLowerCase();

  // Graph enrichment is optional: any failure (sync throw or rejection)
  // degrades to "no graph data" instead of failing the whole query.
  const graphOrNull = async (kind, params) => {
    try {
      return await queryGraph(config, kind, params);
    } catch {
      return null;
    }
  };

  // Drill-down queries (enriched with graph data when Neo4j is available)
  if (query.startsWith('test:')) {
    const testName = rawQuery.slice('test:'.length).trim();
    const history = getTestHistory(projectId, testName, days);
    const result = { query: args.query, testName, history };
    const graphDeps = await graphOrNull('test-dependencies', { testName });
    if (graphDeps) result.relatedTests = graphDeps;
    return textResult(JSON.stringify(result, null, 2));
  }
  if (query.startsWith('page:')) {
    const urlPath = rawQuery.slice('page:'.length).trim();
    const history = getPageHistory(projectId, urlPath, days);
    const result = { query: args.query, urlPath, history };
    const graphImpact = await graphOrNull('page-impact', { path: urlPath });
    if (graphImpact) result.affectedTests = graphImpact;
    return textResult(JSON.stringify(result, null, 2));
  }
  if (query.startsWith('selector:')) {
    const selector = rawQuery.slice('selector:'.length).trim();
    const history = getSelectorHistory(projectId, selector, days);
    const result = { query: args.query, selector, history };
    const graphUsage = await graphOrNull('selector-usage', { selector });
    if (graphUsage) result.usage = graphUsage;
    return textResult(JSON.stringify(result, null, 2));
  }

  // Category queries
  switch (query) {
    case 'summary': {
      const summary = getLearningsSummary(projectId);
      // The overview always reports the last 7 days, independent of `days`.
      const trendsResult = getTestTrends(projectId, 7);
      return textResult(JSON.stringify({ ...summary, recentTrend: trendsResult }, null, 2));
    }
    case 'flaky':
      return textResult(JSON.stringify(getFlakySummary(projectId, days), null, 2));
    case 'selectors':
      return textResult(JSON.stringify(getSelectorStability(projectId, days), null, 2));
    case 'pages':
      return textResult(JSON.stringify(getPageHealth(projectId, days), null, 2));
    case 'apis':
      return textResult(JSON.stringify(getApiHealth(projectId, days), null, 2));
    case 'errors':
      return textResult(JSON.stringify(getErrorPatterns(projectId), null, 2));
    case 'trends':
      return textResult(JSON.stringify(getTestTrends(projectId, days), null, 2));
    default:
      return errorResult(`Unknown query: "${args.query}". Use: summary, flaky, selectors, pages, apis, errors, trends, test:<name>, page:<path>, selector:<value>`);
  }
}
1019
+
1020
/**
 * MCP handler for `e2e_network_logs`: builds a filter object from the
 * supplied arguments and returns the matching network logs for a run as
 * pretty-printed JSON.
 *
 * @param {object} args - Tool arguments.
 * @param {number} args.runDbId - Run database ID to query.
 * @param {string} [args.testName] - Filter by test name.
 * @param {string} [args.method] - Filter by HTTP method.
 * @param {number} [args.statusMin] - Minimum HTTP status code.
 * @param {number} [args.statusMax] - Maximum HTTP status code.
 * @param {string} [args.urlPattern] - Pattern matched against request URLs.
 * @param {boolean} [args.errorsOnly] - Only return failed requests.
 * @param {boolean} [args.includeHeaders] - Include request/response headers.
 * @param {boolean} [args.includeBodies] - Include request/response bodies.
 * @returns {Promise<object>} A textResult with the logs (or a "none found" note), or an errorResult.
 */
async function handleNetworkLogs(args) {
  const { runDbId } = args;
  if (!runDbId) return errorResult('Missing required parameter: runDbId');

  const filters = {};
  // Value filters are forwarded only when non-empty.
  const copyIfTruthy = (key) => {
    if (args[key]) filters[key] = args[key];
  };
  // Numeric bounds are forwarded whenever supplied, so 0 stays a valid bound.
  const copyIfDefined = (key) => {
    if (args[key] !== undefined) filters[key] = args[key];
  };
  // Switches are normalised to a literal `true`.
  const flagIfTruthy = (key) => {
    if (args[key]) filters[key] = true;
  };

  copyIfTruthy('testName');
  copyIfTruthy('method');
  copyIfDefined('statusMin');
  copyIfDefined('statusMax');
  copyIfTruthy('urlPattern');
  flagIfTruthy('errorsOnly');
  flagIfTruthy('includeHeaders');
  flagIfTruthy('includeBodies');

  const logs = getNetworkLogs(runDbId, filters);

  return logs.length === 0
    ? textResult('No network logs found for the given filters.')
    : textResult(JSON.stringify(logs, null, 2));
}
1041
+
493
1042
  // ── Helpers ───────────────────────────────────────────────────────────────────
494
1043
 
495
1044
  export function textResult(text) {
@@ -521,6 +1070,16 @@ export async function dispatchTool(name, args = {}) {
521
1070
  return await handleDashboardStop();
522
1071
  case 'e2e_issue':
523
1072
  return await handleIssue(args);
1073
+ case 'e2e_create_module':
1074
+ return await handleCreateModule(args);
1075
+ case 'e2e_capture':
1076
+ return await handleCapture(args);
1077
+ case 'e2e_learnings':
1078
+ return await handleLearnings(args);
1079
+ case 'e2e_neo4j':
1080
+ return await handleNeo4j(args);
1081
+ case 'e2e_network_logs':
1082
+ return await handleNetworkLogs(args);
524
1083
  default:
525
1084
  return errorResult(`Unknown tool: ${name}`);
526
1085
  }