@matware/e2e-runner 1.2.1 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. package/.claude-plugin/marketplace.json +52 -0
  2. package/.claude-plugin/plugin.json +17 -3
  3. package/.mcp.json +2 -2
  4. package/.opencode/commands/create-test.md +63 -0
  5. package/.opencode/commands/run.md +50 -0
  6. package/.opencode/commands/verify-issue.md +62 -0
  7. package/.opencode/skills/e2e-testing/SKILL.md +181 -0
  8. package/.opencode/skills/e2e-testing/references/action-types.md +143 -0
  9. package/.opencode/skills/e2e-testing/references/auth-strategies.md +91 -0
  10. package/.opencode/skills/e2e-testing/references/graphql.md +59 -0
  11. package/.opencode/skills/e2e-testing/references/issue-verification.md +59 -0
  12. package/.opencode/skills/e2e-testing/references/multi-pool.md +60 -0
  13. package/.opencode/skills/e2e-testing/references/network-debugging.md +62 -0
  14. package/.opencode/skills/e2e-testing/references/test-json-format.md +163 -0
  15. package/.opencode/skills/e2e-testing/references/troubleshooting.md +224 -0
  16. package/.opencode/skills/e2e-testing/references/variables.md +41 -0
  17. package/.opencode/skills/e2e-testing/references/visual-verification.md +89 -0
  18. package/LICENSE +190 -0
  19. package/OPENCODE.md +166 -0
  20. package/README.md +165 -104
  21. package/agents/test-creator.md +54 -1
  22. package/agents/test-improver.md +37 -0
  23. package/bin/cli.js +409 -16
  24. package/commands/capture.md +45 -0
  25. package/commands/create-test.md +16 -1
  26. package/opencode.json +11 -0
  27. package/package.json +7 -2
  28. package/scripts/setup-opencode.sh +113 -0
  29. package/skills/e2e-testing/SKILL.md +10 -3
  30. package/skills/e2e-testing/references/action-types.md +48 -5
  31. package/skills/e2e-testing/references/auth-strategies.md +91 -0
  32. package/skills/e2e-testing/references/graphql.md +59 -0
  33. package/skills/e2e-testing/references/issue-verification.md +59 -0
  34. package/skills/e2e-testing/references/multi-pool.md +60 -0
  35. package/skills/e2e-testing/references/network-debugging.md +62 -0
  36. package/skills/e2e-testing/references/test-json-format.md +4 -0
  37. package/skills/e2e-testing/references/troubleshooting.md +44 -2
  38. package/skills/e2e-testing/references/variables.md +41 -0
  39. package/skills/e2e-testing/references/visual-verification.md +89 -0
  40. package/src/actions.js +475 -2
  41. package/src/ai-generate.js +139 -8
  42. package/src/app-pool.js +339 -0
  43. package/src/config.js +266 -5
  44. package/src/dashboard.js +216 -17
  45. package/src/db.js +191 -7
  46. package/src/index.js +12 -9
  47. package/src/learner-sqlite.js +458 -0
  48. package/src/learner.js +78 -6
  49. package/src/mcp-tools.js +1348 -51
  50. package/src/module-resolver.js +37 -0
  51. package/src/narrate.js +65 -0
  52. package/src/pool-manager.js +229 -0
  53. package/src/pool.js +301 -31
  54. package/src/reporter.js +86 -2
  55. package/src/runner.js +480 -71
  56. package/src/sync/auth.js +354 -0
  57. package/src/sync/client.js +572 -0
  58. package/src/sync/hub-routes.js +816 -0
  59. package/src/sync/index.js +68 -0
  60. package/src/sync/middleware.js +347 -0
  61. package/src/sync/queue.js +209 -0
  62. package/src/sync/schema.js +540 -0
  63. package/src/verify.js +10 -7
  64. package/src/visual-diff.js +446 -0
  65. package/src/watch.js +384 -0
  66. package/templates/build-dashboard.js +47 -6
  67. package/templates/dashboard/js/api.js +62 -0
  68. package/templates/dashboard/js/init.js +13 -0
  69. package/templates/dashboard/js/keyboard.js +46 -0
  70. package/templates/dashboard/js/state.js +40 -0
  71. package/templates/dashboard/js/toast.js +41 -0
  72. package/templates/dashboard/js/utils.js +216 -0
  73. package/templates/dashboard/js/view-live.js +181 -0
  74. package/templates/dashboard/js/view-runs.js +676 -0
  75. package/templates/dashboard/js/view-tests.js +294 -0
  76. package/templates/dashboard/js/view-watch.js +242 -0
  77. package/templates/dashboard/js/websocket.js +116 -0
  78. package/templates/dashboard/styles/base.css +69 -0
  79. package/templates/dashboard/styles/components.css +117 -0
  80. package/templates/dashboard/styles/view-live.css +97 -0
  81. package/templates/dashboard/styles/view-runs.css +243 -0
  82. package/templates/dashboard/styles/view-tests.css +96 -0
  83. package/templates/dashboard/styles/view-watch.css +53 -0
  84. package/templates/dashboard/template.html +181 -100
  85. package/templates/dashboard.html +1614 -547
  86. package/templates/sample-test.json +0 -8
  87. package/templates/dashboard/app.js +0 -1152
  88. package/templates/dashboard/styles.css +0 -413
package/src/index.js CHANGED
@@ -8,23 +8,26 @@
8
8
  */
9
9
 
10
10
  export { loadConfig } from './config.js';
11
- export { waitForPool, connectToPool, startPool, stopPool, restartPool, getPoolStatus } from './pool.js';
11
+ export { waitForPool, connectToPool, disconnectFromPool, startPool, stopPool, restartPool, getPoolStatus, clearDriverCache, getCachedDriver, trackCdpSession, releaseCdpSession, releaseSteelSession } from './pool.js';
12
+ export { getPoolUrls, getAllPoolStatuses, getAggregatedPoolStatus, waitForAnyPool, selectPool, selectAndConnect } from './pool-manager.js';
12
13
  export { executeAction } from './actions.js';
13
- export { runTest, runTestsParallel, loadTestFile, loadTestSuite, loadAllSuites, listSuites } from './runner.js';
14
+ export { runTest, runTestsParallel, loadTestFile, loadTestSuite, loadAllSuites, listSuites, fetchAuthToken } from './runner.js';
14
15
  export { generateReport, generateJUnitXML, saveReport, printReport, saveHistory, loadHistory, loadHistoryRun } from './reporter.js';
15
16
  export { startDashboard, stopDashboard } from './dashboard.js';
16
17
  export { fetchIssue, parseIssueUrl, detectProvider, checkCliAuth } from './issues.js';
17
18
  export { buildPrompt, generateTests, hasApiKey } from './ai-generate.js';
18
19
  export { verifyIssue } from './verify.js';
19
20
  export { resolveTestData, loadModuleRegistry, listModules } from './module-resolver.js';
20
- export { learnFromRun, categorizeError } from './learner.js';
21
- export { getLearningsSummary, getFlakySummary, getSelectorStability, getPageHealth, getApiHealth, getErrorPatterns, getTestTrends, getRunInsights } from './learner-sqlite.js';
21
+ export { learnFromRun, categorizeError, isInfraError, INFRA_CATEGORIES } from './learner.js';
22
+ export { getLearningsSummary, getFlakySummary, getSelectorStability, getPageHealth, getApiHealth, getErrorPatterns, getTestTrends, getRunInsights, getTestCreationContext, generateImprovements, getActionHealthScores } from './learner-sqlite.js';
22
23
  export { generateLearningsMarkdown } from './learner-markdown.js';
23
24
  export { writeToGraph, queryGraph, closeNeo4j } from './learner-neo4j.js';
24
25
  export { startNeo4j, stopNeo4j, getNeo4jStatus } from './neo4j-pool.js';
26
+ export { forkAppInstance, destroyFork, destroyAllForks, getAppPoolStatus, isAppPoolEnabled } from './app-pool.js';
27
+ export { compareImages, assertVisualMatch } from './visual-diff.js';
25
28
 
26
29
  import { loadConfig } from './config.js';
27
- import { waitForPool } from './pool.js';
30
+ import { waitForAnyPool, getPoolUrls } from './pool-manager.js';
28
31
  import { runTestsParallel, loadTestFile, loadTestSuite, loadAllSuites } from './runner.js';
29
32
  import { generateReport, saveReport, printReport } from './reporter.js';
30
33
 
@@ -41,7 +44,7 @@ export async function createRunner(userConfig = {}) {
41
44
 
42
45
  /** Runs all test suites from the tests directory */
43
46
  async runAll() {
44
- await waitForPool(config.poolUrl);
47
+ await waitForAnyPool(getPoolUrls(config));
45
48
  const { tests, hooks } = loadAllSuites(config.testsDir, config.modulesDir, config.exclude);
46
49
  const results = await runTestsParallel(tests, config, hooks);
47
50
  const report = generateReport(results);
@@ -52,7 +55,7 @@ export async function createRunner(userConfig = {}) {
52
55
 
53
56
  /** Runs a single suite by name */
54
57
  async runSuite(name) {
55
- await waitForPool(config.poolUrl);
58
+ await waitForAnyPool(getPoolUrls(config));
56
59
  const { tests, hooks } = loadTestSuite(name, config.testsDir, config.modulesDir);
57
60
  const results = await runTestsParallel(tests, config, hooks);
58
61
  const report = generateReport(results);
@@ -63,7 +66,7 @@ export async function createRunner(userConfig = {}) {
63
66
 
64
67
  /** Runs an array of test objects */
65
68
  async runTests(tests) {
66
- await waitForPool(config.poolUrl);
69
+ await waitForAnyPool(getPoolUrls(config));
67
70
  const results = await runTestsParallel(tests, config);
68
71
  const report = generateReport(results);
69
72
  saveReport(report, config.screenshotsDir, config);
@@ -73,7 +76,7 @@ export async function createRunner(userConfig = {}) {
73
76
 
74
77
  /** Runs tests from a JSON file path */
75
78
  async runFile(filePath) {
76
- await waitForPool(config.poolUrl);
79
+ await waitForAnyPool(getPoolUrls(config));
77
80
  const { tests, hooks } = loadTestFile(filePath, config.modulesDir);
78
81
  const results = await runTestsParallel(tests, config, hooks);
79
82
  const report = generateReport(results);
@@ -293,9 +293,160 @@ export function getRunInsights(projectId, report) {
293
293
  }
294
294
  }
295
295
 
296
+ // ── At-Least-One Guarantee: generate positive insights if none exist ──
297
+ if (insights.length === 0 && report.results.length > 0) {
298
+ const allPassed = report.results.every(r => r.success);
299
+
300
+ // Green streak detection
301
+ if (allPassed) {
302
+ const recentRuns = d.prepare(`
303
+ SELECT run_id, MIN(success) AS all_passed
304
+ FROM test_learnings
305
+ WHERE project_id = ?
306
+ GROUP BY run_id
307
+ ORDER BY created_at DESC
308
+ LIMIT 10
309
+ `).all(projectId);
310
+ const streak = recentRuns.findIndex(r => r.all_passed === 0);
311
+ const streakCount = streak === -1 ? recentRuns.length : streak;
312
+ if (streakCount >= 3) {
313
+ insights.push({
314
+ type: 'green-streak',
315
+ streak: streakCount,
316
+ message: `${streakCount}-run green streak — suite is stable.`,
317
+ });
318
+ }
319
+ }
320
+
321
+ // New tests (no historical data)
322
+ const newTests = report.results.filter(r => {
323
+ const h = d.prepare('SELECT COUNT(*) AS c FROM test_learnings WHERE project_id = ? AND test_name = ?').get(projectId, r.name);
324
+ return !h || h.c <= 1; // <= 1 because current run may already be written
325
+ });
326
+ if (newTests.length > 0) {
327
+ insights.push({
328
+ type: 'new-tests',
329
+ tests: newTests.map(t => t.name),
330
+ message: `${newTests.length} new test(s): ${newTests.map(t => t.name).slice(0, 3).join(', ')}${newTests.length > 3 ? '...' : ''}`,
331
+ });
332
+ }
333
+
334
+ // Pass rate improvement vs 7-day average
335
+ const avg7d = d.prepare(`
336
+ SELECT ROUND(AVG(CASE WHEN success = 1 THEN 100.0 ELSE 0.0 END), 1) AS pass_rate
337
+ FROM test_learnings
338
+ WHERE project_id = ? AND created_at >= datetime('now', '-7 days')
339
+ `).get(projectId);
340
+ const thisRunPassRate = Math.round((report.results.filter(r => r.success).length / report.results.length) * 1000) / 10;
341
+ if (avg7d?.pass_rate && thisRunPassRate > avg7d.pass_rate + 5) {
342
+ insights.push({
343
+ type: 'improved-pass-rate',
344
+ message: `Pass rate improved: ${thisRunPassRate}% this run vs ${avg7d.pass_rate}% 7-day average.`,
345
+ });
346
+ }
347
+
348
+ // Performance comparison
349
+ const avgDuration = d.prepare(`
350
+ SELECT ROUND(AVG(duration_ms)) AS avg_ms
351
+ FROM test_learnings
352
+ WHERE project_id = ? AND duration_ms IS NOT NULL AND created_at >= datetime('now', '-30 days')
353
+ `).get(projectId);
354
+ if (avgDuration?.avg_ms && report.results.length > 0) {
355
+ const thisAvg = report.results.reduce((s, r) => {
356
+ const ms = (r.endTime && r.startTime) ? new Date(r.endTime) - new Date(r.startTime) : 0;
357
+ return s + ms;
358
+ }, 0) / report.results.length;
359
+ const delta = Math.round(((thisAvg - avgDuration.avg_ms) / avgDuration.avg_ms) * 100);
360
+ if (Math.abs(delta) > 15) {
361
+ insights.push({
362
+ type: 'performance',
363
+ message: delta < 0
364
+ ? `This run was ${Math.abs(delta)}% faster than the 30-day average.`
365
+ : `This run was ${delta}% slower than the 30-day average — check for new slow pages.`,
366
+ });
367
+ }
368
+ }
369
+
370
+ // Stable selectors confirmed
371
+ if (allPassed) {
372
+ const usedSelectors = new Set();
373
+ for (const r of report.results) {
374
+ if (!r.actions) continue;
375
+ for (const a of r.actions) {
376
+ if (a.selector) usedSelectors.add(a.selector);
377
+ }
378
+ }
379
+ if (usedSelectors.size > 0) {
380
+ const stableCount = d.prepare(`
381
+ SELECT COUNT(DISTINCT selector) AS c
382
+ FROM selector_learnings
383
+ WHERE project_id = ? AND selector IN (${[...usedSelectors].map(() => '?').join(',')})
384
+ GROUP BY selector
385
+ HAVING SUM(CASE WHEN success = 0 THEN 1 ELSE 0 END) = 0 AND COUNT(*) > 3
386
+ `).all(projectId, ...usedSelectors).length;
387
+ if (stableCount > 0) {
388
+ insights.push({
389
+ type: 'stable-selectors',
390
+ count: stableCount,
391
+ message: `${stableCount} selector(s) confirmed stable across multiple runs.`,
392
+ });
393
+ }
394
+ }
395
+ }
396
+
397
+ // Fallback: if still no insights, report basic run stats
398
+ if (insights.length === 0) {
399
+ const passed = report.results.filter(r => r.success).length;
400
+ insights.push({
401
+ type: 'run-summary',
402
+ message: `${passed}/${report.results.length} tests passed (${thisRunPassRate}%).`,
403
+ });
404
+ }
405
+ }
406
+
296
407
  return insights;
297
408
  }
298
409
 
410
/**
 * Builds a compact health snapshot for a project (consumed by the CLI, MCP and
 * the dashboard).
 *
 * The snapshot combines the learnings summary with a pass-rate trend derived
 * from the last 7 days of trend data: the newest data point is compared with
 * the mean of all earlier points, and a difference of more than 2 points in
 * either direction is reported as 'improving' / 'declining'.
 *
 * @param {*} projectId - Project identifier, forwarded to getLearningsSummary
 *   and getTestTrends.
 * @returns {object|null} Snapshot object, or null when there is no summary or
 *   the summary reports zero runs.
 */
export function getHealthSnapshot(projectId) {
  const summary = getLearningsSummary(projectId);
  if (!summary || summary.totalRuns === 0) {
    return null;
  }

  // Missing lists count as zero entries.
  const sizeOf = (list) => (list ? list.length : 0);

  let topErrorPattern = null;
  if (summary.topErrors && summary.topErrors.length > 0) {
    const leading = summary.topErrors[0];
    topErrorPattern = {
      pattern: leading.pattern,
      count: leading.occurrence_count,
      category: leading.category,
    };
  }

  // Trend: 'improving', 'declining' or 'stable' (default when data is too sparse).
  let passRateTrend = 'stable';
  let trendDelta = 0;

  const rawTrends = getTestTrends(projectId, 7);
  // getTestTrends may hand back either { data: [...] } or the array itself.
  const points = rawTrends?.data || rawTrends || [];
  if (Array.isArray(points) && points.length >= 2) {
    const newest = points[points.length - 1].pass_rate;
    let earlierTotal = 0;
    for (const point of points.slice(0, -1)) {
      earlierTotal = earlierTotal + point.pass_rate;
    }
    const earlierMean = earlierTotal / (points.length - 1);

    // Keep one decimal place.
    trendDelta = Math.round((newest - earlierMean) * 10) / 10;
    if (trendDelta > 2) {
      passRateTrend = 'improving';
    } else if (trendDelta < -2) {
      passRateTrend = 'declining';
    }
  }

  return {
    passRate: summary.overallPassRate,
    passRateTrend,
    trendDelta,
    flakyCount: sizeOf(summary.flakyTests),
    unstableSelectorCount: sizeOf(summary.unstableSelectors),
    topErrorPattern,
    totalRuns: summary.totalRuns,
    totalTests: summary.totalTests,
  };
}
449
+
299
450
  /** Drill-down: history for a specific test. */
300
451
  export function getTestHistory(projectId, testName, days = 30) {
301
452
  const d = getDb();
@@ -352,3 +503,310 @@ export function getSelectorHistory(projectId, selector, days = 30) {
352
503
  ORDER BY created_at DESC
353
504
  `).all(projectId, selector, days);
354
505
  }
506
+
507
+ /**
508
+ * Aggregated context for test authoring — curates the most actionable learnings
509
+ * into a compact object that AI agents can use to write better tests.
510
+ */
511
/**
 * Action health scores — one composite score per (action_type, selector) pair
 * recorded in `action_health` within the last `days` days.
 *
 * healthScore = success_rate * 0.5 + speedScore * 0.3 + collateralScore * 0.2
 *   - speedScore falls linearly from 100 to 0 as the average duration goes
 *     from 0 ms to 5000 ms (and stays 0 beyond that);
 *   - collateralScore loses 20 points per average console/network error
 *     observed after the action, bottoming out at 0.
 *
 * Pairs seen fewer than two times are excluded. Rows come back ordered by
 * ascending success rate, then by descending usage.
 *
 * @param {*} projectId - Project identifier bound into the query.
 * @param {number} [days=30] - Size of the look-back window in days.
 * @returns {Array<object>} One entry per pair with camelCased metrics and `healthScore`.
 */
export function getActionHealthScores(projectId, days = 30) {
  const sql = `
    SELECT
      action_type,
      selector,
      page_url,
      COUNT(*) AS total_uses,
      ROUND(AVG(CASE WHEN success = 1 THEN 100.0 ELSE 0.0 END), 1) AS success_rate,
      ROUND(AVG(duration_ms)) AS avg_duration_ms,
      MAX(duration_ms) AS max_duration_ms,
      ROUND(AVG(console_errors_after + network_errors_after), 1) AS avg_collateral_errors,
      COUNT(DISTINCT test_name) AS used_by_tests
    FROM action_health
    WHERE project_id = ? AND created_at >= datetime('now', '-' || ? || ' days')
    GROUP BY action_type, selector
    HAVING total_uses >= 2
    ORDER BY success_rate ASC, total_uses DESC
  `;
  const aggregated = getDb().prepare(sql).all(projectId, days);

  const scored = [];
  for (const row of aggregated) {
    // Missing averages are treated as 0, i.e. no penalty.
    const durationPenalty = Math.min(100, ((row.avg_duration_ms || 0) / 5000) * 100);
    const collateralPenalty = Math.min(100, (row.avg_collateral_errors || 0) * 20);
    const speedScore = 100 - durationPenalty;
    const collateralScore = 100 - collateralPenalty;

    scored.push({
      actionType: row.action_type,
      selector: row.selector,
      pageUrl: row.page_url,
      totalUses: row.total_uses,
      successRate: row.success_rate,
      avgDurationMs: row.avg_duration_ms,
      maxDurationMs: row.max_duration_ms,
      avgCollateralErrors: row.avg_collateral_errors,
      usedByTests: row.used_by_tests,
      healthScore: Math.round(row.success_rate * 0.5 + speedScore * 0.3 + collateralScore * 0.2),
    });
  }
  return scored;
}
553
+
554
/**
 * Aggregated context for test authoring — curates the most actionable learnings
 * of the last 30 days into a compact object that AI agents can use to write
 * better tests.
 *
 * Each section is only present when it has data:
 *   - unstableSelectors: up to 5 selectors with a fail rate above 20%
 *   - stableSelectors:   up to 10 selectors with more than 5 uses and no failures
 *   - errorPatterns:     up to 5 most frequent error patterns (all time)
 *   - slowPages:         up to 5 pages whose average load time exceeds 3000 ms
 *   - flakyTests:        up to 5 tests with at least one flaky run
 *   - apiIssues:         up to 5 endpoints with an error rate above 10%
 *   - passRate:          overall pass rate, when any test was recorded
 *
 * @param {*} projectId - Project identifier bound into every query.
 * @returns {object|null} The context object, or null when no section has data.
 */
export function getTestCreationContext(projectId) {
  const d = getDb();
  const ctx = {};

  // Top 5 unstable selectors (>20% fail rate).
  // last_error is the error of the most recent failing row. Taking MAX() over
  // the error text would return the lexicographically greatest message rather
  // than the latest one, so a correlated subquery ordered by created_at is used.
  // Any failure counted by fail_rate lies inside the 30-day window, so the most
  // recent failure overall is inside it too and needs no extra date filter.
  const unstable = d.prepare(`
    SELECT
      sl.selector AS selector,
      ROUND(AVG(CASE WHEN sl.success = 0 THEN 100.0 ELSE 0.0 END), 1) AS fail_rate,
      (
        SELECT f.error
        FROM selector_learnings f
        WHERE f.project_id = sl.project_id
          AND f.selector = sl.selector
          AND f.success = 0
          AND f.error IS NOT NULL
        ORDER BY f.created_at DESC
        LIMIT 1
      ) AS last_error,
      COUNT(*) AS total_uses
    FROM selector_learnings sl
    WHERE sl.project_id = ? AND sl.created_at >= datetime('now', '-30 days')
    GROUP BY sl.selector
    HAVING fail_rate > 20
    ORDER BY fail_rate DESC
    LIMIT 5
  `).all(projectId);

  if (unstable.length > 0) {
    ctx.unstableSelectors = unstable.map(s => ({
      selector: s.selector,
      failRate: s.fail_rate,
      lastError: s.last_error,
      suggestion: suggestSelectorFix(s.selector),
    }));
  }

  // Top 10 stable selectors (0% fail rate, >5 uses)
  const stable = d.prepare(`
    SELECT
      selector,
      COUNT(*) AS total_uses,
      COUNT(DISTINCT test_name) AS used_by_tests
    FROM selector_learnings
    WHERE project_id = ? AND created_at >= datetime('now', '-30 days')
    GROUP BY selector
    HAVING total_uses > 5 AND SUM(CASE WHEN success = 0 THEN 1 ELSE 0 END) = 0
    ORDER BY total_uses DESC
    LIMIT 10
  `).all(projectId);

  if (stable.length > 0) {
    ctx.stableSelectors = stable.map(s => ({
      selector: s.selector,
      uses: s.total_uses,
      tests: s.used_by_tests,
    }));
  }

  // Top 5 error patterns (not limited to the 30-day window)
  const errors = d.prepare(`
    SELECT pattern, category, occurrence_count
    FROM error_patterns
    WHERE project_id = ?
    ORDER BY occurrence_count DESC
    LIMIT 5
  `).all(projectId);

  if (errors.length > 0) {
    ctx.errorPatterns = errors.map(e => ({
      pattern: e.pattern,
      category: e.category,
      count: e.occurrence_count,
    }));
  }

  // Slow pages (avg load > 3s)
  const slowPages = d.prepare(`
    SELECT
      url_path,
      ROUND(AVG(load_time_ms)) AS avg_load_ms
    FROM page_learnings
    WHERE project_id = ? AND created_at >= datetime('now', '-30 days')
    GROUP BY url_path
    HAVING avg_load_ms > 3000
    ORDER BY avg_load_ms DESC
    LIMIT 5
  `).all(projectId);

  if (slowPages.length > 0) {
    ctx.slowPages = slowPages.map(p => ({
      page: p.url_path,
      avgLoadMs: p.avg_load_ms,
    }));
  }

  // Flaky tests
  const flaky = d.prepare(`
    SELECT test_name, SUM(flaky) AS flaky_count, COUNT(*) AS total_runs
    FROM test_learnings
    WHERE project_id = ? AND created_at >= datetime('now', '-30 days')
    GROUP BY test_name
    HAVING flaky_count > 0
    ORDER BY flaky_count DESC
    LIMIT 5
  `).all(projectId);

  if (flaky.length > 0) {
    ctx.flakyTests = flaky.map(f => ({
      name: f.test_name,
      flakyCount: f.flaky_count,
      totalRuns: f.total_runs,
    }));
  }

  // API endpoints with >10% error rate
  const apiIssues = d.prepare(`
    SELECT
      endpoint,
      ROUND(AVG(CASE WHEN is_error = 1 THEN 100.0 ELSE 0.0 END), 1) AS error_rate,
      COUNT(*) AS total_calls
    FROM api_learnings
    WHERE project_id = ? AND created_at >= datetime('now', '-30 days')
    GROUP BY endpoint
    HAVING error_rate > 10
    ORDER BY error_rate DESC
    LIMIT 5
  `).all(projectId);

  if (apiIssues.length > 0) {
    ctx.apiIssues = apiIssues.map(a => ({
      endpoint: a.endpoint,
      errorRate: a.error_rate,
      totalCalls: a.total_calls,
    }));
  }

  // Overall pass rate
  const stats = d.prepare(`
    SELECT
      COUNT(*) AS total_tests,
      ROUND(AVG(CASE WHEN success = 1 THEN 100.0 ELSE 0.0 END), 1) AS pass_rate
    FROM test_learnings
    WHERE project_id = ? AND created_at >= datetime('now', '-30 days')
  `).get(projectId);

  if (stats && stats.total_tests > 0) {
    ctx.passRate = stats.pass_rate;
  }

  // An empty context carries no information — signal that with null.
  return Object.keys(ctx).length > 0 ? ctx : null;
}
697
+
698
/**
 * Suggest a fix for an unstable selector based on its pattern.
 *
 * Checks are applied in order and the first match wins:
 *   1. a generated class-name prefix (`.Mui…`, `.css-…`, `.sc-…`) anywhere in
 *      the selector, not only at its start, so compound selectors such as
 *      `div.MuiButton-root` or `#app .css-1abc` are recognised too;
 *   2. three or more `>` child combinators, with or without surrounding spaces;
 *   3. positional pseudo-classes (`nth-child` / `nth-of-type`);
 *   4. otherwise a generic recommendation.
 *
 * @param {string} selector - The selector to inspect; non-string values are
 *   stringified (null/undefined become the empty string).
 * @returns {string} A human-readable suggestion.
 */
function suggestSelectorFix(selector) {
  const text = typeof selector === 'string' ? selector : String(selector ?? '');

  if (/\.(?:Mui|css-|sc-)/.test(text)) {
    return 'Prefer [data-testid] or click by text — generated class names are brittle';
  }

  // Three `>` characters mean at least four nesting levels.
  const childCombinators = (text.match(/>/g) ?? []).length;
  if (childCombinators >= 3) {
    return 'Deeply nested selector — simplify or use [data-testid]';
  }

  if (/nth-child|nth-of-type/.test(text)) {
    return 'Positional selector — prefer [data-testid] or text-based selection';
  }

  return 'Consider using [data-testid] or a more stable selector';
}
705
+
706
/**
 * Pull the failing selector out of an error message.
 *
 * Accepts selectors wrapped in double quotes, single quotes or backticks and
 * requires the closing delimiter to match the opening one, so a selector that
 * itself contains another quote character (e.g. `[data-id="save"]`) is
 * captured in full. Falls back to everything after "waiting for selector".
 *
 * @param {string} errorText - Error message of a failed test.
 * @returns {string|null} The selector, or null when none can be found.
 */
function extractFailedSelector(errorText) {
  const quoted = errorText.match(/selector\s+(["'`])(.+?)\1/i);
  if (quoted) return quoted[2];
  const bare = errorText.match(/waiting for selector\s+(.+)/i);
  return bare ? bare[1].trim() : null;
}

/**
 * Cross-reference a run report with historical learnings (last 30 days) to
 * produce actionable improvement suggestions for the AI agent.
 *
 * Suggestion types:
 *   - 'unstable-selector': a test failed on a selector with >20% historical fail rate
 *   - 'timeout':           a test failed with a timeout-looking error
 *   - 'at-risk-selector':  a test uses a selector with >20% historical fail rate,
 *                          whether or not it failed this time
 *   - 'flaky':             a test has 2 or more flaky runs on record
 *
 * @param {*} projectId - Project identifier bound into the history queries.
 * @param {{results?: Array<object>}} report - Run report; each result may carry
 *   `name`, `success`, `error` and `actions` (objects with an optional `selector`).
 * @returns {Array<{type: string, test: string, message: string}>} Suggestions with
 *   exact duplicates removed; empty when the report has no results.
 */
export function generateImprovements(projectId, report) {
  // Nothing to analyse — return before touching the database.
  if (!report?.results) return [];

  const d = getDb();
  const improvements = [];

  // Unstable selectors with their fail rates
  const unstableMap = new Map();
  const unstableRows = d.prepare(`
    SELECT
      selector,
      ROUND(AVG(CASE WHEN success = 0 THEN 100.0 ELSE 0.0 END), 1) AS fail_rate
    FROM selector_learnings
    WHERE project_id = ? AND created_at >= datetime('now', '-30 days')
    GROUP BY selector
    HAVING fail_rate > 20
  `).all(projectId);
  for (const row of unstableRows) unstableMap.set(row.selector, row.fail_rate);

  // Flaky test counts
  const flakyMap = new Map();
  const flakyRows = d.prepare(`
    SELECT test_name, SUM(flaky) AS flaky_count
    FROM test_learnings
    WHERE project_id = ? AND created_at >= datetime('now', '-30 days')
    GROUP BY test_name
    HAVING flaky_count > 0
  `).all(projectId);
  for (const row of flakyRows) flakyMap.set(row.test_name, row.flaky_count);

  for (const result of report.results) {
    if (!result.success && result.error) {
      // Errors may arrive as strings or as Error-like objects.
      const errorText = typeof result.error === 'string' ? result.error : String(result.error);

      // Failed selector with a known bad track record
      const failedSelector = extractFailedSelector(errorText);
      if (failedSelector !== null && unstableMap.has(failedSelector)) {
        const failRate = unstableMap.get(failedSelector);
        improvements.push({
          type: 'unstable-selector',
          test: result.name,
          message: `Selector \`${failedSelector}\` failed (${failRate}% historical fail rate) → ${suggestSelectorFix(failedSelector)}`,
        });
      }

      // Timeout suggestions
      if (/timeout|timed?\s*out/i.test(errorText)) {
        improvements.push({
          type: 'timeout',
          test: result.name,
          message: `Test "${result.name}" timed out → add explicit { type: "wait", text: "..." } or increase timeout`,
        });
      }
    }

    // Check for tests using known unstable selectors (even if they passed this time)
    if (result.actions) {
      for (const action of result.actions) {
        if (action.selector && unstableMap.has(action.selector)) {
          const failRate = unstableMap.get(action.selector);
          improvements.push({
            type: 'at-risk-selector',
            test: result.name,
            message: `Selector \`${action.selector}\` has ${failRate}% fail rate → ${suggestSelectorFix(action.selector)}`,
          });
        }
      }
    }

    // Flaky test suggestions
    const flakyCount = flakyMap.get(result.name);
    if (flakyCount && flakyCount >= 2) {
      improvements.push({
        type: 'flaky',
        test: result.name,
        message: `Test "${result.name}" is flaky (${flakyCount} flaky runs) → add { retries: 2 } to the test config`,
      });
    }
  }

  // Deduplicate by type + test + full message (keep first occurrence). The whole
  // message is part of the key so that long selectors sharing a common prefix
  // are not collapsed into one suggestion.
  const seen = new Set();
  return improvements.filter(imp => {
    const key = `${imp.type}:${imp.test}:${imp.message}`;
    if (seen.has(key)) return false;
    seen.add(key);
    return true;
  });
}