@matware/e2e-runner 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/.claude-plugin/marketplace.json +37 -6
  2. package/.claude-plugin/plugin.json +17 -3
  3. package/LICENSE +190 -0
  4. package/README.md +151 -527
  5. package/agents/test-creator.md +4 -2
  6. package/agents/test-improver.md +5 -3
  7. package/bin/cli.js +84 -20
  8. package/commands/capture.md +45 -0
  9. package/package.json +3 -2
  10. package/skills/e2e-testing/SKILL.md +3 -2
  11. package/skills/e2e-testing/references/action-types.md +22 -4
  12. package/skills/e2e-testing/references/test-json-format.md +23 -0
  13. package/src/actions.js +321 -14
  14. package/src/ai-generate.js +81 -0
  15. package/src/app-pool.js +339 -0
  16. package/src/config.js +131 -7
  17. package/src/dashboard.js +209 -11
  18. package/src/db.js +74 -7
  19. package/src/index.js +6 -4
  20. package/src/learner-sqlite.js +154 -0
  21. package/src/learner.js +70 -3
  22. package/src/mcp-tools.js +259 -34
  23. package/src/module-analysis.js +247 -0
  24. package/src/module-resolver.js +35 -2
  25. package/src/narrate.js +42 -1
  26. package/src/pool-manager.js +68 -17
  27. package/src/pool.js +464 -37
  28. package/src/reporter.js +4 -1
  29. package/src/runner.js +410 -63
  30. package/src/visual-diff.js +515 -0
  31. package/src/websocket.js +14 -3
  32. package/src/wizard.js +184 -0
  33. package/templates/build-dashboard.js +3 -0
  34. package/templates/dashboard/js/api.js +62 -3
  35. package/templates/dashboard/js/init.js +46 -0
  36. package/templates/dashboard/js/keyboard.js +8 -7
  37. package/templates/dashboard/js/quicksearch.js +277 -0
  38. package/templates/dashboard/js/state.js +61 -7
  39. package/templates/dashboard/js/toast.js +1 -1
  40. package/templates/dashboard/js/utils.js +20 -0
  41. package/templates/dashboard/js/view-live.js +240 -9
  42. package/templates/dashboard/js/view-runs.js +540 -94
  43. package/templates/dashboard/js/view-tests.js +157 -16
  44. package/templates/dashboard/js/view-tools.js +234 -0
  45. package/templates/dashboard/js/view-watch.js +2 -2
  46. package/templates/dashboard/js/websocket.js +36 -0
  47. package/templates/dashboard/styles/base.css +489 -53
  48. package/templates/dashboard/styles/components.css +719 -77
  49. package/templates/dashboard/styles/view-live.css +463 -59
  50. package/templates/dashboard/styles/view-runs.css +793 -155
  51. package/templates/dashboard/styles/view-tests.css +440 -77
  52. package/templates/dashboard/styles/view-tools.css +206 -0
  53. package/templates/dashboard/styles/view-watch.css +198 -41
  54. package/templates/dashboard/template.html +369 -56
  55. package/templates/dashboard.html +5375 -901
  56. package/templates/docker-compose-lightpanda.yml +7 -0
@@ -293,6 +293,117 @@ export function getRunInsights(projectId, report) {
293
293
  }
294
294
  }
295
295
 
296
+ // ── At-Least-One Guarantee: generate positive insights if none exist ──
297
+ if (insights.length === 0 && report.results.length > 0) {
298
+ const allPassed = report.results.every(r => r.success);
299
+
300
+ // Green streak detection
301
+ if (allPassed) {
302
+ const recentRuns = d.prepare(`
303
+ SELECT run_id, MIN(success) AS all_passed
304
+ FROM test_learnings
305
+ WHERE project_id = ?
306
+ GROUP BY run_id
307
+ ORDER BY created_at DESC
308
+ LIMIT 10
309
+ `).all(projectId);
310
+ const streak = recentRuns.findIndex(r => r.all_passed === 0);
311
+ const streakCount = streak === -1 ? recentRuns.length : streak;
312
+ if (streakCount >= 3) {
313
+ insights.push({
314
+ type: 'green-streak',
315
+ streak: streakCount,
316
+ message: `${streakCount}-run green streak — suite is stable.`,
317
+ });
318
+ }
319
+ }
320
+
321
+ // New tests (no historical data)
322
+ const newTests = report.results.filter(r => {
323
+ const h = d.prepare('SELECT COUNT(*) AS c FROM test_learnings WHERE project_id = ? AND test_name = ?').get(projectId, r.name);
324
+ return !h || h.c <= 1; // <= 1 because current run may already be written
325
+ });
326
+ if (newTests.length > 0) {
327
+ insights.push({
328
+ type: 'new-tests',
329
+ tests: newTests.map(t => t.name),
330
+ message: `${newTests.length} new test(s): ${newTests.map(t => t.name).slice(0, 3).join(', ')}${newTests.length > 3 ? '...' : ''}`,
331
+ });
332
+ }
333
+
334
+ // Pass rate improvement vs 7-day average
335
+ const avg7d = d.prepare(`
336
+ SELECT ROUND(AVG(CASE WHEN success = 1 THEN 100.0 ELSE 0.0 END), 1) AS pass_rate
337
+ FROM test_learnings
338
+ WHERE project_id = ? AND created_at >= datetime('now', '-7 days')
339
+ `).get(projectId);
340
+ const thisRunPassRate = Math.round((report.results.filter(r => r.success).length / report.results.length) * 1000) / 10;
341
+ if (avg7d?.pass_rate && thisRunPassRate > avg7d.pass_rate + 5) {
342
+ insights.push({
343
+ type: 'improved-pass-rate',
344
+ message: `Pass rate improved: ${thisRunPassRate}% this run vs ${avg7d.pass_rate}% 7-day average.`,
345
+ });
346
+ }
347
+
348
+ // Performance comparison
349
+ const avgDuration = d.prepare(`
350
+ SELECT ROUND(AVG(duration_ms)) AS avg_ms
351
+ FROM test_learnings
352
+ WHERE project_id = ? AND duration_ms IS NOT NULL AND created_at >= datetime('now', '-30 days')
353
+ `).get(projectId);
354
+ if (avgDuration?.avg_ms && report.results.length > 0) {
355
+ const thisAvg = report.results.reduce((s, r) => {
356
+ const ms = (r.endTime && r.startTime) ? new Date(r.endTime) - new Date(r.startTime) : 0;
357
+ return s + ms;
358
+ }, 0) / report.results.length;
359
+ const delta = Math.round(((thisAvg - avgDuration.avg_ms) / avgDuration.avg_ms) * 100);
360
+ if (Math.abs(delta) > 15) {
361
+ insights.push({
362
+ type: 'performance',
363
+ message: delta < 0
364
+ ? `This run was ${Math.abs(delta)}% faster than the 30-day average.`
365
+ : `This run was ${delta}% slower than the 30-day average — check for new slow pages.`,
366
+ });
367
+ }
368
+ }
369
+
370
+ // Stable selectors confirmed
371
+ if (allPassed) {
372
+ const usedSelectors = new Set();
373
+ for (const r of report.results) {
374
+ if (!r.actions) continue;
375
+ for (const a of r.actions) {
376
+ if (a.selector) usedSelectors.add(a.selector);
377
+ }
378
+ }
379
+ if (usedSelectors.size > 0) {
380
+ const stableCount = d.prepare(`
381
+ SELECT COUNT(DISTINCT selector) AS c
382
+ FROM selector_learnings
383
+ WHERE project_id = ? AND selector IN (${[...usedSelectors].map(() => '?').join(',')})
384
+ GROUP BY selector
385
+ HAVING SUM(CASE WHEN success = 0 THEN 1 ELSE 0 END) = 0 AND COUNT(*) > 3
386
+ `).all(projectId, ...usedSelectors).length;
387
+ if (stableCount > 0) {
388
+ insights.push({
389
+ type: 'stable-selectors',
390
+ count: stableCount,
391
+ message: `${stableCount} selector(s) confirmed stable across multiple runs.`,
392
+ });
393
+ }
394
+ }
395
+ }
396
+
397
+ // Fallback: if still no insights, report basic run stats
398
+ if (insights.length === 0) {
399
+ const passed = report.results.filter(r => r.success).length;
400
+ insights.push({
401
+ type: 'run-summary',
402
+ message: `${passed}/${report.results.length} tests passed (${thisRunPassRate}%).`,
403
+ });
404
+ }
405
+ }
406
+
296
407
  return insights;
297
408
  }
298
409
 
@@ -397,6 +508,49 @@ export function getSelectorHistory(projectId, selector, days = 30) {
397
508
  * Aggregated context for test authoring — curates the most actionable learnings
398
509
  * into a compact object that AI agents can use to write better tests.
399
510
  */
/**
 * Action health scores — composite per-action metrics aggregated by (action_type, selector).
 * Score = (success_rate * 0.5) + (speed_score * 0.3) + (collateral_score * 0.2)
 *
 * - speed_score falls linearly from 100 (0 ms average) to 0 (average of 5000 ms or more);
 *   a group with no recorded durations gets a speed_score of 100.
 * - collateral_score loses 20 points per average console/network error, floored at 0.
 *
 * Only (action_type, selector) groups with at least two rows in the window are
 * returned, ordered by ascending success rate, then by descending use count.
 *
 * @param projectId - Value matched against `action_health.project_id`.
 * @param {number} [days=30] - Look-back window in days. Values that are nullish,
 *   non-numeric, non-finite or negative fall back to 30 instead of producing an
 *   unusable date modifier.
 * @returns {Array<{actionType, selector, pageUrl, totalUses, successRate,
 *   avgDurationMs, maxDurationMs, avgCollateralErrors, usedByTests, healthScore}>}
 *   One entry per group; `healthScore` is rounded to an integer.
 */
export function getActionHealthScores(projectId, days = 30) {
  // The window is concatenated into a SQLite date modifier ('-N days'). A
  // malformed N makes datetime() evaluate to NULL, so the WHERE clause matches
  // nothing and the caller gets an empty list with no hint as to why.
  const requestedDays = Number(days ?? 30);
  const windowDays = Number.isFinite(requestedDays) && requestedDays >= 0 ? requestedDays : 30;

  const d = getDb();
  // NOTE(review): page_url is neither grouped nor aggregated, so SQLite picks it
  // from an arbitrary row of each group — confirm this is acceptable when one
  // selector is used on several pages.
  const rows = d.prepare(`
    SELECT
      action_type,
      selector,
      page_url,
      COUNT(*) AS total_uses,
      ROUND(AVG(CASE WHEN success = 1 THEN 100.0 ELSE 0.0 END), 1) AS success_rate,
      ROUND(AVG(duration_ms)) AS avg_duration_ms,
      MAX(duration_ms) AS max_duration_ms,
      ROUND(AVG(console_errors_after + network_errors_after), 1) AS avg_collateral_errors,
      COUNT(DISTINCT test_name) AS used_by_tests
    FROM action_health
    WHERE project_id = ? AND created_at >= datetime('now', '-' || ? || ' days')
    GROUP BY action_type, selector
    HAVING total_uses >= 2
    ORDER BY success_rate ASC, total_uses DESC
  `).all(projectId, windowDays);

  return rows.map(r => {
    // Each component is clamped to the 0..100 range before weighting.
    const speedScore = 100 - Math.min(100, ((r.avg_duration_ms || 0) / 5000) * 100);
    const collateralScore = 100 - Math.min(100, (r.avg_collateral_errors || 0) * 20);
    const healthScore = Math.round(r.success_rate * 0.5 + speedScore * 0.3 + collateralScore * 0.2);
    return {
      actionType: r.action_type,
      selector: r.selector,
      pageUrl: r.page_url,
      totalUses: r.total_uses,
      successRate: r.success_rate,
      avgDurationMs: r.avg_duration_ms,
      maxDurationMs: r.max_duration_ms,
      avgCollateralErrors: r.avg_collateral_errors,
      usedByTests: r.used_by_tests,
      healthScore,
    };
  });
}
553
+
400
554
  export function getTestCreationContext(projectId) {
401
555
  const d = getDb();
402
556
  const ctx = {};
package/src/learner.js CHANGED
@@ -17,8 +17,12 @@ const ERROR_CATEGORIES = [
17
17
  { pattern: /waitForSelector/i, category: 'selector-not-found' },
18
18
  { pattern: /not visible/i, category: 'selector-not-found' },
19
19
  { pattern: /navigation/i, category: 'navigation-error' },
20
- { pattern: /net::ERR_/i, category: 'connection-refused' },
20
+ { pattern: /ERR_NAME_NOT_RESOLVED/i, category: 'dns-resolution' },
21
21
  { pattern: /ERR_CONNECTION_REFUSED/i, category: 'connection-refused' },
22
+ { pattern: /ECONNREFUSED/i, category: 'connection-refused' },
23
+ { pattern: /Chrome Pool unavailable/i, category: 'pool-unavailable' },
24
+ { pattern: /Failed to connect to pool/i, category: 'pool-connect-failed' },
25
+ { pattern: /net::ERR_/i, category: 'network-error' },
22
26
  { pattern: /assert_text/i, category: 'assert-text-failed' },
23
27
  { pattern: /assert_url/i, category: 'assert-url-failed' },
24
28
  { pattern: /assert_visible/i, category: 'assert-visible-failed' },
@@ -35,6 +39,18 @@ const ERROR_CATEGORIES = [
35
39
  { pattern: /evaluate.*ERROR/i, category: 'evaluate-error' },
36
40
  ];
37
41
 
/**
 * Error categories that point at an infrastructure failure rather than at a
 * problem in the test or the application under test.
 */
export const INFRA_CATEGORIES = new Set([
  'connection-refused',
  'dns-resolution',
  'pool-unavailable',
  'pool-connect-failed',
  'network-error',
]);
46
+
/**
 * Tells whether an error message falls into one of the infrastructure
 * categories listed in INFRA_CATEGORIES.
 *
 * @param {string} errorMsg - Raw error message; an empty or missing message is never an infra error.
 * @returns {boolean} true when categorizeError() maps the message to an infrastructure category.
 */
export function isInfraError(errorMsg) {
  return errorMsg
    ? INFRA_CATEGORIES.has(categorizeError(errorMsg).category)
    : false;
}
53
+
38
54
  export function categorizeError(errorMsg) {
39
55
  if (!errorMsg) return { category: 'unknown', pattern: 'unknown' };
40
56
 
@@ -204,6 +220,11 @@ export function learnFromRun(projectId, runDbId, report, config, suiteName) {
204
220
  VALUES (?, ?, ?, ?, ?, ?, ?, ?)
205
221
  `);
206
222
 
223
+ const insertActionHealth = d.prepare(`
224
+ INSERT INTO action_health (project_id, run_id, test_name, action_index, action_type, selector, success, duration_ms, console_errors_after, network_errors_after, page_url)
225
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
226
+ `);
227
+
207
228
  const upsertErrorPattern = d.prepare(`
208
229
  INSERT INTO error_patterns (project_id, pattern, category, occurrence_count, first_seen, last_seen, example_error, example_test)
209
230
  VALUES (?, ?, ?, 1, datetime('now'), datetime('now'), ?, ?)
@@ -214,23 +235,40 @@ export function learnFromRun(projectId, runDbId, report, config, suiteName) {
214
235
  example_test = excluded.example_test
215
236
  `);
216
237
 
238
+ let infraCount = 0;
239
+
217
240
  const tx = d.transaction(() => {
218
241
  for (const result of results) {
219
242
  const durationMs = (result.endTime && result.startTime)
220
243
  ? new Date(result.endTime) - new Date(result.startTime)
221
244
  : null;
222
- const isFlaky = result.success && (result.attempt || 1) > 1 ? 1 : 0;
245
+ const isFlaky = result.flaky ? 1 : (result.success && (result.attempt || 1) > 1 ? 1 : 0);
223
246
 
224
247
  // Categorize error
225
248
  let errorPattern = null;
249
+ let infraFailure = false;
226
250
  if (result.error) {
227
251
  const { category, pattern } = categorizeError(result.error);
228
252
  errorPattern = category;
253
+ infraFailure = INFRA_CATEGORIES.has(category);
229
254
 
230
- // Track error pattern
255
+ // Always track error patterns (even infra) for awareness
231
256
  upsertErrorPattern.run(projectId, pattern, category, result.error, result.name);
232
257
  }
233
258
 
259
+ if (infraFailure) {
260
+ infraCount++;
261
+ // Still write test_learnings so run counts are accurate,
262
+ // but skip selector/page/api learnings to avoid polluting metrics
263
+ insertTestLearning.run(
264
+ projectId, runDbId, result.name,
265
+ result.success ? 1 : 0, durationMs, isFlaky,
266
+ result.attempt || 1, result.maxAttempts || 1,
267
+ errorPattern
268
+ );
269
+ continue;
270
+ }
271
+
234
272
  // Test-level learning
235
273
  insertTestLearning.run(
236
274
  projectId, runDbId, result.name,
@@ -275,6 +313,33 @@ export function learnFromRun(projectId, runDbId, report, config, suiteName) {
275
313
  api.isError, result.name
276
314
  );
277
315
  }
316
+
317
+ // Action health — per-action metrics with collateral error estimation
318
+ if (result.actions?.length) {
319
+ const totalConsoleErrors = (result.consoleLogs || []).filter(l => l.type === 'error').length;
320
+ const totalNetworkErrors = (result.networkErrors || []).length;
321
+ const actionCount = result.actions.length;
322
+ let currentPage = '/';
323
+
324
+ for (let i = 0; i < actionCount; i++) {
325
+ const action = result.actions[i];
326
+ if (action.type === 'goto' || action.type === 'navigate') {
327
+ try { currentPage = new URL(action.value, 'http://placeholder').pathname; } catch { currentPage = action.value || '/'; }
328
+ }
329
+ // Estimate collateral errors: later actions inherit more errors (weighted distribution)
330
+ const weight = (i + 1) / actionCount;
331
+ const consoleAfter = action.success === false ? Math.round(totalConsoleErrors * weight) : 0;
332
+ const networkAfter = action.success === false ? Math.round(totalNetworkErrors * weight) : 0;
333
+
334
+ insertActionHealth.run(
335
+ projectId, runDbId, result.name, i,
336
+ action.type || 'unknown', action.selector || null,
337
+ action.success === false ? 0 : 1,
338
+ action.duration || null,
339
+ consoleAfter, networkAfter, currentPage
340
+ );
341
+ }
342
+ }
278
343
  }
279
344
  });
280
345
 
@@ -287,6 +352,8 @@ export function learnFromRun(projectId, runDbId, report, config, suiteName) {
287
352
  if (config?.learningsNeo4j) {
288
353
  writeToGraph(projectId, runDbId, report, config, suiteName).catch(() => {});
289
354
  }
355
+
356
+ return { infraCount };
290
357
  }
291
358
 
292
359
  // ── Summary cache ─────────────────────────────────────────────────────────────