a2acalling 0.6.60 → 0.6.62

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,879 @@
1
+ # A2A-42: E2E Test Persistence & Dashboard Integration
2
+
3
+ > **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
4
+
5
+ **Goal:** Add local result persistence, regression detection, a cron-ready wrapper, and a dashboard Health tab so E2E test results are stored, compared, and visible without external dependencies.
6
+
7
+ **Architecture:** A new `test/e2e/persist.js` module handles writing timestamped JSON results to `~/.config/openclaw/test-results/`, reading history, and detecting regressions. The existing orchestrator gets a `--persist` flag that calls persist after each run. A shell script wraps orchestrate for cron. The dashboard gets a new Health tab backed by a single `GET /dashboard/api/test-results` endpoint that reads from the persist layer.
8
+
9
+ **Tech Stack:** Node.js builtins only (fs, path). No new npm dependencies. Shoelace web components for dashboard UI.
10
+
11
+ **Linear Ticket:** A2A-42
12
+
13
+ ---
14
+
15
+ ## Conventions Reminder
16
+
17
+ - Logger: `const { createLogger } = require('./logger'); const logger = createLogger({ component: 'a2a.test' });`
18
+ - No `console.log` in production code (test orchestrator already uses stderr — that's fine)
19
+ - CommonJS modules (`require`/`module.exports`)
20
+ - Config dir: use the same resolution as `src/lib/config.js` — `process.env.A2A_CONFIG_DIR || process.env.OPENCLAW_CONFIG_DIR || ~/.config/openclaw/`
21
+ - File naming: kebab-case
22
+ - Test files: `*.test.js` in `test/unit/`
23
+
24
+ ---
25
+
26
+ ### Task 1: Create `test/e2e/persist.js` — Result Persistence Module
27
+
28
+ **Files:**
29
+ - Create: `test/e2e/persist.js`
30
+
31
+ **Step 1: Write the persist module**
32
+
33
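+ This module provides four functions: `saveResult`, `getLatest`, `getHistory`, `detectRegression`. It also exports the `resolveDir` helper and the `RESULTS_DIR`, `LATEST_FILE`, and `MAX_HISTORY` constants. Results are stored as timestamped JSON files in `<configDir>/test-results/`, alongside a `latest.json` that is a regular file copy of the newest result (not a symlink — avoids cross-platform issues).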
34
+
35
+ ```js
36
+ /**
37
+ * E2E Test Result Persistence
38
+ *
39
+ * Stores test results as timestamped JSON in ~/.config/openclaw/test-results/.
40
+ * Provides history retrieval and regression detection.
41
+ *
42
+ * A2A-42: Local-first result storage — no external dependencies.
43
+ */
44
+
45
+ const fs = require('fs');
46
+ const path = require('path');
47
+
48
+ // A2A-42: Default config dir matches src/lib/config.js resolution.
49
+ // Accept configDir parameter for testability (reviewer feedback: module-level
50
+ // constants prevent testing the null-path without subprocess gymnastics).
51
+ const DEFAULT_CONFIG_DIR = process.env.A2A_CONFIG_DIR ||
52
+ process.env.OPENCLAW_CONFIG_DIR ||
53
+ path.join(process.env.HOME || '/tmp', '.config', 'openclaw');
54
+
55
+ const MAX_HISTORY = 20;
56
+
57
+ function resolveDir(configDir) {
58
+ const base = configDir || DEFAULT_CONFIG_DIR;
59
+ return {
60
+ resultsDir: path.join(base, 'test-results'),
61
+ latestFile: path.join(base, 'test-results', 'latest.json')
62
+ };
63
+ }
64
+
65
+ // Module-level defaults for callers that don't pass configDir
66
+ const RESULTS_DIR = resolveDir().resultsDir;
67
+ const LATEST_FILE = resolveDir().latestFile;
68
+
69
+ function ensureDir(dir) {
70
+ if (!fs.existsSync(dir)) {
71
+ fs.mkdirSync(dir, { recursive: true });
72
+ }
73
+ }
74
+
75
+ /**
76
+ * Save a test report result to disk.
77
+ * Writes a timestamped file and updates latest.json.
78
+ * Prunes history beyond MAX_HISTORY entries.
79
+ *
80
+ * @param {object} report - Output from TestReport.toJSON()
81
+ * @returns {{ file: string, latest: string, regression: object }}
82
+ */
83
+ function saveResult(report, options = {}) {
84
+ const { resultsDir, latestFile } = resolveDir(options.configDir);
85
+ ensureDir(resultsDir);
86
+
87
+ const ts = new Date().toISOString().replace(/[:.]/g, '-');
88
+ const filename = `result-${ts}.json`;
89
+ const filepath = path.join(resultsDir, filename);
90
+
91
+ // A2A-42: Detect regression before writing, so we can include it in the saved result
92
+ const previous = getLatest(options);
93
+ const regression = previous ? detectRegression(report, previous) : {
94
+ detected: false,
95
+ newFailures: [],
96
+ fixedTests: []
97
+ };
98
+
99
+ const enriched = { ...report, regression };
100
+ const json = JSON.stringify(enriched, null, 2);
101
+
102
+ // A2A-42: Atomic write via tmp+rename — matches pattern from src/lib/config.js:290
103
+ // Prevents truncated reads if the server reads latest.json mid-write.
104
+ const tmpTimestamped = filepath + '.tmp';
105
+ fs.writeFileSync(tmpTimestamped, json);
106
+ fs.renameSync(tmpTimestamped, filepath);
107
+
108
+ const tmpLatest = latestFile + '.tmp';
109
+ fs.writeFileSync(tmpLatest, json);
110
+ fs.renameSync(tmpLatest, latestFile);
111
+
112
+ pruneHistory(options);
113
+
114
+ return { file: filepath, latest: latestFile, regression };
115
+ }
116
+
117
+ /**
118
+ * Read the most recent test result.
119
+ * @returns {object|null}
120
+ */
121
+ function getLatest(options = {}) {
122
+ const { latestFile } = resolveDir(options.configDir);
123
+ if (!fs.existsSync(latestFile)) return null;
124
+ try {
125
+ return JSON.parse(fs.readFileSync(latestFile, 'utf8'));
126
+ } catch {
127
+ return null;
128
+ }
129
+ }
130
+
131
+ /**
132
+ * Read the last N results, newest first.
133
+ * @param {number} [limit=20]
134
+ * @returns {object[]}
135
+ */
136
+ function getHistory(limit = MAX_HISTORY, options = {}) {
137
+ const { resultsDir } = resolveDir(options.configDir);
138
+ if (!fs.existsSync(resultsDir)) return [];
139
+
140
+ const files = fs.readdirSync(resultsDir)
141
+ .filter(f => f.startsWith('result-') && f.endsWith('.json'))
142
+ .sort()
143
+ .reverse()
144
+ .slice(0, Math.max(1, limit));
145
+
146
+ return files.map(f => {
147
+ try {
148
+ return JSON.parse(fs.readFileSync(path.join(resultsDir, f), 'utf8'));
149
+ } catch {
150
+ return null;
151
+ }
152
+ }).filter(Boolean);
153
+ }
154
+
155
+ /**
156
+ * Compare current vs previous result for regressions.
157
+ * A regression is a step that passed before but fails now.
158
+ * A fix is a step that failed before but passes now.
159
+ *
160
+ * @param {object} current - Current report JSON
161
+ * @param {object} previous - Previous report JSON
162
+ * @returns {{ detected: boolean, newFailures: string[], fixedTests: string[] }}
163
+ */
164
+ function detectRegression(current, previous) {
165
+ const prevSteps = new Map();
166
+ for (const step of (previous.steps || [])) {
167
+ prevSteps.set(step.name, step.status);
168
+ }
169
+
170
+ const newFailures = [];
171
+ const fixedTests = [];
172
+
173
+ for (const step of (current.steps || [])) {
174
+ const prevStatus = prevSteps.get(step.name);
175
+ if (!prevStatus) continue; // new step, not a regression
176
+ if (step.status === 'fail' && prevStatus === 'pass') {
177
+ newFailures.push(step.name);
178
+ }
179
+ if (step.status === 'pass' && prevStatus === 'fail') {
180
+ fixedTests.push(step.name);
181
+ }
182
+ }
183
+
184
+ return {
185
+ detected: newFailures.length > 0,
186
+ newFailures,
187
+ fixedTests
188
+ };
189
+ }
190
+
191
+ /**
192
+ * Remove old result files beyond MAX_HISTORY.
193
+ */
194
+ function pruneHistory(options = {}) {
195
+ const { resultsDir } = resolveDir(options.configDir);
196
+ if (!fs.existsSync(resultsDir)) return;
197
+
198
+ const files = fs.readdirSync(resultsDir)
199
+ .filter(f => f.startsWith('result-') && f.endsWith('.json'))
200
+ .sort();
201
+
202
+ while (files.length > MAX_HISTORY) {
203
+ const oldest = files.shift();
204
+ try {
205
+ fs.unlinkSync(path.join(resultsDir, oldest));
206
+ } catch {
207
+ // best effort
208
+ }
209
+ }
210
+ }
211
+
212
+ module.exports = {
213
+ saveResult,
214
+ getLatest,
215
+ getHistory,
216
+ detectRegression,
217
+ RESULTS_DIR,
218
+ LATEST_FILE,
219
+ MAX_HISTORY,
220
+ resolveDir
221
+ };
222
+ ```
223
+
224
+ **Step 2: Commit**
225
+
226
+ ```bash
227
+ git add test/e2e/persist.js
228
+ git commit -m "feat(A2A-42): add E2E result persistence module"
229
+ ```
230
+
231
+ ---
232
+
233
+ ### Task 2: Unit Tests for `persist.js`
234
+
235
+ **Files:**
236
+ - Create: `test/unit/persist.test.js`
237
+
238
+ **Step 1: Write the tests**
239
+
240
+ ```js
241
+ /**
242
+ * Unit tests for test/e2e/persist.js
243
+ *
244
+ * Uses configDir parameter for isolation — no env var hacks.
245
+ * Each test gets a fresh temp directory.
246
+ */
247
+
248
+ const fs = require('fs');
249
+ const path = require('path');
250
+ const os = require('os');
251
+
252
+ const { saveResult, getLatest, getHistory, detectRegression, resolveDir } = require('../e2e/persist');
253
+
254
+ function makeTmpDir() {
255
+ return fs.mkdtempSync(path.join(os.tmpdir(), 'persist-test-'));
256
+ }
257
+
258
+ function makeReport(overrides = {}) {
259
+ return {
260
+ name: 'E2E Orchestrator',
261
+ status: overrides.status || 'passed',
262
+ startedAt: Date.now() - 500,
263
+ finishedAt: Date.now(),
264
+ duration: 500,
265
+ summary: { passed: 8, failed: 0, skipped: 0, total: 8 },
266
+ steps: overrides.steps || [
267
+ { name: 'Create harness', status: 'pass', timestamp: Date.now() },
268
+ { name: 'Start servers', status: 'pass', timestamp: Date.now() },
269
+ { name: 'Ping both agents', status: 'pass', timestamp: Date.now() },
270
+ { name: 'Create tokens', status: 'pass', timestamp: Date.now() },
271
+ { name: 'Exchange invites', status: 'pass', timestamp: Date.now() },
272
+ { name: 'B calls A', status: 'pass', timestamp: Date.now() },
273
+ { name: 'A calls B', status: 'pass', timestamp: Date.now() },
274
+ { name: 'Verify response integrity', status: 'pass', timestamp: Date.now() }
275
+ ],
276
+ ...overrides
277
+ };
278
+ }
279
+
280
+ module.exports = ({ test, assert }) => {
281
+ test('saveResult creates results directory and files', () => {
282
+ const configDir = makeTmpDir();
283
+ const report = makeReport();
284
+ const result = saveResult(report, { configDir });
285
+
286
+ assert.ok(fs.existsSync(result.file), 'Should create timestamped file');
287
+ assert.ok(fs.existsSync(result.latest), 'Should create latest.json');
288
+
289
+ const saved = JSON.parse(fs.readFileSync(result.file, 'utf8'));
290
+ assert.equal(saved.status, 'passed');
291
+ assert.equal(saved.summary.passed, 8);
292
+ assert.ok(saved.regression, 'Should include regression field');
293
+ assert.equal(saved.regression.detected, false);
294
+ });
295
+
296
+ test('getLatest returns the most recent result', () => {
297
+ const configDir = makeTmpDir();
298
+ const report = makeReport({ duration: 999 });
299
+ saveResult(report, { configDir });
300
+ const latest = getLatest({ configDir });
301
+ assert.ok(latest, 'Should return a result');
302
+ assert.equal(latest.duration, 999);
303
+ });
304
+
305
+ test('getLatest returns null when no results exist', () => {
306
+ const configDir = makeTmpDir();
307
+ const latest = getLatest({ configDir });
308
+ assert.equal(latest, null, 'Should return null for empty directory');
309
+ });
310
+
311
+ test('getHistory returns results newest first', () => {
312
+ const configDir = makeTmpDir();
313
+ for (let i = 0; i < 3; i++) {
314
+ saveResult(makeReport({ duration: 100 + i }), { configDir });
315
+ }
316
+ const history = getHistory(10, { configDir });
317
+ assert.ok(history.length >= 3, 'Should have at least 3 results');
318
+ assert.ok(history[0].duration >= history[history.length - 1].duration,
319
+ 'Should be sorted newest first');
320
+ });
321
+
322
+ test('getHistory returns empty array for missing directory', () => {
323
+ const configDir = makeTmpDir();
324
+ const history = getHistory(10, { configDir });
325
+ assert.equal(history.length, 0, 'Should return empty array');
326
+ });
327
+
328
+ test('detectRegression identifies new failures', () => {
329
+ const previous = makeReport();
330
+ const current = makeReport({
331
+ status: 'failed',
332
+ steps: [
333
+ { name: 'Create harness', status: 'pass', timestamp: Date.now() },
334
+ { name: 'Start servers', status: 'fail', timestamp: Date.now() },
335
+ { name: 'Ping both agents', status: 'pass', timestamp: Date.now() }
336
+ ]
337
+ });
338
+
339
+ const result = detectRegression(current, previous);
340
+ assert.equal(result.detected, true);
341
+ assert.ok(result.newFailures.includes('Start servers'));
342
+ assert.equal(result.fixedTests.length, 0);
343
+ });
344
+
345
+ test('detectRegression identifies fixed tests', () => {
346
+ const previous = makeReport({
347
+ steps: [
348
+ { name: 'Create harness', status: 'pass', timestamp: Date.now() },
349
+ { name: 'Start servers', status: 'fail', timestamp: Date.now() }
350
+ ]
351
+ });
352
+ const current = makeReport({
353
+ steps: [
354
+ { name: 'Create harness', status: 'pass', timestamp: Date.now() },
355
+ { name: 'Start servers', status: 'pass', timestamp: Date.now() }
356
+ ]
357
+ });
358
+
359
+ const result = detectRegression(current, previous);
360
+ assert.equal(result.detected, false);
361
+ assert.ok(result.fixedTests.includes('Start servers'));
362
+ });
363
+
364
+ test('pruneHistory keeps only MAX_HISTORY files', () => {
365
+ const configDir = makeTmpDir();
366
+ for (let i = 0; i < 25; i++) {
367
+ saveResult(makeReport({ duration: i }), { configDir });
368
+ }
369
+
370
+ const { resultsDir } = resolveDir(configDir);
371
+ const files = fs.readdirSync(resultsDir)
372
+ .filter(f => f.startsWith('result-') && f.endsWith('.json'));
373
+
374
+ assert.ok(files.length <= 20, `Should have at most 20 files, got ${files.length}`);
375
+ });
376
+
377
+ test('saveResult uses atomic write (tmp+rename)', () => {
378
+ const configDir = makeTmpDir();
379
+ const report = makeReport();
380
+ const result = saveResult(report, { configDir });
381
+
382
+ // No .tmp files should remain after write
383
+ const { resultsDir } = resolveDir(configDir);
384
+ const tmpFiles = fs.readdirSync(resultsDir).filter(f => f.endsWith('.tmp'));
385
+ assert.equal(tmpFiles.length, 0, 'No .tmp files should remain after atomic write');
386
+ });
387
+ };
388
+ ```
389
+
390
+ **Step 2: Run tests**
391
+
392
+ ```bash
393
+ node test/run.js --filter persist
394
+ ```
395
+
396
+ Expected: All persist tests pass.
397
+
398
+ **Step 3: Commit**
399
+
400
+ ```bash
401
+ git add test/unit/persist.test.js
402
+ git commit -m "test(A2A-42): add unit tests for E2E result persistence"
403
+ ```
404
+
405
+ ---
406
+
407
+ ### Task 3: Add `--persist` Flag to Orchestrator
408
+
409
+ **Files:**
410
+ - Modify: `test/e2e/orchestrate.js`
411
+
412
+ **Step 1: Add persist flag handling**
413
+
414
+ After line 18 (`const jsonOutput = args.includes('--json');`), add:
415
+
416
+ ```js
417
+ const persistResults = args.includes('--persist');
418
+ ```
419
+
420
+ Inside `main()`, insert the following block between `report.finish();` (line 238) and the `// Output report` comment (line 240). This is after the `finally` block closes, at the same brace depth as `report.finish()`:
421
+
422
+ ```js
423
+ // A2A-42: Persist results to local storage for regression tracking
424
+ if (persistResults) {
425
+ try {
426
+ const { saveResult } = require('./persist');
427
+ const persisted = saveResult(report.toJSON());
428
+ if (!jsonOutput) {
429
+ process.stderr.write(`Results saved to ${persisted.file}\n`);
430
+ }
431
+ if (persisted.regression.detected) {
432
+ process.stderr.write(`⚠ REGRESSION DETECTED: ${persisted.regression.newFailures.join(', ')}\n`);
433
+ }
434
+ if (persisted.regression.fixedTests.length > 0) {
435
+ process.stderr.write(`✓ Fixed: ${persisted.regression.fixedTests.join(', ')}\n`);
436
+ }
437
+ } catch (err) {
438
+ process.stderr.write(`Warning: Failed to persist results: ${err.message}\n`);
439
+ }
440
+ }
441
+ ```
442
+
443
+ Also update the usage comment at the top to document the new flag:
444
+
445
+ ```
446
+ * Usage:
447
+ * node test/e2e/orchestrate.js # markdown report to stderr
448
+ * node test/e2e/orchestrate.js --json # JSON report to stdout
449
+ * node test/e2e/orchestrate.js --persist # save results to disk
450
+ * node test/e2e/orchestrate.js --json --persist # both
451
+ * node test/e2e/orchestrate.js --verbose # verbose output
452
+ ```
453
+
454
+ **Step 2: Verify it works manually**
455
+
456
+ ```bash
457
+ node test/e2e/orchestrate.js --json --persist 2>/dev/null | head -5
458
+ ls ~/.config/openclaw/test-results/
459
+ ```
460
+
461
+ Expected: JSON output, and `test-results/` directory with `latest.json` and a timestamped file.
462
+
463
+ **Step 3: Run the full test suite**
464
+
465
+ ```bash
466
+ npm test
467
+ ```
468
+
469
+ Expected: 328 passing, 2 failing (same pre-existing failures).
470
+
471
+ **Step 4: Commit**
472
+
473
+ ```bash
474
+ git add test/e2e/orchestrate.js
475
+ git commit -m "feat(A2A-42): add --persist flag to E2E orchestrator"
476
+ ```
477
+
478
+ ---
479
+
480
+ ### Task 4: Create `scripts/run-e2e.sh` — Cron/CI Wrapper
481
+
482
+ **Files:**
483
+ - Create: `scripts/run-e2e.sh`
484
+
485
+ **Step 1: Write the wrapper script**
486
+
487
+ ```bash
488
+ #!/usr/bin/env bash
489
+ # A2A-42: Cron/CI wrapper for E2E test orchestrator.
490
+ # Runs orchestration, persists results, optionally alerts on failure.
491
+ #
492
+ # Usage:
493
+ # scripts/run-e2e.sh # run + persist
494
+ # scripts/run-e2e.sh --alert # run + persist + alert on failure
495
+ #
496
+ # Cron example (every 6 hours):
497
+ # 0 */6 * * * /root/a2acalling/scripts/run-e2e.sh --alert >> /var/log/a2a-e2e.log 2>&1
498
+
499
+ set -euo pipefail
500
+
501
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
502
+ PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
503
+ ALERT_SCRIPT="/root/maestro/scripts/alert.sh"
504
+ ALERT_ON_FAILURE=false
505
+
506
+ for arg in "$@"; do
507
+ case "$arg" in
508
+ --alert) ALERT_ON_FAILURE=true ;;
509
+ esac
510
+ done
511
+
512
+ echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] Starting E2E orchestration..."
513
+
514
+ cd "$PROJECT_DIR"
515
+
516
+ # A2A-42: Run orchestrator with JSON output and persistence.
517
+ # stdout (JSON) goes to /dev/null; stderr (regression messages, logs) passes through
518
+ # so cron log captures warnings like "REGRESSION DETECTED: ..."
519
+ if node test/e2e/orchestrate.js --json --persist > /dev/null; then
520
+ echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] E2E: PASSED"
521
+ exit 0
522
+ else
523
+ EXIT_CODE=$?
524
+ echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] E2E: FAILED (exit $EXIT_CODE)"
525
+
526
+ if [ "$ALERT_ON_FAILURE" = true ] && [ -x "$ALERT_SCRIPT" ]; then
527
+ "$ALERT_SCRIPT" error "E2E test failure detected — check ~/.config/openclaw/test-results/latest.json"
528
+ fi
529
+
530
+ exit "$EXIT_CODE"
531
+ fi
532
+ ```
533
+
534
+ **Step 2: Make it executable**
535
+
536
+ ```bash
537
+ chmod +x scripts/run-e2e.sh
538
+ ```
539
+
540
+ **Step 3: Test it runs**
541
+
542
+ ```bash
543
+ scripts/run-e2e.sh
544
+ echo $?
545
+ ```
546
+
547
+ Expected: exit 0 with "PASSED" message.
548
+
549
+ **Step 4: Commit**
550
+
551
+ ```bash
552
+ git add scripts/run-e2e.sh
553
+ git commit -m "feat(A2A-42): add cron/CI wrapper script for E2E tests"
554
+ ```
555
+
556
+ ---
557
+
558
+ ### Task 5: Dashboard API Endpoint — `GET /dashboard/api/test-results`
559
+
560
+ **Files:**
561
+ - Modify: `src/routes/dashboard.js` (add endpoint inside `createDashboardApiRouter`, after the `/logs/stats` route ~line 884)
562
+
563
+ **Step 1: Add the endpoint**
564
+
565
+ At the top of the `createDashboardApiRouter` function (after `const context = buildContext(options);` on line 449), add the persist require with a try/catch so it degrades gracefully in installed-package environments where test/e2e/ doesn't exist:
566
+
567
+ ```js
568
+ // A2A-42: Load E2E persist layer for Health tab. Gracefully null if not available
569
+ // (e.g., installed as npm package without test files).
570
+ let persistModule = null;
571
+ try {
572
+ persistModule = require(path.join(__dirname, '..', '..', 'test', 'e2e', 'persist'));
573
+ } catch {
574
+ // test/e2e/persist.js not available — Health tab will show "no results"
575
+ }
576
+ ```
577
+
578
+ Then add this route after the `router.get('/logs/stats', ...)` block (around line 884):
579
+
580
+ ```js
581
+ // A2A-42: Serve E2E test results for the Health tab.
582
+ // Reads from local persist layer — no external dependencies.
583
+ router.get('/test-results', (req, res) => {
584
+ if (!persistModule) {
585
+ return res.json({
586
+ success: true,
587
+ latest: null,
588
+ history: [],
589
+ has_results: false,
590
+ message: 'Test results module not available'
591
+ });
592
+ }
593
+
594
+ const latest = persistModule.getLatest();
595
+ const limit = Math.min(20, Math.max(1, Number.parseInt(req.query.limit || '10', 10) || 10));
596
+ const history = persistModule.getHistory(limit);
597
+
598
+ return res.json({
599
+ success: true,
600
+ latest,
601
+ history: history.map(r => ({
602
+ status: r.status,
603
+ duration: r.duration,
604
+ startedAt: r.startedAt,
605
+ finishedAt: r.finishedAt,
606
+ summary: r.summary,
607
+ regression: r.regression || null
608
+ })),
609
+ has_results: latest !== null
610
+ });
611
+ });
612
+ ```
613
+
614
+ **Step 2: Run full test suite**
615
+
616
+ ```bash
617
+ npm test
618
+ ```
619
+
620
+ Expected: 328 passing, 2 failing (same pre-existing failures).
621
+
622
+ **Step 3: Commit**
623
+
624
+ ```bash
625
+ git add src/routes/dashboard.js
626
+ git commit -m "feat(A2A-42): add dashboard API endpoint for test results"
627
+ ```
628
+
629
+ ---
630
+
631
+ ### Task 6: Dashboard UI — Health Tab
632
+
633
+ **Files:**
634
+ - Modify: `src/dashboard/public/index.html` (add Health tab)
635
+ - Modify: `src/dashboard/public/app.js` (add Health tab rendering and loader)
636
+
637
+ **Step 1: Add tab to index.html**
638
+
639
+ After the Logs tab (`<sl-tab slot="nav" panel="logs">Logs</sl-tab>`) on line 23, add:
640
+
641
+ ```html
642
+ <sl-tab slot="nav" panel="health">Health</sl-tab>
643
+ ```
644
+
645
+ Before the closing `</sl-tab-group>` (line 257), add the Health panel:
646
+
647
+ ```html
648
+ <sl-tab-panel name="health">
649
+ <h2>E2E Health</h2>
650
+ <sl-card id="health-latest">
651
+ <p>No test results available. Run <code>node test/e2e/orchestrate.js --persist</code> to generate results.</p>
652
+ </sl-card>
653
+ <h3>History</h3>
654
+ <table id="health-history-table">
655
+ <thead>
656
+ <tr>
657
+ <th>Status</th>
658
+ <th>Duration</th>
659
+ <th>Passed</th>
660
+ <th>Failed</th>
661
+ <th>Regression</th>
662
+ <th>Time</th>
663
+ </tr>
664
+ </thead>
665
+ <tbody></tbody>
666
+ </table>
667
+ </sl-tab-panel>
668
+ ```
669
+
670
+ **Step 2: Add Health tab JS to app.js**
671
+
672
+ At the end of `app.js`, before the `bootstrap()` call (around line 2138), add:
673
+
674
+ ```js
675
+ // === Health Tab (A2A-42) ===
676
+
677
+ // A2A-42: Escape HTML entities for safe innerHTML rendering of step names/errors.
678
+ function escapeHtml(s) {
679
+ return String(s).replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;');
680
+ }
681
+
682
+ async function loadHealth() {
683
+ try {
684
+ const res = await fetch('./api/test-results');
685
+ const data = await res.json();
686
+ renderHealthLatest(data.latest);
687
+ renderHealthHistory(data.history || []);
688
+ } catch (err) {
689
+ renderHealthLatest(null);
690
+ renderHealthHistory([]);
691
+ }
692
+ }
693
+
694
+ function renderHealthLatest(latest) {
695
+ const card = document.getElementById('health-latest');
696
+ if (!card) return;
697
+
698
+ if (!latest) {
699
+ card.innerHTML = '<p>No test results available. Run <code>node test/e2e/orchestrate.js --persist</code> to generate.</p>';
700
+ return;
701
+ }
702
+
703
+ const statusBadge = latest.status === 'passed'
704
+ ? '<sl-badge variant="success">PASSED</sl-badge>'
705
+ : '<sl-badge variant="danger">FAILED</sl-badge>';
706
+
707
+ const regression = latest.regression;
708
+ let regressionHtml = '';
709
+ if (regression && regression.detected) {
710
+ regressionHtml = `<p><sl-badge variant="warning">REGRESSION</sl-badge> New failures: ${regression.newFailures.map(escapeHtml).join(', ')}</p>`;
711
+ }
712
+ if (regression && regression.fixedTests && regression.fixedTests.length > 0) {
713
+ regressionHtml += `<p><sl-badge variant="success">FIXED</sl-badge> ${regression.fixedTests.map(escapeHtml).join(', ')}</p>`;
714
+ }
715
+
716
+ const ts = latest.finishedAt ? new Date(latest.finishedAt).toLocaleString() : 'unknown';
717
+ const summary = latest.summary || {};
718
+
719
+ card.innerHTML = `
720
+ <div class="row">
721
+ <strong>Latest Run</strong> ${statusBadge}
722
+ </div>
723
+ <p><strong>Duration:</strong> ${latest.duration || 0}ms &middot;
724
+ <strong>Passed:</strong> ${summary.passed || 0} &middot;
725
+ <strong>Failed:</strong> ${summary.failed || 0} &middot;
726
+ <strong>Skipped:</strong> ${summary.skipped || 0} &middot;
727
+ <strong>Time:</strong> ${ts}</p>
728
+ ${regressionHtml}
729
+ <details>
730
+ <summary>Steps (${(latest.steps || []).length})</summary>
731
+ <ul>
732
+ ${(latest.steps || []).map(s => {
733
+ const icon = s.status === 'pass' ? '&#x2705;' : s.status === 'fail' ? '&#x274C;' : '&#x23ED;';
734
+ const err = s.error ? ` — <code>${escapeHtml(String(s.error).slice(0, 120))}</code>` : '';
735
+ return `<li>${icon} ${escapeHtml(s.name)}${err}</li>`;
736
+ }).join('')}
737
+ </ul>
738
+ </details>
739
+ `;
740
+ }
741
+
742
+ function renderHealthHistory(history) {
743
+ const tbody = document.querySelector('#health-history-table tbody');
744
+ if (!tbody) return;
745
+
746
+ if (!history || history.length === 0) {
747
+ tbody.innerHTML = '<tr><td colspan="6">No history</td></tr>';
748
+ return;
749
+ }
750
+
751
+ tbody.innerHTML = history.map(r => {
752
+ const badge = r.status === 'passed'
753
+ ? '<sl-badge variant="success" size="small">PASS</sl-badge>'
754
+ : '<sl-badge variant="danger" size="small">FAIL</sl-badge>';
755
+ const summary = r.summary || {};
756
+ const regression = r.regression;
757
+ const regText = regression && regression.detected
758
+ ? `<sl-badge variant="warning" size="small">${regression.newFailures.length} new</sl-badge>`
759
+ : '-';
760
+ const ts = r.finishedAt ? new Date(r.finishedAt).toLocaleString() : '-';
761
+ return `<tr>
762
+ <td>${badge}</td>
763
+ <td>${r.duration || 0}ms</td>
764
+ <td>${summary.passed || 0}</td>
765
+ <td>${summary.failed || 0}</td>
766
+ <td>${regText}</td>
767
+ <td>${ts}</td>
768
+ </tr>`;
769
+ }).join('');
770
+ }
771
+ ```
772
+
773
+ **Step 3: Register the Health tab in tabLoaders**
774
+
775
+ In `app.js`, find the `tabLoaders` object (line ~2077) and add:
776
+
777
+ ```js
778
+ health: loadHealth,
779
+ ```
780
+
781
+ So it becomes:
782
+
783
+ ```js
784
+ const tabLoaders = {
785
+ contacts: loadContacts,
786
+ calls: loadCalls,
787
+ logs: () => { loadLogs(); loadLogStats(); },
788
+ permissions: () => {},
789
+ invites: loadInvites,
790
+ health: loadHealth,
791
+ };
792
+ ```
793
+
794
+ **Step 4: Run full test suite**
795
+
796
+ ```bash
797
+ npm test
798
+ ```
799
+
800
+ Expected: 328 passing, 2 failing (same pre-existing failures).
801
+
802
+ **Step 5: Commit**
803
+
804
+ ```bash
805
+ git add src/dashboard/public/index.html src/dashboard/public/app.js
806
+ git commit -m "feat(A2A-42): add Health tab to dashboard with E2E results display"
807
+ ```
808
+
809
+ ---
810
+
811
+ ### Task 7: Final Verification & Squash
812
+
813
+ **Step 1: Run E2E orchestrator with persist to seed data**
814
+
815
+ ```bash
816
+ node test/e2e/orchestrate.js --json --persist > /dev/null 2>&1
817
+ cat ~/.config/openclaw/test-results/latest.json | head -20
818
+ ```
819
+
820
+ Expected: JSON with `status`, `summary`, `regression` fields.
821
+
822
+ **Step 2: Run full test suite (all tiers)**
823
+
824
+ ```bash
825
+ npm test
826
+ ```
827
+
828
+ Expected: 328 passing, 2 failing (same pre-existing install-skills failures).
829
+
830
+ **Step 3: Run E2E tier specifically**
831
+
832
+ ```bash
833
+ node test/run.js --e2e
834
+ ```
835
+
836
+ Expected: 51+ passing E2E tests.
837
+
838
+ **Step 4: Verify diff size**
839
+
840
+ ```bash
841
+ git diff --stat origin/main
842
+ ```
843
+
844
+ Expected: Under 500 lines changed, under 15 files.
845
+
846
+ **Step 5: Push and open PR**
847
+
848
+ ```bash
849
+ git push origin feature/a2a-42
850
+ ```
851
+
852
+ ---
853
+
854
+ ## File Summary
855
+
856
+ | Action | File | Description |
857
+ |--------|------|-------------|
858
+ | Create | `test/e2e/persist.js` | Result persistence: save, read, history, regression detect |
859
+ | Create | `test/unit/persist.test.js` | Unit tests for persist module |
860
+ | Create | `scripts/run-e2e.sh` | Cron/CI wrapper script |
861
+ | Modify | `test/e2e/orchestrate.js` | Add `--persist` flag |
862
+ | Modify | `src/routes/dashboard.js` | Add `GET /dashboard/api/test-results` endpoint |
863
+ | Modify | `src/dashboard/public/index.html` | Add Health tab |
864
+ | Modify | `src/dashboard/public/app.js` | Add Health tab rendering + register in tabLoaders |
865
+
866
+ ## Acceptance Criteria Traceability
867
+
868
+ | Criterion | Task |
869
+ |-----------|------|
870
+ | `--json --persist` writes results | Task 1, 3 |
871
+ | `latest.json` reflects most recent run | Task 1 |
872
+ | History with timestamped results (last 20) | Task 1 |
873
+ | Regression detection | Task 1 |
874
+ | `scripts/run-e2e.sh` cron-ready | Task 4 |
875
+ | Dashboard shows latest + trend | Task 5, 6 |
876
+ | Existing 328 tests pass | Task 7 |
877
+ | E2E 51 tests pass | Task 7 |
878
+ | No new npm dependencies | All tasks |
879
+ | Local reporting without Telegram/OpenClaw | Task 1, 4 |