a2acalling 0.6.59 → 0.6.61

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,90 @@
1
+ # Architecture — A2A Calling
2
+
3
+ ## System Overview
4
+
5
+ A2A Calling enables agent-to-agent communication across OpenClaw instances. Agents create tokens with scoped permissions and share invite URLs; remote agents then call in over HTTP.
6
+
7
+ ```
8
+ ┌──────────────────────────────────────────────────────────────────┐
9
+ │ CLI (bin/cli.js) │
10
+ │ Commands: create, list, revoke, call, contacts, conversations │
11
+ └───────────┬──────────────────────────────────────────────────────┘
12
+
13
+ ┌───────────▼──────────────────────────────────────────────────────┐
14
+ │ Express Server (src/server.js) │
15
+ │ ├─ /api/a2a/* → src/routes/a2a.js (inbound calls, tokens) │
16
+ │ ├─ /api/callbook/* → src/routes/callbook.js (callbook sync) │
17
+ │ └─ /dashboard/* → src/routes/dashboard.js (API + SPA) │
18
+ └───────────┬──────────────────────────────────────────────────────┘
19
+
20
+ ┌───────────▼──────────────────────────────────────────────────────┐
21
+ │ Core Libraries (src/lib/) │
22
+ │ ├─ tokens.js Token CRUD, validation, tiers │
23
+ │ ├─ client.js A2AClient for outbound calls │
24
+ │ ├─ conversations.js ConversationStore (SQLite) │
25
+ │ ├─ conversation-driver.js Multi-turn call orchestration │
26
+ │ ├─ summarizer.js Call summary generation │
27
+ │ ├─ summary-prompt.js Unified summary prompt builder │
28
+ │ ├─ summary-formatter.js Format summaries for display │
29
+ │ ├─ disclosure.js Disclosure level enforcement │
30
+ │ ├─ config.js Config file management │
31
+ │ ├─ logger.js Structured logger (SQLite + stdout) │
32
+ │ ├─ call-monitor.js Active call monitoring │
33
+ │ ├─ callbook.js Contact/callbook management │
34
+ │ ├─ claude-subagent.js Claude API integration for summaries │
35
+ │ ├─ openclaw-integration.js OpenClaw runtime hooks │
36
+ │ ├─ prompt-template.js Prompt template utilities │
37
+ │ ├─ runtime-adapter.js Runtime mode detection (standalone/OCW) │
38
+ │ ├─ dashboard-events.js SSE event broadcasting │
39
+ │ ├─ external-ip.js External IP/hostname detection │
40
+ │ ├─ invite-host.js Invite URL construction │
41
+ │ ├─ port-scanner.js Available port detection │
42
+ │ ├─ pid-file.js PID file management │
43
+ │ ├─ turn-timeout.js Conversation turn timeout handling │
44
+ │ ├─ update-checker.js Version update detection │
45
+ │ └─ update-manager.js Self-update orchestration │
46
+ └──────────────────────────────────────────────────────────────────┘
47
+ ```
48
+
49
+ ## Data Storage
50
+
51
+ - **Tokens**: JSON file at `~/.config/openclaw/a2a.json`
52
+ - **Conversations**: SQLite via `better-sqlite3` at `~/.config/openclaw/a2a-conversations.db`
53
+ - **Logs**: SQLite via `better-sqlite3` at `~/.config/openclaw/a2a-logs.db`
54
+ - **Config**: JSON at `~/.config/openclaw/a2a-config.json`
55
+ - **Disclosure**: JSON at `~/.config/openclaw/a2a-disclosure.json`
56
+
57
+ ## Permission System
58
+
59
+ Three tiers with escalating capabilities:
60
+ - **public**: `context-read` only
61
+ - **friends**: `context-read`, `calendar.read`, `email.read`, `search`
62
+ - **family**: `context-read`, `calendar`, `email`, `search`, `tools`, `memory`
63
+
64
+ Three disclosure levels controlling information sharing:
65
+ - **public**: Shares freely within tier boundaries
66
+ - **minimal**: Direct answers only, no volunteered context
67
+ - **none**: Confirms capability, provides no information
68
+
69
+ ## Dependencies
70
+
71
+ Only two runtime dependencies (intentionally minimal):
72
+ - `express` — HTTP server and routing
73
+ - `better-sqlite3` — SQLite for conversations and logs
74
+
75
+ ## Dashboard
76
+
77
+ Single-page app served from `src/dashboard/public/`. Uses Shoelace web components. Communicates with the API via `/dashboard/api/*` routes.
78
+
79
+ ## Native macOS App
80
+
81
+ Tauri v2 app at `native/macos/` wrapping the dashboard SPA. Provides native menus, notifications, and server lifecycle management.
82
+
83
+ ## Testing
84
+
85
+ Zero-dependency test runner at `test/run.js` with custom assert API. Three test tiers:
86
+ - `test/unit/` — Unit tests for individual modules
87
+ - `test/integration/` — Integration tests for multi-module flows
88
+ - `test/e2e/` — End-to-end tests for full system flows
89
+
90
+ Test profiles at `test/profiles/` represent real personas with distinct permission tiers.
package/CONVENTIONS.md ADDED
@@ -0,0 +1,78 @@
1
+ # Conventions — A2A Calling
2
+
3
+ ## Logging
4
+
5
+ Use the structured logger from `src/lib/logger.js`. Never use bare `console.log`.
6
+
7
+ ```js
8
+ const { createLogger } = require('./logger');
9
+ const logger = createLogger({ component: 'a2a.mymodule' });
10
+ logger.info('Something happened', { event: 'my_event', data: { key: 'val' } });
11
+ ```
12
+
13
+ Components follow dotted naming: `a2a.tokens`, `a2a.server`, `a2a.client`, etc.
14
+
15
+ ## Error Handling
16
+
17
+ - Use the project's existing error patterns (e.g., `A2AError` from `src/lib/client.js`)
18
+ - Log errors with `logger.error()`, including error codes and hints
19
+ - HTTP responses use consistent JSON format: `{ success: false, error: { code, message } }`
20
+ - Do NOT create new error classes without strong justification
21
+
22
+ ## Config Resolution
23
+
24
+ Config directory resolves via:
25
+ 1. `process.env.A2A_CONFIG_DIR`
26
+ 2. `process.env.OPENCLAW_CONFIG_DIR`
27
+ 3. `~/.config/openclaw/`
28
+
29
+ Always use `src/lib/config.js` for config access. Do not hardcode paths.
30
+
31
+ ## Testing
32
+
33
+ - Test runner: `node test/run.js` (zero-dependency, custom assert API)
34
+ - Test files: `*.test.js` in `test/unit/`, `test/integration/`, `test/e2e/`
35
+ - Test helpers: `test/helpers.js`
36
+ - Test profiles: `test/profiles/*.js` — real personas, not generic stubs
37
+ - Prefer testing through the public API of each module
38
+
39
+ ## Dependencies
40
+
41
+ This project is intentionally minimal-dependency. Only two runtime deps:
42
+ - `express` — HTTP
43
+ - `better-sqlite3` — SQLite
44
+
45
+ Do NOT add new npm dependencies without explicit justification. Use Node.js built-ins.
46
+
47
+ ## Module Pattern
48
+
49
+ All modules use CommonJS (`require`/`module.exports`). Each lib file exports a focused API. Large modules export a class (e.g., `TokenStore`, `ConversationStore`, `A2AClient`). Utility modules export functions.
50
+
51
+ ## Naming
52
+
53
+ - Files: kebab-case (`call-monitor.js`, `dashboard-events.js`)
54
+ - Classes: PascalCase (`TokenStore`, `A2AClient`)
55
+ - Functions/variables: camelCase
56
+ - Constants: UPPER_SNAKE_CASE for true constants
57
+ - Token IDs: prefixed with `fed_` (federation tokens)
58
+ - Trace IDs: prefixed with `trace_`
59
+
60
+ ## Dashboard
61
+
62
+ - Single-page app in `src/dashboard/public/`
63
+ - Uses Shoelace web components (`<sl-*>` elements)
64
+ - Communicates via fetch to `/dashboard/api/*` endpoints
65
+ - SSE for real-time updates via `src/lib/dashboard-events.js`
66
+
67
+ ## Permission Tiers
68
+
69
+ Tokens have a tier (`public`, `friends`, `family`) and a disclosure level (`public`, `minimal`, `none`). These are enforced at the route level in `src/routes/a2a.js`.
70
+
71
+ ## Anti-Patterns
72
+
73
+ - Do NOT use `console.log` — use the structured logger
74
+ - Do NOT add npm dependencies for things Node.js builtins handle
75
+ - Do NOT create new error classes without strong justification — use existing patterns
76
+ - Do NOT hardcode config paths — use config resolution
77
+ - Do NOT use `var` — use `const` or `let`
78
+ - Do NOT use sync file I/O in request handlers (sync is OK in CLI and setup)
@@ -0,0 +1,879 @@
1
+ # A2A-42: E2E Test Persistence & Dashboard Integration
2
+
3
+ > **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
4
+
5
+ **Goal:** Add local result persistence, regression detection, a cron-ready wrapper, and a dashboard Health tab so E2E test results are stored, compared, and visible without external dependencies.
6
+
7
+ **Architecture:** A new `test/e2e/persist.js` module handles writing timestamped JSON results to `~/.config/openclaw/test-results/`, reading history, and detecting regressions. The existing orchestrator gets a `--persist` flag that saves results through this module after each run. A shell script wraps the orchestrator for cron use. The dashboard gets a new Health tab backed by a single `GET /dashboard/api/test-results` endpoint that reads from the persist layer.
8
+
9
+ **Tech Stack:** Node.js builtins only (fs, path). No new npm dependencies. Shoelace web components for dashboard UI.
10
+
11
+ **Linear Ticket:** A2A-42
12
+
13
+ ---
14
+
15
+ ## Conventions Reminder
16
+
17
+ - Logger: `const { createLogger } = require('./logger'); const logger = createLogger({ component: 'a2a.test' });`
18
+ - No `console.log` in production code (test orchestrator already uses stderr — that's fine)
19
+ - CommonJS modules (`require`/`module.exports`)
20
+ - Config dir: use the same resolution as `src/lib/config.js` — `process.env.A2A_CONFIG_DIR || process.env.OPENCLAW_CONFIG_DIR || ~/.config/openclaw/`
21
+ - File naming: kebab-case
22
+ - Test files: `*.test.js` in `test/unit/`
23
+
24
+ ---
25
+
26
+ ### Task 1: Create `test/e2e/persist.js` — Result Persistence Module
27
+
28
+ **Files:**
29
+ - Create: `test/e2e/persist.js`
30
+
31
+ **Step 1: Write the persist module**
32
+
33
+ This module provides four core functions: `saveResult`, `getLatest`, `getHistory`, and `detectRegression` (it also exports `resolveDir` and a few constants). It stores results as timestamped JSON files in `<configDir>/test-results/`, alongside a `latest.json` that is a regular file copy of the newest result (not a symlink — this avoids cross-platform issues).
34
+
35
+ ```js
36
+ /**
37
+ * E2E Test Result Persistence
38
+ *
39
+ * Stores test results as timestamped JSON in ~/.config/openclaw/test-results/.
40
+ * Provides history retrieval and regression detection.
41
+ *
42
+ * A2A-42: Local-first result storage — no external dependencies.
43
+ */
44
+
45
+ const fs = require('fs');
46
+ const path = require('path');
47
+
48
+ // A2A-42: Default config dir matches src/lib/config.js resolution.
49
+ // Accept configDir parameter for testability (reviewer feedback: module-level
50
+ // constants prevent testing the null-path without subprocess gymnastics).
51
+ const DEFAULT_CONFIG_DIR = process.env.A2A_CONFIG_DIR ||
52
+ process.env.OPENCLAW_CONFIG_DIR ||
53
+ path.join(process.env.HOME || '/tmp', '.config', 'openclaw');
54
+
55
+ const MAX_HISTORY = 20;
56
+
57
+ function resolveDir(configDir) {
58
+ const base = configDir || DEFAULT_CONFIG_DIR;
59
+ return {
60
+ resultsDir: path.join(base, 'test-results'),
61
+ latestFile: path.join(base, 'test-results', 'latest.json')
62
+ };
63
+ }
64
+
65
+ // Module-level defaults for callers that don't pass configDir
66
+ const RESULTS_DIR = resolveDir().resultsDir;
67
+ const LATEST_FILE = resolveDir().latestFile;
68
+
69
+ function ensureDir(dir) {
70
+ if (!fs.existsSync(dir)) {
71
+ fs.mkdirSync(dir, { recursive: true });
72
+ }
73
+ }
74
+
75
+ /**
76
+ * Save a test report result to disk.
77
+ * Writes a timestamped file and updates latest.json.
78
+ * Prunes history beyond MAX_HISTORY entries.
79
+ *
80
+ * @param {object} report - Output from TestReport.toJSON()
81
+ * @returns {{ file: string, latest: string, regression: object }}
82
+ */
83
+ function saveResult(report, options = {}) {
84
+ const { resultsDir, latestFile } = resolveDir(options.configDir);
85
+ ensureDir(resultsDir);
86
+
87
+ const ts = new Date().toISOString().replace(/[:.]/g, '-');
88
+ const filename = `result-${ts}.json`;
89
+ const filepath = path.join(resultsDir, filename);
90
+
91
+ // A2A-42: Detect regression before writing, so we can include it in the saved result
92
+ const previous = getLatest(options);
93
+ const regression = previous ? detectRegression(report, previous) : {
94
+ detected: false,
95
+ newFailures: [],
96
+ fixedTests: []
97
+ };
98
+
99
+ const enriched = { ...report, regression };
100
+ const json = JSON.stringify(enriched, null, 2);
101
+
102
+ // A2A-42: Atomic write via tmp+rename — matches pattern from src/lib/config.js:290
103
+ // Prevents truncated reads if the server reads latest.json mid-write.
104
+ const tmpTimestamped = filepath + '.tmp';
105
+ fs.writeFileSync(tmpTimestamped, json);
106
+ fs.renameSync(tmpTimestamped, filepath);
107
+
108
+ const tmpLatest = latestFile + '.tmp';
109
+ fs.writeFileSync(tmpLatest, json);
110
+ fs.renameSync(tmpLatest, latestFile);
111
+
112
+ pruneHistory(options);
113
+
114
+ return { file: filepath, latest: latestFile, regression };
115
+ }
116
+
117
+ /**
118
+ * Read the most recent test result.
119
+ * @returns {object|null}
120
+ */
121
+ function getLatest(options = {}) {
122
+ const { latestFile } = resolveDir(options.configDir);
123
+ if (!fs.existsSync(latestFile)) return null;
124
+ try {
125
+ return JSON.parse(fs.readFileSync(latestFile, 'utf8'));
126
+ } catch {
127
+ return null;
128
+ }
129
+ }
130
+
131
+ /**
132
+ * Read the last N results, newest first.
133
+ * @param {number} [limit=20]
134
+ * @returns {object[]}
135
+ */
136
+ function getHistory(limit = MAX_HISTORY, options = {}) {
137
+ const { resultsDir } = resolveDir(options.configDir);
138
+ if (!fs.existsSync(resultsDir)) return [];
139
+
140
+ const files = fs.readdirSync(resultsDir)
141
+ .filter(f => f.startsWith('result-') && f.endsWith('.json'))
142
+ .sort()
143
+ .reverse()
144
+ .slice(0, Math.max(1, limit));
145
+
146
+ return files.map(f => {
147
+ try {
148
+ return JSON.parse(fs.readFileSync(path.join(resultsDir, f), 'utf8'));
149
+ } catch {
150
+ return null;
151
+ }
152
+ }).filter(Boolean);
153
+ }
154
+
155
+ /**
156
+ * Compare current vs previous result for regressions.
157
+ * A regression is a step that passed before but fails now.
158
+ * A fix is a step that failed before but passes now.
159
+ *
160
+ * @param {object} current - Current report JSON
161
+ * @param {object} previous - Previous report JSON
162
+ * @returns {{ detected: boolean, newFailures: string[], fixedTests: string[] }}
163
+ */
164
+ function detectRegression(current, previous) {
165
+ const prevSteps = new Map();
166
+ for (const step of (previous.steps || [])) {
167
+ prevSteps.set(step.name, step.status);
168
+ }
169
+
170
+ const newFailures = [];
171
+ const fixedTests = [];
172
+
173
+ for (const step of (current.steps || [])) {
174
+ const prevStatus = prevSteps.get(step.name);
175
+ if (!prevStatus) continue; // new step, not a regression
176
+ if (step.status === 'fail' && prevStatus === 'pass') {
177
+ newFailures.push(step.name);
178
+ }
179
+ if (step.status === 'pass' && prevStatus === 'fail') {
180
+ fixedTests.push(step.name);
181
+ }
182
+ }
183
+
184
+ return {
185
+ detected: newFailures.length > 0,
186
+ newFailures,
187
+ fixedTests
188
+ };
189
+ }
190
+
191
+ /**
192
+ * Remove old result files beyond MAX_HISTORY.
193
+ */
194
+ function pruneHistory(options = {}) {
195
+ const { resultsDir } = resolveDir(options.configDir);
196
+ if (!fs.existsSync(resultsDir)) return;
197
+
198
+ const files = fs.readdirSync(resultsDir)
199
+ .filter(f => f.startsWith('result-') && f.endsWith('.json'))
200
+ .sort();
201
+
202
+ while (files.length > MAX_HISTORY) {
203
+ const oldest = files.shift();
204
+ try {
205
+ fs.unlinkSync(path.join(resultsDir, oldest));
206
+ } catch {
207
+ // best effort
208
+ }
209
+ }
210
+ }
211
+
212
+ module.exports = {
213
+ saveResult,
214
+ getLatest,
215
+ getHistory,
216
+ detectRegression,
217
+ RESULTS_DIR,
218
+ LATEST_FILE,
219
+ MAX_HISTORY,
220
+ resolveDir
221
+ };
222
+ ```
223
+
224
+ **Step 2: Commit**
225
+
226
+ ```bash
227
+ git add test/e2e/persist.js
228
+ git commit -m "feat(A2A-42): add E2E result persistence module"
229
+ ```
230
+
231
+ ---
232
+
233
+ ### Task 2: Unit Tests for `persist.js`
234
+
235
+ **Files:**
236
+ - Create: `test/unit/persist.test.js`
237
+
238
+ **Step 1: Write the tests**
239
+
240
+ ```js
241
+ /**
242
+ * Unit tests for test/e2e/persist.js
243
+ *
244
+ * Uses configDir parameter for isolation — no env var hacks.
245
+ * Each test gets a fresh temp directory.
246
+ */
247
+
248
+ const fs = require('fs');
249
+ const path = require('path');
250
+ const os = require('os');
251
+
252
+ const { saveResult, getLatest, getHistory, detectRegression, resolveDir } = require('../e2e/persist');
253
+
254
+ function makeTmpDir() {
255
+ return fs.mkdtempSync(path.join(os.tmpdir(), 'persist-test-'));
256
+ }
257
+
258
+ function makeReport(overrides = {}) {
259
+ return {
260
+ name: 'E2E Orchestrator',
261
+ status: overrides.status || 'passed',
262
+ startedAt: Date.now() - 500,
263
+ finishedAt: Date.now(),
264
+ duration: 500,
265
+ summary: { passed: 8, failed: 0, skipped: 0, total: 8 },
266
+ steps: overrides.steps || [
267
+ { name: 'Create harness', status: 'pass', timestamp: Date.now() },
268
+ { name: 'Start servers', status: 'pass', timestamp: Date.now() },
269
+ { name: 'Ping both agents', status: 'pass', timestamp: Date.now() },
270
+ { name: 'Create tokens', status: 'pass', timestamp: Date.now() },
271
+ { name: 'Exchange invites', status: 'pass', timestamp: Date.now() },
272
+ { name: 'B calls A', status: 'pass', timestamp: Date.now() },
273
+ { name: 'A calls B', status: 'pass', timestamp: Date.now() },
274
+ { name: 'Verify response integrity', status: 'pass', timestamp: Date.now() }
275
+ ],
276
+ ...overrides
277
+ };
278
+ }
279
+
280
+ module.exports = ({ test, assert }) => {
281
+ test('saveResult creates results directory and files', () => {
282
+ const configDir = makeTmpDir();
283
+ const report = makeReport();
284
+ const result = saveResult(report, { configDir });
285
+
286
+ assert.ok(fs.existsSync(result.file), 'Should create timestamped file');
287
+ assert.ok(fs.existsSync(result.latest), 'Should create latest.json');
288
+
289
+ const saved = JSON.parse(fs.readFileSync(result.file, 'utf8'));
290
+ assert.equal(saved.status, 'passed');
291
+ assert.equal(saved.summary.passed, 8);
292
+ assert.ok(saved.regression, 'Should include regression field');
293
+ assert.equal(saved.regression.detected, false);
294
+ });
295
+
296
+ test('getLatest returns the most recent result', () => {
297
+ const configDir = makeTmpDir();
298
+ const report = makeReport({ duration: 999 });
299
+ saveResult(report, { configDir });
300
+ const latest = getLatest({ configDir });
301
+ assert.ok(latest, 'Should return a result');
302
+ assert.equal(latest.duration, 999);
303
+ });
304
+
305
+ test('getLatest returns null when no results exist', () => {
306
+ const configDir = makeTmpDir();
307
+ const latest = getLatest({ configDir });
308
+ assert.equal(latest, null, 'Should return null for empty directory');
309
+ });
310
+
311
+ test('getHistory returns results newest first', () => {
312
+ const configDir = makeTmpDir();
313
+ for (let i = 0; i < 3; i++) {
314
+ saveResult(makeReport({ duration: 100 + i }), { configDir });
315
+ }
316
+ const history = getHistory(10, { configDir });
317
+ assert.ok(history.length >= 3, 'Should have at least 3 results');
318
+ assert.ok(history[0].duration >= history[history.length - 1].duration,
319
+ 'Should be sorted newest first');
320
+ });
321
+
322
+ test('getHistory returns empty array for missing directory', () => {
323
+ const configDir = makeTmpDir();
324
+ const history = getHistory(10, { configDir });
325
+ assert.equal(history.length, 0, 'Should return empty array');
326
+ });
327
+
328
+ test('detectRegression identifies new failures', () => {
329
+ const previous = makeReport();
330
+ const current = makeReport({
331
+ status: 'failed',
332
+ steps: [
333
+ { name: 'Create harness', status: 'pass', timestamp: Date.now() },
334
+ { name: 'Start servers', status: 'fail', timestamp: Date.now() },
335
+ { name: 'Ping both agents', status: 'pass', timestamp: Date.now() }
336
+ ]
337
+ });
338
+
339
+ const result = detectRegression(current, previous);
340
+ assert.equal(result.detected, true);
341
+ assert.ok(result.newFailures.includes('Start servers'));
342
+ assert.equal(result.fixedTests.length, 0);
343
+ });
344
+
345
+ test('detectRegression identifies fixed tests', () => {
346
+ const previous = makeReport({
347
+ steps: [
348
+ { name: 'Create harness', status: 'pass', timestamp: Date.now() },
349
+ { name: 'Start servers', status: 'fail', timestamp: Date.now() }
350
+ ]
351
+ });
352
+ const current = makeReport({
353
+ steps: [
354
+ { name: 'Create harness', status: 'pass', timestamp: Date.now() },
355
+ { name: 'Start servers', status: 'pass', timestamp: Date.now() }
356
+ ]
357
+ });
358
+
359
+ const result = detectRegression(current, previous);
360
+ assert.equal(result.detected, false);
361
+ assert.ok(result.fixedTests.includes('Start servers'));
362
+ });
363
+
364
+ test('pruneHistory keeps only MAX_HISTORY files', () => {
365
+ const configDir = makeTmpDir();
366
+ for (let i = 0; i < 25; i++) {
367
+ saveResult(makeReport({ duration: i }), { configDir });
368
+ }
369
+
370
+ const { resultsDir } = resolveDir(configDir);
371
+ const files = fs.readdirSync(resultsDir)
372
+ .filter(f => f.startsWith('result-') && f.endsWith('.json'));
373
+
374
+ assert.ok(files.length <= 20, `Should have at most 20 files, got ${files.length}`);
375
+ });
376
+
377
+ test('saveResult uses atomic write (tmp+rename)', () => {
378
+ const configDir = makeTmpDir();
379
+ const report = makeReport();
380
+ const result = saveResult(report, { configDir });
381
+
382
+ // No .tmp files should remain after write
383
+ const { resultsDir } = resolveDir(configDir);
384
+ const tmpFiles = fs.readdirSync(resultsDir).filter(f => f.endsWith('.tmp'));
385
+ assert.equal(tmpFiles.length, 0, 'No .tmp files should remain after atomic write');
386
+ });
387
+ };
388
+ ```
389
+
390
+ **Step 2: Run tests**
391
+
392
+ ```bash
393
+ node test/run.js --filter persist
394
+ ```
395
+
396
+ Expected: All persist tests pass.
397
+
398
+ **Step 3: Commit**
399
+
400
+ ```bash
401
+ git add test/unit/persist.test.js
402
+ git commit -m "test(A2A-42): add unit tests for E2E result persistence"
403
+ ```
404
+
405
+ ---
406
+
407
+ ### Task 3: Add `--persist` Flag to Orchestrator
408
+
409
+ **Files:**
410
+ - Modify: `test/e2e/orchestrate.js`
411
+
412
+ **Step 1: Add persist flag handling**
413
+
414
+ After line 18 (`const jsonOutput = args.includes('--json');`), add:
415
+
416
+ ```js
417
+ const persistResults = args.includes('--persist');
418
+ ```
419
+
420
+ Insert the following inside `main()`, between `report.finish();` (line 238) and the `// Output report` comment (line 240) — that is, after the `finally` block closes, at the same brace depth as `report.finish()`:
421
+
422
+ ```js
423
+ // A2A-42: Persist results to local storage for regression tracking
424
+ if (persistResults) {
425
+ try {
426
+ const { saveResult } = require('./persist');
427
+ const persisted = saveResult(report.toJSON());
428
+ if (!jsonOutput) {
429
+ process.stderr.write(`Results saved to ${persisted.file}\n`);
430
+ }
431
+ if (persisted.regression.detected) {
432
+ process.stderr.write(`⚠ REGRESSION DETECTED: ${persisted.regression.newFailures.join(', ')}\n`);
433
+ }
434
+ if (persisted.regression.fixedTests.length > 0) {
435
+ process.stderr.write(`✓ Fixed: ${persisted.regression.fixedTests.join(', ')}\n`);
436
+ }
437
+ } catch (err) {
438
+ process.stderr.write(`Warning: Failed to persist results: ${err.message}\n`);
439
+ }
440
+ }
441
+ ```
442
+
443
+ Also update the usage comment at the top to document the new flag:
444
+
445
+ ```
446
+ * Usage:
447
+ * node test/e2e/orchestrate.js # markdown report to stderr
448
+ * node test/e2e/orchestrate.js --json # JSON report to stdout
449
+ * node test/e2e/orchestrate.js --persist # save results to disk
450
+ * node test/e2e/orchestrate.js --json --persist # both
451
+ * node test/e2e/orchestrate.js --verbose # verbose output
452
+ ```
453
+
454
+ **Step 2: Verify it works manually**
455
+
456
+ ```bash
457
+ node test/e2e/orchestrate.js --json --persist 2>/dev/null | head -5
458
+ ls ~/.config/openclaw/test-results/
459
+ ```
460
+
461
+ Expected: JSON output, and `test-results/` directory with `latest.json` and a timestamped file.
462
+
463
+ **Step 3: Run the full test suite**
464
+
465
+ ```bash
466
+ npm test
467
+ ```
468
+
469
+ Expected: 328 passing, 2 failing (same pre-existing failures).
470
+
471
+ **Step 4: Commit**
472
+
473
+ ```bash
474
+ git add test/e2e/orchestrate.js
475
+ git commit -m "feat(A2A-42): add --persist flag to E2E orchestrator"
476
+ ```
477
+
478
+ ---
479
+
480
+ ### Task 4: Create `scripts/run-e2e.sh` — Cron/CI Wrapper
481
+
482
+ **Files:**
483
+ - Create: `scripts/run-e2e.sh`
484
+
485
+ **Step 1: Write the wrapper script**
486
+
487
+ ```bash
488
+ #!/usr/bin/env bash
489
+ # A2A-42: Cron/CI wrapper for E2E test orchestrator.
490
+ # Runs orchestration, persists results, optionally alerts on failure.
491
+ #
492
+ # Usage:
493
+ # scripts/run-e2e.sh # run + persist
494
+ # scripts/run-e2e.sh --alert # run + persist + alert on failure
495
+ #
496
+ # Cron example (every 6 hours):
497
+ # 0 */6 * * * /root/a2acalling/scripts/run-e2e.sh --alert >> /var/log/a2a-e2e.log 2>&1
498
+
499
+ set -euo pipefail
500
+
501
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
502
+ PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
503
+ ALERT_SCRIPT="/root/maestro/scripts/alert.sh"
504
+ ALERT_ON_FAILURE=false
505
+
506
+ for arg in "$@"; do
507
+ case "$arg" in
508
+ --alert) ALERT_ON_FAILURE=true ;;
509
+ esac
510
+ done
511
+
512
+ echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] Starting E2E orchestration..."
513
+
514
+ cd "$PROJECT_DIR"
515
+
516
+ # A2A-42: Run orchestrator with JSON output and persistence.
517
+ # stdout (JSON) goes to /dev/null; stderr (regression messages, logs) passes through
518
+ # so cron log captures warnings like "REGRESSION DETECTED: ..."
519
+ if node test/e2e/orchestrate.js --json --persist > /dev/null; then
520
+ echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] E2E: PASSED"
521
+ exit 0
522
+ else
523
+ EXIT_CODE=$?
524
+ echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] E2E: FAILED (exit $EXIT_CODE)"
525
+
526
+ if [ "$ALERT_ON_FAILURE" = true ] && [ -x "$ALERT_SCRIPT" ]; then
527
+ "$ALERT_SCRIPT" error "E2E test failure detected — check ~/.config/openclaw/test-results/latest.json"
528
+ fi
529
+
530
+ exit "$EXIT_CODE"
531
+ fi
532
+ ```
533
+
534
+ **Step 2: Make it executable**
535
+
536
+ ```bash
537
+ chmod +x scripts/run-e2e.sh
538
+ ```
539
+
540
+ **Step 3: Test it runs**
541
+
542
+ ```bash
543
+ scripts/run-e2e.sh
544
+ echo $?
545
+ ```
546
+
547
+ Expected: exit 0 with "PASSED" message.
548
+
549
+ **Step 4: Commit**
550
+
551
+ ```bash
552
+ git add scripts/run-e2e.sh
553
+ git commit -m "feat(A2A-42): add cron/CI wrapper script for E2E tests"
554
+ ```
555
+
556
+ ---
557
+
558
+ ### Task 5: Dashboard API Endpoint — `GET /dashboard/api/test-results`
559
+
560
+ **Files:**
561
+ - Modify: `src/routes/dashboard.js` (add endpoint inside `createDashboardApiRouter`, after the `/logs/stats` route ~line 884)
562
+
563
+ **Step 1: Add the endpoint**
564
+
565
+ At the top of the `createDashboardApiRouter` function (after `const context = buildContext(options);` on line 449), add the persist require with a try/catch so it degrades gracefully in installed-package environments where test/e2e/ doesn't exist:
566
+
567
+ ```js
568
+ // A2A-42: Load E2E persist layer for Health tab. Gracefully null if not available
569
+ // (e.g., installed as npm package without test files).
570
+ let persistModule = null;
571
+ try {
572
+ persistModule = require(path.join(__dirname, '..', '..', 'test', 'e2e', 'persist'));
573
+ } catch {
574
+ // test/e2e/persist.js not available — Health tab will show "no results"
575
+ }
576
+ ```
577
+
578
+ Then add this route after the `router.get('/logs/stats', ...)` block (around line 884):
579
+
580
+ ```js
581
+ // A2A-42: Serve E2E test results for the Health tab.
582
+ // Reads from local persist layer — no external dependencies.
583
+ router.get('/test-results', (req, res) => {
584
+ if (!persistModule) {
585
+ return res.json({
586
+ success: true,
587
+ latest: null,
588
+ history: [],
589
+ has_results: false,
590
+ message: 'Test results module not available'
591
+ });
592
+ }
593
+
594
+ const latest = persistModule.getLatest();
595
+ const limit = Math.min(20, Math.max(1, Number.parseInt(req.query.limit || '10', 10) || 10));
596
+ const history = persistModule.getHistory(limit);
597
+
598
+ return res.json({
599
+ success: true,
600
+ latest,
601
+ history: history.map(r => ({
602
+ status: r.status,
603
+ duration: r.duration,
604
+ startedAt: r.startedAt,
605
+ finishedAt: r.finishedAt,
606
+ summary: r.summary,
607
+ regression: r.regression || null
608
+ })),
609
+ has_results: latest !== null
610
+ });
611
+ });
612
+ ```
613
+
614
+ **Step 2: Run full test suite**
615
+
616
+ ```bash
617
+ npm test
618
+ ```
619
+
620
+ Expected: 328 passing, 2 failing (same pre-existing).
621
+
622
+ **Step 3: Commit**
623
+
624
+ ```bash
625
+ git add src/routes/dashboard.js
626
+ git commit -m "feat(A2A-42): add dashboard API endpoint for test results"
627
+ ```
628
+
629
+ ---
630
+
631
+ ### Task 6: Dashboard UI — Health Tab
632
+
633
+ **Files:**
634
+ - Modify: `src/dashboard/public/index.html` (add Health tab)
635
+ - Modify: `src/dashboard/public/app.js` (add Health tab rendering and loader)
636
+
637
+ **Step 1: Add tab to index.html**
638
+
639
+ After the Logs tab (`<sl-tab slot="nav" panel="logs">Logs</sl-tab>`) on line 23, add:
640
+
641
+ ```html
642
+ <sl-tab slot="nav" panel="health">Health</sl-tab>
643
+ ```
644
+
645
+ Before the closing `</sl-tab-group>` (line 257), add the Health panel:
646
+
647
+ ```html
648
+ <sl-tab-panel name="health">
649
+ <h2>E2E Health</h2>
650
+ <sl-card id="health-latest">
651
+ <p>No test results available. Run <code>node test/e2e/orchestrate.js --persist</code> to generate results.</p>
652
+ </sl-card>
653
+ <h3>History</h3>
654
+ <table id="health-history-table">
655
+ <thead>
656
+ <tr>
657
+ <th>Status</th>
658
+ <th>Duration</th>
659
+ <th>Passed</th>
660
+ <th>Failed</th>
661
+ <th>Regression</th>
662
+ <th>Time</th>
663
+ </tr>
664
+ </thead>
665
+ <tbody></tbody>
666
+ </table>
667
+ </sl-tab-panel>
668
+ ```
669
+
670
+ **Step 2: Add Health tab JS to app.js**
671
+
672
+ At the end of `app.js`, before the `bootstrap()` call (around line 2138), add:
673
+
674
+ ```js
675
+ // === Health Tab (A2A-42) ===
676
+
677
+ // A2A-42: Escape HTML entities for safe innerHTML rendering of step names/errors.
678
+ function escapeHtml(s) {
679
+ return String(s).replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;');
680
+ }
681
+
682
+ async function loadHealth() {
683
+ try {
684
+ const res = await fetch('./api/test-results');
685
+ const data = await res.json();
686
+ renderHealthLatest(data.latest);
687
+ renderHealthHistory(data.history || []);
688
+ } catch (err) {
689
+ renderHealthLatest(null);
690
+ renderHealthHistory([]);
691
+ }
692
+ }
693
+
694
/**
 * A2A-42: Render the most recent E2E run into the `#health-latest` card.
 *
 * Every value taken from the result record is passed through escapeHtml()
 * before it reaches innerHTML, and list fields that are missing or not arrays
 * are treated as empty so a partial record still renders.
 *
 * @param {object|null|undefined} latest - Latest run record, or a falsy value
 *   to show the "no results" placeholder. Fields read: status, duration,
 *   finishedAt, summary.{passed,failed,skipped},
 *   regression.{detected,newFailures,fixedTests}, steps[].{name,status,error}.
 * @returns {void}
 */
function renderHealthLatest(latest) {
  const card = document.getElementById('health-latest');
  if (!card) return;

  if (!latest) {
    card.innerHTML = '<p>No test results available. Run <code>node test/e2e/orchestrate.js --persist</code> to generate.</p>';
    return;
  }

  // Missing or malformed list fields count as empty instead of throwing mid-render.
  const asList = (value) => (Array.isArray(value) ? value : []);
  const escapedNames = (value) => asList(value).map((name) => escapeHtml(name)).join(', ');

  const statusBadge = latest.status === 'passed'
    ? '<sl-badge variant="success">PASSED</sl-badge>'
    : '<sl-badge variant="danger">FAILED</sl-badge>';

  const regression = latest.regression || {};
  let regressionHtml = '';
  if (regression.detected) {
    regressionHtml = `<p><sl-badge variant="warning">REGRESSION</sl-badge> New failures: ${escapedNames(regression.newFailures)}</p>`;
  }
  if (asList(regression.fixedTests).length > 0) {
    regressionHtml += `<p><sl-badge variant="success">FIXED</sl-badge> ${escapedNames(regression.fixedTests)}</p>`;
  }

  const ts = latest.finishedAt ? new Date(latest.finishedAt).toLocaleString() : 'unknown';
  const summary = latest.summary || {};
  const steps = asList(latest.steps);

  const stepItems = steps.map((s) => {
    const icon = s.status === 'pass' ? '&#x2705;' : s.status === 'fail' ? '&#x274C;' : '&#x23ED;';
    // Error text is truncated to 120 characters before escaping to keep the list compact.
    const err = s.error ? ` — <code>${escapeHtml(String(s.error).slice(0, 120))}</code>` : '';
    return `<li>${icon} ${escapeHtml(s.name)}${err}</li>`;
  }).join('');

  card.innerHTML = `
    <div class="row">
      <strong>Latest Run</strong> ${statusBadge}
    </div>
    <p><strong>Duration:</strong> ${escapeHtml(latest.duration || 0)}ms &middot;
    <strong>Passed:</strong> ${escapeHtml(summary.passed || 0)} &middot;
    <strong>Failed:</strong> ${escapeHtml(summary.failed || 0)} &middot;
    <strong>Skipped:</strong> ${escapeHtml(summary.skipped || 0)} &middot;
    <strong>Time:</strong> ${escapeHtml(ts)}</p>
    ${regressionHtml}
    <details>
      <summary>Steps (${steps.length})</summary>
      <ul>
        ${stepItems}
      </ul>
    </details>
  `;
}
741
+
742
/**
 * A2A-42: Render past E2E runs as rows of `#health-history-table`.
 *
 * @param {Array<object>|null|undefined} history - Run records. Fields read per
 *   record: status, duration, finishedAt, summary.{passed,failed},
 *   regression.{detected,newFailures}. Anything that is not a non-empty array
 *   renders the single "No history" row.
 * @returns {void}
 */
function renderHealthHistory(history) {
  const tbody = document.querySelector('#health-history-table tbody');
  if (!tbody) return;

  if (!Array.isArray(history) || history.length === 0) {
    tbody.innerHTML = '<tr><td colspan="6">No history</td></tr>';
    return;
  }

  tbody.innerHTML = history.map((r) => {
    // A null/absent record renders as an all-default row rather than aborting the whole table.
    const run = r || {};
    const badge = run.status === 'passed'
      ? '<sl-badge variant="success" size="small">PASS</sl-badge>'
      : '<sl-badge variant="danger" size="small">FAIL</sl-badge>';
    const summary = run.summary || {};
    const regression = run.regression || {};
    // Count is 0 when a regression is flagged but its failure list is missing.
    const newFailureCount = Array.isArray(regression.newFailures) ? regression.newFailures.length : 0;
    const regText = regression.detected
      ? `<sl-badge variant="warning" size="small">${newFailureCount} new</sl-badge>`
      : '-';
    const ts = run.finishedAt ? new Date(run.finishedAt).toLocaleString() : '-';
    return `<tr>
      <td>${badge}</td>
      <td>${escapeHtml(run.duration || 0)}ms</td>
      <td>${escapeHtml(summary.passed || 0)}</td>
      <td>${escapeHtml(summary.failed || 0)}</td>
      <td>${regText}</td>
      <td>${escapeHtml(ts)}</td>
    </tr>`;
  }).join('');
}
771
+ ```
772
+
773
+ **Step 3: Register the Health tab in tabLoaders**
774
+
775
+ In `app.js`, find the `tabLoaders` object (line ~2077) and add:
776
+
777
+ ```js
778
+ health: loadHealth,
779
+ ```
780
+
781
+ So it becomes:
782
+
783
+ ```js
784
+ const tabLoaders = {
785
+ contacts: loadContacts,
786
+ calls: loadCalls,
787
+ logs: () => { loadLogs(); loadLogStats(); },
788
+ permissions: () => {},
789
+ invites: loadInvites,
790
+ health: loadHealth,
791
+ };
792
+ ```
793
+
794
+ **Step 4: Run full test suite**
795
+
796
+ ```bash
797
+ npm test
798
+ ```
799
+
800
+ Expected: 328 passing, 2 failing (the same pre-existing install-skills failures).
801
+
802
+ **Step 5: Commit**
803
+
804
+ ```bash
805
+ git add src/dashboard/public/index.html src/dashboard/public/app.js
806
+ git commit -m "feat(A2A-42): add Health tab to dashboard with E2E results display"
807
+ ```
808
+
809
+ ---
810
+
811
+ ### Task 7: Final Verification & Squash
812
+
813
+ **Step 1: Run E2E orchestrator with persist to seed data**
814
+
815
+ ```bash
816
+ node test/e2e/orchestrate.js --json --persist > /dev/null 2>&1
817
+ cat ~/.config/openclaw/test-results/latest.json | head -20
818
+ ```
819
+
820
+ Expected: JSON with `status`, `summary`, `regression` fields.
821
+
822
+ **Step 2: Run full test suite (all tiers)**
823
+
824
+ ```bash
825
+ npm test
826
+ ```
827
+
828
+ Expected: 328 passing, 2 failing (same pre-existing install-skills failures).
829
+
830
+ **Step 3: Run E2E tier specifically**
831
+
832
+ ```bash
833
+ node test/run.js --e2e
834
+ ```
835
+
836
+ Expected: 51+ passing E2E tests.
837
+
838
+ **Step 4: Verify diff size**
839
+
840
+ ```bash
841
+ git diff --stat origin/main
842
+ ```
843
+
844
+ Expected: Under 500 lines changed, under 15 files.
845
+
846
+ **Step 5: Push and open PR**
847
+
848
+ ```bash
849
+ git push origin feature/a2a-42
850
+ ```
851
+
852
+ ---
853
+
854
+ ## File Summary
855
+
856
+ | Action | File | Description |
857
+ |--------|------|-------------|
858
+ | Create | `test/e2e/persist.js` | Result persistence: save, read, history, regression detect |
859
+ | Create | `test/unit/persist.test.js` | Unit tests for persist module |
860
+ | Create | `scripts/run-e2e.sh` | Cron/CI wrapper script |
861
+ | Modify | `test/e2e/orchestrate.js` | Add `--persist` flag |
862
+ | Modify | `src/routes/dashboard.js` | Add `GET /dashboard/api/test-results` endpoint |
863
+ | Modify | `src/dashboard/public/index.html` | Add Health tab |
864
+ | Modify | `src/dashboard/public/app.js` | Add Health tab rendering + register in tabLoaders |
865
+
866
+ ## Acceptance Criteria Traceability
867
+
868
+ | Criterion | Task |
869
+ |-----------|------|
870
+ | `--json --persist` writes results | Task 1, 3 |
871
+ | `latest.json` reflects most recent run | Task 1 |
872
+ | History with timestamped results (last 20) | Task 1 |
873
+ | Regression detection | Task 1 |
874
+ | `scripts/run-e2e.sh` cron-ready | Task 4 |
875
+ | Dashboard shows latest + trend | Task 5, 6 |
876
+ | Existing 328 tests pass | Task 7 |
877
+ | E2E 51 tests pass | Task 7 |
878
+ | No new npm dependencies | All tasks |
879
+ | Local reporting without Telegram/OpenClaw | Task 1, 4 |
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "a2acalling",
3
- "version": "0.6.59",
3
+ "version": "0.6.61",
4
4
  "description": "Agent-to-agent calling for OpenClaw - A2A agent communication",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -37,12 +37,19 @@ const SKILL_FILES = [
37
37
  // Copied to .claude/ so Claude Code discovers it naturally without grepping
38
38
  // node_modules. This is opt-in context: only loaded when the agent looks.
39
39
  { src: 'SKILL.md', dest: '.claude/a2a-skill-reference.md' },
40
- // Claude Code slash commands
40
+ // Claude Code slash commands — core (A2A-28)
41
41
  { src: '.claude/commands/a2a-call.md', dest: '.claude/commands/a2a-call.md' },
42
42
  { src: '.claude/commands/a2a-invite.md', dest: '.claude/commands/a2a-invite.md' },
43
43
  { src: '.claude/commands/a2a-contacts.md', dest: '.claude/commands/a2a-contacts.md' },
44
44
  { src: '.claude/commands/a2a-status.md', dest: '.claude/commands/a2a-status.md' },
45
45
  { src: '.claude/commands/a2a-setup.md', dest: '.claude/commands/a2a-setup.md' },
46
+ // Claude Code slash commands — extended (A2A-43)
47
+ { src: '.claude/commands/a2a-update.md', dest: '.claude/commands/a2a-update.md' },
48
+ { src: '.claude/commands/a2a-uninstall.md', dest: '.claude/commands/a2a-uninstall.md' },
49
+ { src: '.claude/commands/a2a-app.md', dest: '.claude/commands/a2a-app.md' },
50
+ { src: '.claude/commands/a2a-conversations.md', dest: '.claude/commands/a2a-conversations.md' },
51
+ { src: '.claude/commands/a2a-gui.md', dest: '.claude/commands/a2a-gui.md' },
52
+ { src: '.claude/commands/a2a-skills.md', dest: '.claude/commands/a2a-skills.md' },
46
53
  // Codex agent instructions
47
54
  { src: '.codex/AGENTS.md', dest: '.codex/AGENTS.md' }
48
55
  ];
@@ -0,0 +1,44 @@
1
#!/usr/bin/env bash
# A2A-42: Cron/CI wrapper for E2E test orchestrator.
# Runs orchestration, persists results, optionally alerts on failure.
# Exits 0 when the orchestrator passes, otherwise with the orchestrator's exit code.
#
# Usage:
#   scripts/run-e2e.sh          # run + persist
#   scripts/run-e2e.sh --alert  # run + persist + alert on failure
#
# Environment:
#   ALERT_SCRIPT  Executable invoked as `$ALERT_SCRIPT error "<message>"` when
#                 --alert is given and the run fails.
#                 Default: /root/maestro/scripts/alert.sh
#
# Cron example (every 6 hours):
#   0 */6 * * * /root/a2acalling/scripts/run-e2e.sh --alert >> /var/log/a2a-e2e.log 2>&1

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
ALERT_SCRIPT="${ALERT_SCRIPT:-/root/maestro/scripts/alert.sh}"
ALERT_ON_FAILURE=false

# UTC timestamp used as the prefix of every log line.
timestamp() {
  date -u +%Y-%m-%dT%H:%M:%SZ
}

for arg in "$@"; do
  case "$arg" in
    --alert) ALERT_ON_FAILURE=true ;;
    *) echo "[$(timestamp)] WARNING: ignoring unknown argument: $arg" >&2 ;;
  esac
done

echo "[$(timestamp)] Starting E2E orchestration..."

cd "$PROJECT_DIR"

# A2A-42: Run orchestrator with JSON output and persistence.
# stdout (JSON) goes to /dev/null; stderr (regression messages, logs) passes through
# so cron log captures warnings like "REGRESSION DETECTED: ..."
if node test/e2e/orchestrate.js --json --persist > /dev/null; then
  echo "[$(timestamp)] E2E: PASSED"
  exit 0
else
  EXIT_CODE=$?
  echo "[$(timestamp)] E2E: FAILED (exit $EXIT_CODE)"

  if [ "$ALERT_ON_FAILURE" = true ]; then
    if [ -x "$ALERT_SCRIPT" ]; then
      # The `||` fallback stops `set -e` from aborting here, so the script
      # still exits with the orchestrator's status rather than the alert's.
      "$ALERT_SCRIPT" error "E2E test failure detected — check ~/.config/openclaw/test-results/latest.json" \
        || echo "[$(timestamp)] WARNING: alert script exited non-zero: $ALERT_SCRIPT" >&2
    else
      echo "[$(timestamp)] WARNING: --alert requested but $ALERT_SCRIPT is not executable; no alert sent" >&2
    fi
  fi

  exit "$EXIT_CODE"
fi
@@ -2080,6 +2080,7 @@ const tabLoaders = {
2080
2080
  logs: () => { loadLogs(); loadLogStats(); },
2081
2081
  permissions: () => {},
2082
2082
  invites: loadInvites,
2083
+ health: loadHealth,
2083
2084
  };
2084
2085
 
2085
2086
  function startPolling() {
@@ -2102,6 +2103,102 @@ function onTabSwitch(tabName) {
2102
2103
  startPolling(); // reset the 5s timer
2103
2104
  }
2104
2105
 
2106
+ // === Health Tab (A2A-42) ===
2107
+
2108
/**
 * A2A-42: Escape HTML-significant characters so arbitrary text (step names,
 * error messages) can be interpolated into an innerHTML template.
 *
 * @param {*} s - Value to escape; coerced with String(). null and undefined
 *   produce an empty string instead of the literal text "null"/"undefined".
 * @returns {string} The text with & < > " ' replaced by HTML entities.
 */
function escapeHtml(s) {
  if (s == null) return '';
  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  // One pass over the input, so the '&' that begins an emitted entity is never escaped again.
  return String(s).replace(/[&<>"']/g, (ch) => entities[ch]);
}
2112
+
2113
/**
 * A2A-42: Tab loader for the Health tab.
 *
 * Calls request('/test-results') and passes the result's `latest` and
 * `history` fields to renderHealthLatest() / renderHealthHistory().
 * Never rejects: if the request or the rendering throws, the error is logged
 * and both views are reset to their empty state.
 *
 * @returns {Promise<void>}
 */
async function loadHealth() {
  try {
    const data = await request('/test-results');
    renderHealthLatest(data.latest);
    renderHealthHistory(data.history || []);
  } catch (err) {
    // Log before falling back so a broken endpoint is distinguishable from "no runs yet".
    console.warn('[health] failed to load E2E test results:', err);
    renderHealthLatest(null);
    renderHealthHistory([]);
  }
}
2123
+
2124
/**
 * A2A-42: Render the most recent E2E run into the `#health-latest` card.
 *
 * Every value taken from the result record is passed through escapeHtml()
 * before it reaches innerHTML, and list fields that are missing or not arrays
 * are treated as empty so a partial record still renders.
 *
 * @param {object|null|undefined} latest - Latest run record, or a falsy value
 *   to show the "no results" placeholder. Fields read: status, duration,
 *   finishedAt, summary.{passed,failed,skipped},
 *   regression.{detected,newFailures,fixedTests}, steps[].{name,status,error}.
 * @returns {void}
 */
function renderHealthLatest(latest) {
  const card = document.getElementById('health-latest');
  if (!card) return;

  if (!latest) {
    card.innerHTML = '<p>No test results available. Run <code>node test/e2e/orchestrate.js --persist</code> to generate.</p>';
    return;
  }

  // Missing or malformed list fields count as empty instead of throwing mid-render.
  const asList = (value) => (Array.isArray(value) ? value : []);
  const escapedNames = (value) => asList(value).map((name) => escapeHtml(name)).join(', ');

  const statusBadge = latest.status === 'passed'
    ? '<sl-badge variant="success">PASSED</sl-badge>'
    : '<sl-badge variant="danger">FAILED</sl-badge>';

  const regression = latest.regression || {};
  let regressionHtml = '';
  if (regression.detected) {
    regressionHtml = `<p><sl-badge variant="warning">REGRESSION</sl-badge> New failures: ${escapedNames(regression.newFailures)}</p>`;
  }
  if (asList(regression.fixedTests).length > 0) {
    regressionHtml += `<p><sl-badge variant="success">FIXED</sl-badge> ${escapedNames(regression.fixedTests)}</p>`;
  }

  const ts = latest.finishedAt ? new Date(latest.finishedAt).toLocaleString() : 'unknown';
  const summary = latest.summary || {};
  const steps = asList(latest.steps);

  const stepItems = steps.map((s) => {
    const icon = s.status === 'pass' ? '&#x2705;' : s.status === 'fail' ? '&#x274C;' : '&#x23ED;';
    // Error text is truncated to 120 characters before escaping to keep the list compact.
    const err = s.error ? ` — <code>${escapeHtml(String(s.error).slice(0, 120))}</code>` : '';
    return `<li>${icon} ${escapeHtml(s.name)}${err}</li>`;
  }).join('');

  card.innerHTML = `
    <div class="row">
      <strong>Latest Run</strong> ${statusBadge}
    </div>
    <p><strong>Duration:</strong> ${escapeHtml(latest.duration || 0)}ms &middot;
    <strong>Passed:</strong> ${escapeHtml(summary.passed || 0)} &middot;
    <strong>Failed:</strong> ${escapeHtml(summary.failed || 0)} &middot;
    <strong>Skipped:</strong> ${escapeHtml(summary.skipped || 0)} &middot;
    <strong>Time:</strong> ${escapeHtml(ts)}</p>
    ${regressionHtml}
    <details>
      <summary>Steps (${steps.length})</summary>
      <ul>
        ${stepItems}
      </ul>
    </details>
  `;
}
2171
+
2172
/**
 * A2A-42: Render past E2E runs as rows of `#health-history-table`.
 *
 * @param {Array<object>|null|undefined} history - Run records. Fields read per
 *   record: status, duration, finishedAt, summary.{passed,failed},
 *   regression.{detected,newFailures}. Anything that is not a non-empty array
 *   renders the single "No history" row.
 * @returns {void}
 */
function renderHealthHistory(history) {
  const tbody = document.querySelector('#health-history-table tbody');
  if (!tbody) return;

  if (!Array.isArray(history) || history.length === 0) {
    tbody.innerHTML = '<tr><td colspan="6">No history</td></tr>';
    return;
  }

  tbody.innerHTML = history.map((r) => {
    // A null/absent record renders as an all-default row rather than aborting the whole table.
    const run = r || {};
    const badge = run.status === 'passed'
      ? '<sl-badge variant="success" size="small">PASS</sl-badge>'
      : '<sl-badge variant="danger" size="small">FAIL</sl-badge>';
    const summary = run.summary || {};
    const regression = run.regression || {};
    // Count is 0 when a regression is flagged but its failure list is missing.
    const newFailureCount = Array.isArray(regression.newFailures) ? regression.newFailures.length : 0;
    const regText = regression.detected
      ? `<sl-badge variant="warning" size="small">${newFailureCount} new</sl-badge>`
      : '-';
    const ts = run.finishedAt ? new Date(run.finishedAt).toLocaleString() : '-';
    return `<tr>
      <td>${badge}</td>
      <td>${escapeHtml(run.duration || 0)}ms</td>
      <td>${escapeHtml(summary.passed || 0)}</td>
      <td>${escapeHtml(summary.failed || 0)}</td>
      <td>${regText}</td>
      <td>${escapeHtml(ts)}</td>
    </tr>`;
  }).join('');
}
2201
+
2105
2202
  async function bootstrap() {
2106
2203
  bindTabs();
2107
2204
  bindContactsActions();
@@ -21,6 +21,7 @@
21
21
  <sl-tab slot="nav" panel="permissions">Permissions</sl-tab>
22
22
  <sl-tab slot="nav" panel="invites">Invites</sl-tab>
23
23
  <sl-tab slot="nav" panel="logs">Logs</sl-tab>
24
+ <sl-tab slot="nav" panel="health">Health</sl-tab>
24
25
 
25
26
  <sl-tab-panel name="contacts">
26
27
  <h2>Contacts</h2>
@@ -254,6 +255,27 @@
254
255
 
255
256
  <sl-card id="trace-detail"></sl-card>
256
257
  </sl-tab-panel>
258
+
259
+ <sl-tab-panel name="health">
260
+ <h2>E2E Health</h2>
261
+ <sl-card id="health-latest">
262
+ <p>No test results available. Run <code>node test/e2e/orchestrate.js --persist</code> to generate results.</p>
263
+ </sl-card>
264
+ <h3>History</h3>
265
+ <table id="health-history-table">
266
+ <thead>
267
+ <tr>
268
+ <th>Status</th>
269
+ <th>Duration</th>
270
+ <th>Passed</th>
271
+ <th>Failed</th>
272
+ <th>Regression</th>
273
+ <th>Time</th>
274
+ </tr>
275
+ </thead>
276
+ <tbody></tbody>
277
+ </table>
278
+ </sl-tab-panel>
257
279
  </sl-tab-group>
258
280
 
259
281
  <div id="notice"></div>
@@ -448,6 +448,16 @@ function createDashboardApiRouter(options = {}) {
448
448
  const router = express.Router();
449
449
  const context = buildContext(options);
450
450
  router.use(express.json());
451
+
452
+ // A2A-42: Load E2E persist layer for Health tab. Gracefully null if not available
453
+ // (e.g., installed as npm package without test files).
454
+ let persistModule = null;
455
+ try {
456
+ persistModule = require(path.join(__dirname, '..', '..', 'test', 'e2e', 'persist'));
457
+ } catch {
458
+ // test/e2e/persist.js not available — Health tab will show "no results"
459
+ }
460
+
451
461
  const ensureDashboardAccess = makeEnsureDashboardAccess(context);
452
462
  const writeSseEvent = (res, event) => {
453
463
  const eventName = sanitizeString(event?.type || 'message', 80) || 'message';
@@ -883,6 +893,38 @@ function createDashboardApiRouter(options = {}) {
883
893
  return res.json({ success: true, stats });
884
894
  });
885
895
 
896
+ // A2A-42: Serve E2E test results for the Health tab.
897
+ // Reads from local persist layer — no external dependencies.
898
+ router.get('/test-results', (req, res) => {
899
+ if (!persistModule) {
900
+ return res.json({
901
+ success: true,
902
+ latest: null,
903
+ history: [],
904
+ has_results: false,
905
+ message: 'Test results module not available'
906
+ });
907
+ }
908
+
909
+ const latest = persistModule.getLatest();
910
+ const limit = Math.min(20, Math.max(1, Number.parseInt(req.query.limit || '10', 10) || 10));
911
+ const history = persistModule.getHistory(limit);
912
+
913
+ return res.json({
914
+ success: true,
915
+ latest,
916
+ history: history.map(r => ({
917
+ status: r.status,
918
+ duration: r.duration,
919
+ startedAt: r.startedAt,
920
+ finishedAt: r.finishedAt,
921
+ summary: r.summary,
922
+ regression: r.regression || null
923
+ })),
924
+ has_results: latest !== null
925
+ });
926
+ });
927
+
886
928
  router.get('/debug/call', (req, res) => {
887
929
  const traceId = sanitizeString(req.query.trace_id || req.query.traceId || '', 120);
888
930
  const conversationId = sanitizeString(req.query.conversation_id || req.query.conversationId || '', 120);