a2acalling 0.6.48 → 0.6.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +23 -0
- package/docs/plans/2026-02-16-auto-updater.md +1284 -0
- package/docs/plans/2026-02-16-e2e-test-prompt-sequence.md +3085 -0
- package/docs/plans/2026-02-17-claude-code-codex-skills.md +770 -0
- package/docs/prompts/e2e-test-agent.md +368 -0
- package/docs/protocol.md +79 -0
- package/package.json +1 -1
- package/src/dashboard/public/app.js +108 -1
- package/src/dashboard/public/index.html +9 -0
- package/src/dashboard/public/style.css +27 -0
- package/src/lib/config.js +41 -0
- package/src/lib/conversation-driver.js +62 -21
- package/src/lib/openclaw-integration.js +22 -66
- package/src/lib/summary-formatter.js +168 -0
- package/src/lib/summary-prompt.js +203 -0
- package/src/lib/update-checker.js +93 -0
- package/src/lib/update-manager.js +313 -0
- package/src/routes/a2a.js +8 -1
- package/src/routes/dashboard.js +103 -1
- package/src/server.js +115 -26
|
@@ -0,0 +1,3085 @@
|
|
|
1
|
+
# E2E Test & Prompt Sequence for A2A Install/Onboarding/Invite Flow
|
|
2
|
+
|
|
3
|
+
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
|
|
4
|
+
|
|
5
|
+
**Goal:** Build an AI-agent-driven E2E testing system where an orchestrator spawns a subagent that installs a2acalling from npm, runs onboarding, exercises the invite flow between two isolated servers, and reports results (including auto-filing bugs in Linear).
|
|
6
|
+
|
|
7
|
+
**Architecture:** The test system uses isolated temp directories (extending the existing `tmpConfigDir` pattern) to spin up two independent a2a servers on ephemeral ports. A CLI runner wraps all `a2a` commands with structured output parsing. The orchestrator script coordinates the full sequence: environment setup → install verification → onboarding → token creation → invite exchange → cross-server call → report generation. A prompt document gives a Claude subagent the exact steps and expected outcomes.
|
|
8
|
+
|
|
9
|
+
**Tech Stack:** Node.js, Express (ephemeral ports), child_process (CLI invocation), existing zero-dependency test runner, Linear API (for bug filing via MCP or REST)
|
|
10
|
+
|
|
11
|
+
**Linear ticket:** A2A-21
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Phase 1: E2E Environment & CLI Runner
|
|
16
|
+
|
|
17
|
+
### Task 1: Create E2E environment isolation utility
|
|
18
|
+
|
|
19
|
+
**Files:**
|
|
20
|
+
- Create: `test/e2e/env.js`
|
|
21
|
+
- Test: `test/e2e/env.test.js`
|
|
22
|
+
|
|
23
|
+
**Step 1: Write the failing test**
|
|
24
|
+
|
|
25
|
+
```javascript
|
|
26
|
+
// test/e2e/env.test.js
|
|
27
|
+
module.exports = function (test, assert, helpers) {
|
|
28
|
+
test('createE2EEnv returns isolated dir with cleanup', () => {
|
|
29
|
+
const { createE2EEnv } = require('./env');
|
|
30
|
+
const env = createE2EEnv('test-basic');
|
|
31
|
+
|
|
32
|
+
assert.ok(env.dir, 'Should have a directory');
|
|
33
|
+
assert.ok(env.configDir, 'Should have a config directory');
|
|
34
|
+
assert.ok(env.env.A2A_CONFIG_DIR, 'Should set A2A_CONFIG_DIR');
|
|
35
|
+
|
|
36
|
+
const fs = require('fs');
|
|
37
|
+
assert.ok(fs.existsSync(env.dir), 'Directory should exist');
|
|
38
|
+
assert.ok(fs.existsSync(env.configDir), 'Config dir should exist');
|
|
39
|
+
|
|
40
|
+
env.cleanup();
|
|
41
|
+
assert.equal(fs.existsSync(env.dir), false, 'Should clean up');
|
|
42
|
+
});
|
|
43
|
+
|
|
44
|
+
test('createE2EEnv provides isolated process env', () => {
|
|
45
|
+
const { createE2EEnv } = require('./env');
|
|
46
|
+
const envA = createE2EEnv('env-a');
|
|
47
|
+
const envB = createE2EEnv('env-b');
|
|
48
|
+
|
|
49
|
+
assert.ok(envA.configDir !== envB.configDir, 'Should be different dirs');
|
|
50
|
+
|
|
51
|
+
envA.cleanup();
|
|
52
|
+
envB.cleanup();
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
test('createE2EEnv finds available port', async () => {
|
|
56
|
+
const { createE2EEnv } = require('./env');
|
|
57
|
+
const env = createE2EEnv('port-test');
|
|
58
|
+
|
|
59
|
+
const port = await env.findAvailablePort();
|
|
60
|
+
assert.ok(port >= 3001 && port <= 65535, 'Should return valid port');
|
|
61
|
+
|
|
62
|
+
env.cleanup();
|
|
63
|
+
});
|
|
64
|
+
};
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
**Step 2: Run test to verify it fails**
|
|
68
|
+
|
|
69
|
+
Run: `node test/run.js --filter "createE2EEnv"`
|
|
70
|
+
Expected: FAIL — module not found
|
|
71
|
+
|
|
72
|
+
**Step 3: Write minimal implementation**
|
|
73
|
+
|
|
74
|
+
```javascript
|
|
75
|
+
// test/e2e/env.js
|
|
76
|
+
const fs = require('fs');
|
|
77
|
+
const path = require('path');
|
|
78
|
+
const os = require('os');
|
|
79
|
+
const net = require('net');
|
|
80
|
+
|
|
81
|
+
/**
 * Create a fully isolated E2E test environment.
 *
 * A fresh temp directory is created under `os.tmpdir()` and a `config`
 * sub-directory inside it is exposed to child processes via A2A_CONFIG_DIR.
 *
 * @param {string} [prefix='a2a-e2e'] - prefix for the temp directory name
 * @returns {{
 *   dir: string,
 *   configDir: string,
 *   env: object,
 *   findAvailablePort: function(number=): Promise<number>,
 *   cleanup: function(): void
 * }}
 *   - dir: root temp directory for this test run
 *   - configDir: path that A2A_CONFIG_DIR points to
 *   - env: process.env clone with A2A_CONFIG_DIR and CI set
 *   - findAvailablePort(): resolves to a port that was free when probed
 *   - cleanup(): removes all temp files (best-effort, safe to call twice)
 */
function createE2EEnv(prefix = 'a2a-e2e') {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), `${prefix}-`));
  const configDir = path.join(dir, 'config');
  fs.mkdirSync(configDir, { recursive: true });

  const env = {
    ...process.env,
    A2A_CONFIG_DIR: configDir,
    // Prevent postinstall from running quickstart
    CI: 'true'
  };

  /**
   * Ask the OS for a free TCP port on 127.0.0.1.
   *
   * The probe binds to port 0, so the OS picks the port; `startPort` is
   * accepted only for backward compatibility and does not influence the
   * result. The probe socket is closed before resolving, so another process
   * can take the port before the caller binds it.
   *
   * @param {number} [startPort=3001] - ignored (kept for compatibility)
   * @returns {Promise<number>} the port number the OS assigned
   */
  function findAvailablePort(startPort = 3001) {
    return new Promise((resolve, reject) => {
      const server = net.createServer();
      // Register the failure path before listen() so no error can be missed.
      server.once('error', reject);
      server.listen(0, '127.0.0.1', () => {
        const { port } = server.address();
        server.close(() => resolve(port));
      });
    });
  }

  /** Remove the temp directory tree; failures are deliberately ignored. */
  function cleanup() {
    try {
      fs.rmSync(dir, { recursive: true, force: true });
    } catch (e) { /* best-effort */ }
  }

  return { dir, configDir, env, findAvailablePort, cleanup };
}
|
|
123
|
+
|
|
124
|
+
module.exports = { createE2EEnv };
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
**Step 4: Run test to verify it passes**
|
|
128
|
+
|
|
129
|
+
Run: `node test/run.js --filter "createE2EEnv"`
|
|
130
|
+
Expected: PASS (all 3 tests)
|
|
131
|
+
|
|
132
|
+
**Step 5: Commit**
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
git add test/e2e/env.js test/e2e/env.test.js
|
|
136
|
+
git commit -m "feat(e2e): add isolated environment utility"
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
---
|
|
140
|
+
|
|
141
|
+
### Task 2: Create CLI runner utility
|
|
142
|
+
|
|
143
|
+
**Files:**
|
|
144
|
+
- Create: `test/e2e/cli-runner.js`
|
|
145
|
+
- Test: `test/e2e/cli-runner.test.js`
|
|
146
|
+
|
|
147
|
+
**Step 1: Write the failing test**
|
|
148
|
+
|
|
149
|
+
```javascript
|
|
150
|
+
// test/e2e/cli-runner.test.js
|
|
151
|
+
module.exports = function (test, assert, helpers) {
|
|
152
|
+
const { createE2EEnv } = require('./env');
|
|
153
|
+
|
|
154
|
+
test('CLIRunner.run executes a2a command and returns output', async () => {
|
|
155
|
+
const env = createE2EEnv('cli-run');
|
|
156
|
+
const { CLIRunner } = require('./cli-runner');
|
|
157
|
+
const runner = new CLIRunner(env);
|
|
158
|
+
|
|
159
|
+
// 'a2a help' should work without onboarding
|
|
160
|
+
const result = await runner.run('help');
|
|
161
|
+
assert.equal(result.exitCode, 0, 'Should exit 0');
|
|
162
|
+
assert.ok(result.stdout.length > 0, 'Should have stdout');
|
|
163
|
+
|
|
164
|
+
env.cleanup();
|
|
165
|
+
});
|
|
166
|
+
|
|
167
|
+
test('CLIRunner.run captures non-zero exit codes', async () => {
|
|
168
|
+
const env = createE2EEnv('cli-fail');
|
|
169
|
+
const { CLIRunner } = require('./cli-runner');
|
|
170
|
+
const runner = new CLIRunner(env);
|
|
171
|
+
|
|
172
|
+
// 'a2a call' without onboarding should fail
|
|
173
|
+
const result = await runner.run('call', ['nobody', 'hello']);
|
|
174
|
+
assert.ok(result.exitCode !== 0, 'Should exit non-zero');
|
|
175
|
+
|
|
176
|
+
env.cleanup();
|
|
177
|
+
});
|
|
178
|
+
|
|
179
|
+
test('CLIRunner.run respects timeout', async () => {
|
|
180
|
+
const env = createE2EEnv('cli-timeout');
|
|
181
|
+
const { CLIRunner } = require('./cli-runner');
|
|
182
|
+
const runner = new CLIRunner(env, { timeout: 500 });
|
|
183
|
+
|
|
184
|
+
// Running a command that hangs should time out
|
|
185
|
+
const result = await runner.run('server', ['99999'], { timeout: 500 });
|
|
186
|
+
assert.ok(result.timedOut || result.exitCode !== 0, 'Should timeout or fail');
|
|
187
|
+
|
|
188
|
+
env.cleanup();
|
|
189
|
+
});
|
|
190
|
+
|
|
191
|
+
test('CLIRunner.onboard completes full onboarding via --submit', async () => {
|
|
192
|
+
const env = createE2EEnv('cli-onboard');
|
|
193
|
+
const { CLIRunner } = require('./cli-runner');
|
|
194
|
+
const runner = new CLIRunner(env);
|
|
195
|
+
|
|
196
|
+
const fs = require('fs');
|
|
197
|
+
const path = require('path');
|
|
198
|
+
|
|
199
|
+
// Pre-set config to awaiting_disclosure (skip port detection step)
|
|
200
|
+
const configPath = path.join(env.configDir, 'a2a-config.json');
|
|
201
|
+
fs.writeFileSync(configPath, JSON.stringify({
|
|
202
|
+
onboarding: { version: 2, step: 'awaiting_disclosure' },
|
|
203
|
+
agent: { hostname: 'localhost:3001', name: 'e2e-test-agent' },
|
|
204
|
+
tiers: {}
|
|
205
|
+
}));
|
|
206
|
+
|
|
207
|
+
const result = await runner.onboard({
|
|
208
|
+
personalityNotes: 'E2E test agent — direct and minimal',
|
|
209
|
+
topics: [{ topic: 'Testing', description: 'Automated E2E tests' }]
|
|
210
|
+
});
|
|
211
|
+
|
|
212
|
+
assert.ok(result.success, 'Onboarding should succeed');
|
|
213
|
+
assert.ok(result.stdout.includes('Onboarding complete'), 'Should say complete');
|
|
214
|
+
|
|
215
|
+
env.cleanup();
|
|
216
|
+
});
|
|
217
|
+
};
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
**Step 2: Run test to verify it fails**
|
|
221
|
+
|
|
222
|
+
Run: `node test/run.js --filter "CLIRunner"`
|
|
223
|
+
Expected: FAIL — module not found
|
|
224
|
+
|
|
225
|
+
**Step 3: Write minimal implementation**
|
|
226
|
+
|
|
227
|
+
```javascript
|
|
228
|
+
// test/e2e/cli-runner.js
|
|
229
|
+
const { execFile } = require('child_process');
|
|
230
|
+
const path = require('path');
|
|
231
|
+
|
|
232
|
+
const CLI_PATH = path.join(__dirname, '..', '..', 'bin', 'cli.js');
|
|
233
|
+
|
|
234
|
+
/**
 * Wraps the a2a CLI for structured E2E testing.
 *
 * Each method runs the CLI (`CLI_PATH`) with the current Node binary as a
 * child process, using the environment variables of the given E2E
 * environment. No method rejects on a CLI failure: failures are reported
 * through `exitCode` / `success` in the resolved value.
 */
class CLIRunner {
  /**
   * @param {object} e2eEnv - E2E environment; its `env` property is passed
   *   to every child process as the process environment
   * @param {object} [options]
   * @param {number} [options.timeout=30000] - default per-command timeout (ms)
   */
  constructor(e2eEnv, options = {}) {
    this.env = e2eEnv;
    this.defaultTimeout = options.timeout || 30000;
  }

  /**
   * Run an a2a CLI command.
   * @param {string} command - The a2a subcommand (e.g., 'list', 'create')
   * @param {string[]} args - Additional arguments
   * @param {object} options - { timeout } in ms; falls back to the default
   * @returns {Promise<{stdout: string, stderr: string, exitCode: number, timedOut: boolean}>}
   *   `exitCode` is 0 on success, the child's numeric exit code on failure,
   *   and 1 when no numeric code is available (killed by signal, spawn error).
   */
  run(command, args = [], options = {}) {
    const timeout = options.timeout || this.defaultTimeout;
    const fullArgs = [CLI_PATH, command, ...args];

    return new Promise((resolve) => {
      execFile(process.execPath, fullArgs, {
        env: this.env.env,
        encoding: 'utf8',
        timeout,
        maxBuffer: 1024 * 1024
      }, (error, stdout, stderr) => {
        // error.code is a string for spawn/system errors (e.g. 'ENOENT') and
        // null when the child was killed by a signal; normalise to a number.
        let exitCode = 0;
        if (error) {
          exitCode = typeof error.code === 'number' ? error.code : 1;
        }

        resolve({
          stdout: stdout || '',
          stderr: stderr || '',
          exitCode,
          // Always a real boolean (never null) so callers can compare strictly.
          timedOut: Boolean(error && error.killed)
        });
      });
    });
  }

  /**
   * Run a command and reduce the result to a success flag plus raw output.
   * Shared by the thin wrappers below.
   * @param {string} command
   * @param {string[]} [args]
   * @returns {Promise<{success: boolean, stdout: string, stderr: string}>}
   */
  async _runForStatus(command, args = []) {
    const { exitCode, stdout, stderr } = await this.run(command, args);
    return { success: exitCode === 0, stdout, stderr };
  }

  /**
   * Complete onboarding programmatically via `onboard --submit`.
   *
   * @param {object} disclosure - { personalityNotes, topics, objectives,
   *   doNotDiscuss, neverDisclose }; topics/objectives/doNotDiscuss populate
   *   the `public` tier, while `friends` and `family` are submitted empty
   * @returns {Promise<{success: boolean, stdout: string, stderr: string, exitCode: number}>}
   *   `success` requires exit code 0 AND 'Onboarding complete' in stdout
   */
  async onboard(disclosure = {}) {
    const submission = {
      tiers: {
        public: {
          topics: disclosure.topics || [{ topic: 'General', description: 'Open discussion' }],
          objectives: disclosure.objectives || [],
          do_not_discuss: disclosure.doNotDiscuss || []
        },
        friends: { topics: [], objectives: [], do_not_discuss: [] },
        family: { topics: [], objectives: [], do_not_discuss: [] }
      },
      never_disclose: disclosure.neverDisclose || [],
      personality_notes: disclosure.personalityNotes || 'E2E test agent'
    };

    const result = await this.run('onboard', ['--submit', JSON.stringify(submission)]);
    return {
      success: result.exitCode === 0 && result.stdout.includes('Onboarding complete'),
      stdout: result.stdout,
      stderr: result.stderr,
      exitCode: result.exitCode
    };
  }

  /**
   * Create a token and return the parsed output.
   * @param {object} options - { name, tier, expires, maxCalls, topics };
   *   only truthy options are forwarded as CLI flags
   * @returns {Promise<{success: boolean, token: ?string, inviteUrl: ?string, stdout: string, stderr: string}>}
   *   `inviteUrl` / `token` are the first matches found in stdout, or null
   */
  async createToken(options = {}) {
    const args = [];
    if (options.name) args.push('--name', options.name);
    if (options.tier) args.push('--tier', options.tier);
    if (options.expires) args.push('--expires', options.expires);
    if (options.maxCalls) args.push('--max-calls', String(options.maxCalls));
    if (options.topics) args.push('--topics', options.topics);

    const result = await this.run('create', args);

    // Parse invite URL from output (format: a2a://host/token)
    const urlMatch = result.stdout.match(/a2a:\/\/[^\s]+/);
    const tokenMatch = result.stdout.match(/fed_[A-Za-z0-9_-]+/);

    return {
      success: result.exitCode === 0,
      inviteUrl: urlMatch ? urlMatch[0] : null,
      token: tokenMatch ? tokenMatch[0] : null,
      stdout: result.stdout,
      stderr: result.stderr
    };
  }

  /**
   * Add a contact from an invite URL.
   * @param {string} inviteUrl - a2a://host/token URL
   * @param {string} name - Contact name
   * @returns {Promise<{success: boolean, stdout: string, stderr: string}>}
   */
  async addContact(inviteUrl, name) {
    return this._runForStatus('add', [inviteUrl, name]);
  }

  /**
   * List tokens.
   * @returns {Promise<{success: boolean, stdout: string, stderr: string}>}
   */
  async listTokens() {
    return this._runForStatus('list');
  }

  /**
   * List contacts.
   * @returns {Promise<{success: boolean, stdout: string, stderr: string}>}
   */
  async listContacts() {
    return this._runForStatus('contacts');
  }

  /**
   * Ping a remote agent.
   * @param {string} target - URL or contact name
   * @returns {Promise<{success: boolean, stdout: string, stderr: string}>}
   */
  async ping(target) {
    return this._runForStatus('ping', [target]);
  }
}
|
|
387
|
+
|
|
388
|
+
module.exports = { CLIRunner };
|
|
389
|
+
```
|
|
390
|
+
|
|
391
|
+
**Step 4: Run test to verify it passes**
|
|
392
|
+
|
|
393
|
+
Run: `node test/run.js --filter "CLIRunner"`
|
|
394
|
+
Expected: PASS (all 4 tests)
|
|
395
|
+
|
|
396
|
+
**Step 5: Commit**
|
|
397
|
+
|
|
398
|
+
```bash
|
|
399
|
+
git add test/e2e/cli-runner.js test/e2e/cli-runner.test.js
|
|
400
|
+
git commit -m "feat(e2e): add CLI runner utility for structured command execution"
|
|
401
|
+
```
|
|
402
|
+
|
|
403
|
+
---
|
|
404
|
+
|
|
405
|
+
## Phase 2: Two-Server E2E Test
|
|
406
|
+
|
|
407
|
+
### Task 3: Create two-server test harness
|
|
408
|
+
|
|
409
|
+
**Files:**
|
|
410
|
+
- Create: `test/e2e/two-server.js`
|
|
411
|
+
- Test: `test/e2e/two-server.test.js`
|
|
412
|
+
|
|
413
|
+
**Step 1: Write the failing test**
|
|
414
|
+
|
|
415
|
+
```javascript
|
|
416
|
+
// test/e2e/two-server.test.js
|
|
417
|
+
module.exports = function (test, assert, helpers) {
|
|
418
|
+
const { TwoServerHarness } = require('./two-server');
|
|
419
|
+
|
|
420
|
+
test('TwoServerHarness starts two isolated servers', async () => {
|
|
421
|
+
const harness = new TwoServerHarness();
|
|
422
|
+
await harness.setup();
|
|
423
|
+
|
|
424
|
+
assert.ok(harness.agentA, 'Agent A should exist');
|
|
425
|
+
assert.ok(harness.agentB, 'Agent B should exist');
|
|
426
|
+
assert.ok(harness.agentA.port, 'Agent A should have a port');
|
|
427
|
+
assert.ok(harness.agentB.port, 'Agent B should have a port');
|
|
428
|
+
assert.ok(harness.agentA.port !== harness.agentB.port, 'Ports should differ');
|
|
429
|
+
|
|
430
|
+
// Both should respond to ping
|
|
431
|
+
const http = require('http');
|
|
432
|
+
const pingA = await httpGet(`http://127.0.0.1:${harness.agentA.port}/api/a2a/ping`);
|
|
433
|
+
assert.ok(pingA.pong, 'Agent A should respond to ping');
|
|
434
|
+
|
|
435
|
+
const pingB = await httpGet(`http://127.0.0.1:${harness.agentB.port}/api/a2a/ping`);
|
|
436
|
+
assert.ok(pingB.pong, 'Agent B should respond to ping');
|
|
437
|
+
|
|
438
|
+
await harness.teardown();
|
|
439
|
+
});
|
|
440
|
+
|
|
441
|
+
test('TwoServerHarness provides token stores for each agent', async () => {
|
|
442
|
+
const harness = new TwoServerHarness();
|
|
443
|
+
await harness.setup();
|
|
444
|
+
|
|
445
|
+
// Create token on Agent A
|
|
446
|
+
const { token } = harness.agentA.tokenStore.create({ name: 'TestToken' });
|
|
447
|
+
assert.match(token, /^fed_/);
|
|
448
|
+
|
|
449
|
+
// Token should NOT exist on Agent B
|
|
450
|
+
const validation = harness.agentB.tokenStore.validate(token);
|
|
451
|
+
assert.equal(validation.valid, false);
|
|
452
|
+
|
|
453
|
+
await harness.teardown();
|
|
454
|
+
});
|
|
455
|
+
|
|
456
|
+
// Helper to make GET request
|
|
457
|
+
function httpGet(url) {
|
|
458
|
+
const http = require('http');
|
|
459
|
+
return new Promise((resolve, reject) => {
|
|
460
|
+
http.get(url, (res) => {
|
|
461
|
+
let data = '';
|
|
462
|
+
res.on('data', chunk => data += chunk);
|
|
463
|
+
res.on('end', () => {
|
|
464
|
+
try { resolve(JSON.parse(data)); }
|
|
465
|
+
catch { resolve(data); }
|
|
466
|
+
});
|
|
467
|
+
}).on('error', reject);
|
|
468
|
+
});
|
|
469
|
+
}
|
|
470
|
+
};
|
|
471
|
+
```
|
|
472
|
+
|
|
473
|
+
**Step 2: Run test to verify it fails**
|
|
474
|
+
|
|
475
|
+
Run: `node test/run.js --filter "TwoServerHarness"`
|
|
476
|
+
Expected: FAIL — module not found
|
|
477
|
+
|
|
478
|
+
**Step 3: Write minimal implementation**
|
|
479
|
+
|
|
480
|
+
```javascript
|
|
481
|
+
// test/e2e/two-server.js
|
|
482
|
+
const { createE2EEnv } = require('./env');
|
|
483
|
+
const path = require('path');
|
|
484
|
+
|
|
485
|
+
/**
 * Starts two independent A2A servers on ephemeral ports,
 * each with their own config directory and token store.
 *
 * This simulates two separate agents that can exchange
 * invites and call each other over HTTP.
 */
class TwoServerHarness {
  /**
   * @param {object} [options]
   * @param {Function} [options.handleMessageA] - message handler for agent A
   * @param {Function} [options.handleMessageB] - message handler for agent B
   *   (each defaults to an echo-style handler from `defaultHandler`)
   */
  constructor(options = {}) {
    this.agentA = null;
    this.agentB = null;
    this.handleMessageA = options.handleMessageA || defaultHandler('AgentA');
    this.handleMessageB = options.handleMessageB || defaultHandler('AgentB');
  }

  /**
   * Start both agents. If either fails to start, whatever was already
   * started is torn down before the error is rethrown, so a failed setup
   * never leaves a server listening.
   */
  async setup() {
    try {
      this.agentA = await this._startAgent('agent-a', this.handleMessageA);
      this.agentB = await this._startAgent('agent-b', this.handleMessageB);
    } catch (err) {
      await this.teardown();
      throw err;
    }
  }

  /**
   * Create an isolated environment and start one Express server in it.
   * @param {string} name - agent label, also used in the temp dir prefix
   * @param {Function} handleMessage - handler passed to the a2a routes
   * @returns {Promise<object>} { name, port, env, tokenStore, app, server,
   *   hostname, inviteBase }
   */
  async _startAgent(name, handleMessage) {
    const env = createE2EEnv(`e2e-${name}`);

    try {
      const port = await env.findAvailablePort();

      // Fresh requires to get isolated instances
      delete require.cache[require.resolve('../../src/lib/tokens')];
      delete require.cache[require.resolve('../../src/routes/a2a')];

      const express = require('express');
      const { TokenStore } = require('../../src/lib/tokens');
      const { createRoutes } = require('../../src/routes/a2a');

      const tokenStore = new TokenStore(env.configDir);
      const app = express();
      app.use(express.json({ limit: '100kb' }));

      app.use('/api/a2a', createRoutes({
        tokenStore,
        handleMessage,
        notifyOwner: () => Promise.resolve()
      }));

      const server = await new Promise((resolve, reject) => {
        const s = app.listen(port, '127.0.0.1', () => resolve(s));
        // The probed port can be taken by another process before we bind;
        // reject instead of leaving this promise pending forever.
        s.once('error', reject);
      });

      return {
        name,
        port,
        env,
        tokenStore,
        app,
        server,
        hostname: `127.0.0.1:${port}`,
        inviteBase: `a2a://127.0.0.1:${port}`
      };
    } catch (err) {
      // The agent never came up, so nobody else will remove its temp dir.
      env.cleanup();
      throw err;
    }
  }

  /**
   * Stop both servers and remove their temp directories. Agents are reset
   * to null first, so calling teardown again (or after a partially failed
   * setup) is a no-op for anything already released.
   */
  async teardown() {
    const agents = [this.agentA, this.agentB];
    this.agentA = null;
    this.agentB = null;

    for (const agent of agents) {
      if (!agent) continue;
      await closeServer(agent.server);
      agent.env.cleanup();
    }
  }
}
|
|
554
|
+
|
|
555
|
+
/**
 * Build the fallback message handler used when a test supplies none.
 *
 * The returned handler echoes the first 100 characters of the incoming
 * message, prefixed with the agent name, and always allows the conversation
 * to continue. Non-string messages are stringified (null/undefined become
 * an empty string) instead of throwing.
 *
 * @param {string} name - agent label used in the reply text
 * @returns {function(*, *): Promise<{text: string, canContinue: boolean}>}
 */
function defaultHandler(name) {
  return async function (message, context) {
    let body = message;
    if (typeof body !== 'string') {
      body = body == null ? '' : String(body);
    }

    return {
      text: `${name} received: ${body.slice(0, 100)}`,
      canContinue: true
    };
  };
}
|
|
563
|
+
|
|
564
|
+
/**
 * Close a server and wait for it to finish.
 *
 * Always resolves to `undefined`: a missing server is treated as already
 * closed, and an error passed to the close callback is ignored because
 * teardown is best-effort.
 *
 * @param {?{close: Function}} server - object with a Node-style
 *   `close(callback)` method, or null/undefined
 * @returns {Promise<void>}
 */
function closeServer(server) {
  return new Promise((resolve) => {
    if (!server) {
      resolve();
      return;
    }
    // Do not forward the callback's error argument as the resolved value.
    server.close(() => resolve());
  });
}
|
|
570
|
+
|
|
571
|
+
module.exports = { TwoServerHarness };
|
|
572
|
+
```
|
|
573
|
+
|
|
574
|
+
**Step 4: Run test to verify it passes**
|
|
575
|
+
|
|
576
|
+
Run: `node test/run.js --filter "TwoServerHarness"`
|
|
577
|
+
Expected: PASS (both tests)
|
|
578
|
+
|
|
579
|
+
**Step 5: Commit**
|
|
580
|
+
|
|
581
|
+
```bash
|
|
582
|
+
git add test/e2e/two-server.js test/e2e/two-server.test.js
|
|
583
|
+
git commit -m "feat(e2e): add two-server harness for cross-agent testing"
|
|
584
|
+
```
|
|
585
|
+
|
|
586
|
+
---
|
|
587
|
+
|
|
588
|
+
### Task 4: Write the full E2E install + onboard + invite test
|
|
589
|
+
|
|
590
|
+
**Files:**
|
|
591
|
+
- Create: `test/e2e/full-flow.test.js`
|
|
592
|
+
|
|
593
|
+
This is the core test that exercises the complete user journey across two agents.
|
|
594
|
+
|
|
595
|
+
**Step 1: Write the test**
|
|
596
|
+
|
|
597
|
+
```javascript
|
|
598
|
+
// test/e2e/full-flow.test.js
|
|
599
|
+
/**
|
|
600
|
+
* Full E2E Flow Test
|
|
601
|
+
*
|
|
602
|
+
* Simulates the complete A2A user journey between two agents:
|
|
603
|
+
*
|
|
604
|
+
* 1. Both agents start with fresh environments
|
|
605
|
+
* 2. Agent A completes onboarding
|
|
606
|
+
* 3. Agent A creates an invite token
|
|
607
|
+
* 4. Agent B adds Agent A as a contact using the invite URL
|
|
608
|
+
* 5. Agent B calls Agent A via HTTP
|
|
609
|
+
* 6. Agent A responds
|
|
610
|
+
* 7. Multi-turn conversation works
|
|
611
|
+
* 8. Conversation ends cleanly
|
|
612
|
+
*/
|
|
613
|
+
module.exports = function (test, assert, helpers) {
|
|
614
|
+
const http = require('http');
|
|
615
|
+
const { TwoServerHarness } = require('./two-server');
|
|
616
|
+
|
|
617
|
+
let harness = null;
|
|
618
|
+
|
|
619
|
+
async function teardown() {
|
|
620
|
+
if (harness) await harness.teardown();
|
|
621
|
+
harness = null;
|
|
622
|
+
}
|
|
623
|
+
|
|
624
|
+
// ── Full Flow: Onboard → Create Token → Invite → Call ──
|
|
625
|
+
|
|
626
|
+
test('full E2E: Agent B calls Agent A via invite URL', async () => {
|
|
627
|
+
harness = new TwoServerHarness();
|
|
628
|
+
await harness.setup();
|
|
629
|
+
|
|
630
|
+
const agentA = harness.agentA;
|
|
631
|
+
const agentB = harness.agentB;
|
|
632
|
+
|
|
633
|
+
// Step 1: Agent A creates a token for Agent B
|
|
634
|
+
const { token, record } = agentA.tokenStore.create({
|
|
635
|
+
name: 'AgentB-Access',
|
|
636
|
+
permissions: 'public',
|
|
637
|
+
expires: '1h',
|
|
638
|
+
maxCalls: 10,
|
|
639
|
+
allowedTopics: ['testing', 'automation']
|
|
640
|
+
});
|
|
641
|
+
|
|
642
|
+
assert.match(token, /^fed_/, 'Token should start with fed_');
|
|
643
|
+
assert.equal(record.name, 'AgentB-Access');
|
|
644
|
+
assert.equal(record.tier, 'public');
|
|
645
|
+
|
|
646
|
+
// Step 2: Construct invite URL
|
|
647
|
+
const inviteUrl = `${agentA.inviteBase}/${token}`;
|
|
648
|
+
assert.match(inviteUrl, /^a2a:\/\//, 'Invite should be a2a:// URL');
|
|
649
|
+
|
|
650
|
+
// Step 3: Agent B adds Agent A as a contact
|
|
651
|
+
agentB.tokenStore.addContact(inviteUrl, {
|
|
652
|
+
name: 'AgentA',
|
|
653
|
+
notes: 'E2E test partner'
|
|
654
|
+
});
|
|
655
|
+
|
|
656
|
+
const contacts = agentB.tokenStore.listContacts();
|
|
657
|
+
assert.equal(contacts.length, 1);
|
|
658
|
+
assert.equal(contacts[0].name, 'AgentA');
|
|
659
|
+
|
|
660
|
+
// Step 4: Agent B retrieves the stored token and calls Agent A
|
|
661
|
+
const contact = agentB.tokenStore.getContact('AgentA');
|
|
662
|
+
assert.equal(contact.host, agentA.hostname);
|
|
663
|
+
assert.equal(contact.token, token);
|
|
664
|
+
|
|
665
|
+
// Step 5: Make the actual HTTP call (Agent B → Agent A)
|
|
666
|
+
const callResult = await httpPost(
|
|
667
|
+
`http://${agentA.hostname}/api/a2a/invoke`,
|
|
668
|
+
{
|
|
669
|
+
message: 'Hello Agent A, this is Agent B calling.',
|
|
670
|
+
caller: { name: 'AgentB', owner: 'E2E Test' }
|
|
671
|
+
},
|
|
672
|
+
{ Authorization: `Bearer ${token}` }
|
|
673
|
+
);
|
|
674
|
+
|
|
675
|
+
assert.equal(callResult.statusCode, 200);
|
|
676
|
+
assert.ok(callResult.body.success);
|
|
677
|
+
assert.match(callResult.body.conversation_id, /^conv_/);
|
|
678
|
+
assert.ok(callResult.body.response.includes('AgentA received'));
|
|
679
|
+
assert.equal(callResult.body.can_continue, true);
|
|
680
|
+
assert.equal(callResult.body.tokens_remaining, 9);
|
|
681
|
+
|
|
682
|
+
// Step 6: Multi-turn — send follow-up on same conversation
|
|
683
|
+
const followUp = await httpPost(
|
|
684
|
+
`http://${agentA.hostname}/api/a2a/invoke`,
|
|
685
|
+
{
|
|
686
|
+
message: 'Follow-up question from Agent B.',
|
|
687
|
+
conversation_id: callResult.body.conversation_id,
|
|
688
|
+
caller: { name: 'AgentB', owner: 'E2E Test' }
|
|
689
|
+
},
|
|
690
|
+
{ Authorization: `Bearer ${token}` }
|
|
691
|
+
);
|
|
692
|
+
|
|
693
|
+
assert.equal(followUp.statusCode, 200);
|
|
694
|
+
assert.ok(followUp.body.success);
|
|
695
|
+
assert.equal(followUp.body.conversation_id, callResult.body.conversation_id);
|
|
696
|
+
assert.equal(followUp.body.tokens_remaining, 8);
|
|
697
|
+
|
|
698
|
+
// Step 7: Verify token usage was tracked
|
|
699
|
+
const tokenRecord = agentA.tokenStore.findById(record.id);
|
|
700
|
+
assert.equal(tokenRecord.calls_made, 2);
|
|
701
|
+
|
|
702
|
+
await teardown();
|
|
703
|
+
});
|
|
704
|
+
|
|
705
|
+
test('full E2E: bidirectional — both agents exchange invites', async () => {
|
|
706
|
+
harness = new TwoServerHarness();
|
|
707
|
+
await harness.setup();
|
|
708
|
+
|
|
709
|
+
const agentA = harness.agentA;
|
|
710
|
+
const agentB = harness.agentB;
|
|
711
|
+
|
|
712
|
+
// Agent A creates token for B
|
|
713
|
+
const tokenAtoB = agentA.tokenStore.create({
|
|
714
|
+
name: 'ForAgentB', permissions: 'friends', maxCalls: 5
|
|
715
|
+
});
|
|
716
|
+
|
|
717
|
+
// Agent B creates token for A
|
|
718
|
+
const tokenBtoA = agentB.tokenStore.create({
|
|
719
|
+
name: 'ForAgentA', permissions: 'public', maxCalls: 5
|
|
720
|
+
});
|
|
721
|
+
|
|
722
|
+
// Exchange invites
|
|
723
|
+
const inviteA = `${agentA.inviteBase}/${tokenAtoB.token}`;
|
|
724
|
+
const inviteB = `${agentB.inviteBase}/${tokenBtoA.token}`;
|
|
725
|
+
|
|
726
|
+
agentB.tokenStore.addContact(inviteA, { name: 'AgentA' });
|
|
727
|
+
agentA.tokenStore.addContact(inviteB, { name: 'AgentB' });
|
|
728
|
+
|
|
729
|
+
// B calls A
|
|
730
|
+
const resBA = await httpPost(
|
|
731
|
+
`http://${agentA.hostname}/api/a2a/invoke`,
|
|
732
|
+
{ message: 'B calling A', caller: { name: 'AgentB' } },
|
|
733
|
+
{ Authorization: `Bearer ${tokenAtoB.token}` }
|
|
734
|
+
);
|
|
735
|
+
assert.equal(resBA.statusCode, 200);
|
|
736
|
+
assert.ok(resBA.body.success);
|
|
737
|
+
|
|
738
|
+
// A calls B
|
|
739
|
+
const resAB = await httpPost(
|
|
740
|
+
`http://${agentB.hostname}/api/a2a/invoke`,
|
|
741
|
+
{ message: 'A calling B', caller: { name: 'AgentA' } },
|
|
742
|
+
{ Authorization: `Bearer ${tokenBtoA.token}` }
|
|
743
|
+
);
|
|
744
|
+
assert.equal(resAB.statusCode, 200);
|
|
745
|
+
assert.ok(resAB.body.success);
|
|
746
|
+
|
|
747
|
+
await teardown();
|
|
748
|
+
});
|
|
749
|
+
|
|
750
|
+
test('full E2E: revoked token rejected mid-conversation', async () => {
|
|
751
|
+
harness = new TwoServerHarness();
|
|
752
|
+
await harness.setup();
|
|
753
|
+
|
|
754
|
+
const { token, record } = harness.agentA.tokenStore.create({
|
|
755
|
+
name: 'Revocable', maxCalls: 10
|
|
756
|
+
});
|
|
757
|
+
|
|
758
|
+
// First call succeeds
|
|
759
|
+
const res1 = await httpPost(
|
|
760
|
+
`http://${harness.agentA.hostname}/api/a2a/invoke`,
|
|
761
|
+
{ message: 'First call', caller: { name: 'Tester' } },
|
|
762
|
+
{ Authorization: `Bearer ${token}` }
|
|
763
|
+
);
|
|
764
|
+
assert.equal(res1.statusCode, 200);
|
|
765
|
+
|
|
766
|
+
// Revoke the token
|
|
767
|
+
harness.agentA.tokenStore.revoke(record.id);
|
|
768
|
+
|
|
769
|
+
// Second call rejected
|
|
770
|
+
const res2 = await httpPost(
|
|
771
|
+
`http://${harness.agentA.hostname}/api/a2a/invoke`,
|
|
772
|
+
{ message: 'After revoke', caller: { name: 'Tester' } },
|
|
773
|
+
{ Authorization: `Bearer ${token}` }
|
|
774
|
+
);
|
|
775
|
+
assert.equal(res2.statusCode, 401);
|
|
776
|
+
assert.equal(res2.body.error, 'unauthorized');
|
|
777
|
+
|
|
778
|
+
await teardown();
|
|
779
|
+
});
|
|
780
|
+
|
|
781
|
+
test('full E2E: expired token rejected', async () => {
|
|
782
|
+
harness = new TwoServerHarness();
|
|
783
|
+
await harness.setup();
|
|
784
|
+
|
|
785
|
+
// Create token that expires immediately (1ms)
|
|
786
|
+
const { token } = harness.agentA.tokenStore.create({
|
|
787
|
+
name: 'ShortLived', expires: '1ms'
|
|
788
|
+
});
|
|
789
|
+
|
|
790
|
+
// Wait for expiry
|
|
791
|
+
await new Promise(r => setTimeout(r, 50));
|
|
792
|
+
|
|
793
|
+
const res = await httpPost(
|
|
794
|
+
`http://${harness.agentA.hostname}/api/a2a/invoke`,
|
|
795
|
+
{ message: 'Too late', caller: { name: 'Tester' } },
|
|
796
|
+
{ Authorization: `Bearer ${token}` }
|
|
797
|
+
);
|
|
798
|
+
assert.equal(res.statusCode, 401);
|
|
799
|
+
assert.equal(res.body.error, 'unauthorized');
|
|
800
|
+
|
|
801
|
+
await teardown();
|
|
802
|
+
});
|
|
803
|
+
|
|
804
|
+
test('full E2E: max calls enforcement across multi-turn', async () => {
|
|
805
|
+
harness = new TwoServerHarness();
|
|
806
|
+
await harness.setup();
|
|
807
|
+
|
|
808
|
+
const { token } = harness.agentA.tokenStore.create({
|
|
809
|
+
name: 'LimitedCalls', maxCalls: 2
|
|
810
|
+
});
|
|
811
|
+
|
|
812
|
+
// Call 1 OK
|
|
813
|
+
const r1 = await httpPost(
|
|
814
|
+
`http://${harness.agentA.hostname}/api/a2a/invoke`,
|
|
815
|
+
{ message: 'Call 1', caller: { name: 'Tester' } },
|
|
816
|
+
{ Authorization: `Bearer ${token}` }
|
|
817
|
+
);
|
|
818
|
+
assert.equal(r1.statusCode, 200);
|
|
819
|
+
|
|
820
|
+
// Call 2 OK
|
|
821
|
+
const r2 = await httpPost(
|
|
822
|
+
`http://${harness.agentA.hostname}/api/a2a/invoke`,
|
|
823
|
+
{ message: 'Call 2', caller: { name: 'Tester' } },
|
|
824
|
+
{ Authorization: `Bearer ${token}` }
|
|
825
|
+
);
|
|
826
|
+
assert.equal(r2.statusCode, 200);
|
|
827
|
+
|
|
828
|
+
// Call 3 rejected
|
|
829
|
+
const r3 = await httpPost(
|
|
830
|
+
`http://${harness.agentA.hostname}/api/a2a/invoke`,
|
|
831
|
+
{ message: 'Call 3', caller: { name: 'Tester' } },
|
|
832
|
+
{ Authorization: `Bearer ${token}` }
|
|
833
|
+
);
|
|
834
|
+
assert.equal(r3.statusCode, 401);
|
|
835
|
+
|
|
836
|
+
await teardown();
|
|
837
|
+
});
|
|
838
|
+
|
|
839
|
+
// ── HTTP helper ──
|
|
840
|
+
/**
 * POST a JSON body to `url` and resolve with the parsed response.
 *
 * @param {string} url - Absolute http:// URL. An unparsable URL throws synchronously.
 * @param {*} body - Value serialized with JSON.stringify and sent as the request body.
 * @param {Object} [headers={}] - Extra request headers; they override the defaults below.
 * @returns {Promise<{statusCode: number, headers: Object, body: *}>} `body` is the parsed
 *   JSON when the response text is valid JSON, otherwise the raw response text.
 */
function httpPost(url, body, headers = {}) {
  const target = new URL(url);
  return new Promise((resolve, reject) => {
    const payload = JSON.stringify(body);
    const req = http.request({
      hostname: target.hostname,
      port: target.port,
      // Keep the query string: pathname alone would silently drop "?a=b".
      path: `${target.pathname}${target.search}`,
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(payload),
        ...headers
      }
    }, (res) => {
      // Decode at the stream level so a multi-byte character split across
      // two chunks is not corrupted by per-chunk string conversion.
      res.setEncoding('utf8');
      let raw = '';
      res.on('data', (chunk) => { raw += chunk; });
      res.on('error', reject);
      res.on('end', () => {
        let parsed;
        try { parsed = JSON.parse(raw); } catch { parsed = raw; }
        resolve({ statusCode: res.statusCode, headers: res.headers, body: parsed });
      });
    });
    req.on('error', reject);
    req.write(payload);
    req.end();
  });
}
|
|
868
|
+
};
|
|
869
|
+
```
|
|
870
|
+
|
|
871
|
+
**Step 2: Run test to verify it passes**
|
|
872
|
+
|
|
873
|
+
Run: `node test/run.js --filter "full E2E"`
|
|
874
|
+
Expected: PASS (all 5 tests)
|
|
875
|
+
|
|
876
|
+
**Step 3: Commit**
|
|
877
|
+
|
|
878
|
+
```bash
|
|
879
|
+
git add test/e2e/full-flow.test.js
|
|
880
|
+
git commit -m "feat(e2e): add full flow tests — onboard, invite, cross-agent call"
|
|
881
|
+
```
|
|
882
|
+
|
|
883
|
+
---
|
|
884
|
+
|
|
885
|
+
## Phase 3: Agent Prompt Sequence & Report
|
|
886
|
+
|
|
887
|
+
### Task 5: Create the E2E test agent prompt sequence
|
|
888
|
+
|
|
889
|
+
**Files:**
|
|
890
|
+
- Create: `docs/prompts/e2e-test-agent.md`
|
|
891
|
+
|
|
892
|
+
This is the prompt document that an AI orchestrator gives to a subagent. The subagent follows these steps to test the A2A system.
|
|
893
|
+
|
|
894
|
+
**Step 1: Write the prompt document**
|
|
895
|
+
|
|
896
|
+
```markdown
|
|
897
|
+
# A2A E2E Test Agent — Prompt Sequence
|
|
898
|
+
|
|
899
|
+
You are an E2E test agent for the `a2acalling` npm package. Your job is to verify
|
|
900
|
+
that a fresh install, onboarding, and invite flow all work correctly.
|
|
901
|
+
|
|
902
|
+
## Your Environment
|
|
903
|
+
|
|
904
|
+
You have been given a clean working directory. You will:
|
|
905
|
+
1. Install `a2acalling` from npm (or use a local tarball if provided)
|
|
906
|
+
2. Run through the full onboarding flow
|
|
907
|
+
3. Create tokens and test the invite flow
|
|
908
|
+
4. Verify the server responds correctly
|
|
909
|
+
5. Report all findings
|
|
910
|
+
|
|
911
|
+
## Pre-Flight
|
|
912
|
+
|
|
913
|
+
Before starting, verify:
|
|
914
|
+
- [ ] Node.js >= 18 is available (`node --version`)
|
|
915
|
+
- [ ] npm is available (`npm --version`)
|
|
916
|
+
- [ ] Working directory is clean and writable
|
|
917
|
+
- [ ] No existing A2A config (the directory `~/.config/openclaw/` should not exist or should be empty — check with `ls ~/.config/openclaw/`)
|
|
918
|
+
|
|
919
|
+
If any pre-flight check fails, report the failure and stop.
|
|
920
|
+
|
|
921
|
+
## Step 1: Install a2acalling
|
|
922
|
+
|
|
923
|
+
```bash
|
|
924
|
+
npm install -g a2acalling
|
|
925
|
+
```
|
|
926
|
+
|
|
927
|
+
**Expected:**
|
|
928
|
+
- Exit code 0
|
|
929
|
+
- `a2a` command is now available
|
|
930
|
+
- `a2a --version` prints a version number
|
|
931
|
+
|
|
932
|
+
**Report if:** Install fails, postinstall errors, command not found after install.
|
|
933
|
+
|
|
934
|
+
## Step 2: Run Quickstart (Onboarding)
|
|
935
|
+
|
|
936
|
+
```bash
|
|
937
|
+
a2a quickstart
|
|
938
|
+
```
|
|
939
|
+
|
|
940
|
+
**Expected:**
|
|
941
|
+
- Step 1: Port detection — finds an available port (3001-3020)
|
|
942
|
+
- Step 2: Server starts on the detected port
|
|
943
|
+
- Step 3: Disclosure prompt appears — asking for topics, objectives, personality
|
|
944
|
+
- The agent should be in `awaiting_disclosure` state
|
|
945
|
+
|
|
946
|
+
**Then submit disclosure:**
|
|
947
|
+
|
|
948
|
+
```bash
|
|
949
|
+
a2a quickstart --submit '{
|
|
950
|
+
"tiers": {
|
|
951
|
+
"public": {
|
|
952
|
+
"topics": [{"topic": "Testing", "description": "Automated system testing"}],
|
|
953
|
+
"objectives": [{"objective": "Verify install", "description": "Confirm the package works"}],
|
|
954
|
+
"do_not_discuss": []
|
|
955
|
+
},
|
|
956
|
+
"friends": {"topics": [], "objectives": [], "do_not_discuss": []},
|
|
957
|
+
"family": {"topics": [], "objectives": [], "do_not_discuss": []}
|
|
958
|
+
},
|
|
959
|
+
"never_disclose": ["Test secrets"],
|
|
960
|
+
"personality_notes": "Direct and methodical test agent"
|
|
961
|
+
}'
|
|
962
|
+
```
|
|
963
|
+
|
|
964
|
+
**Expected:**
|
|
965
|
+
- "Disclosure manifest saved"
|
|
966
|
+
- "Onboarding complete"
|
|
967
|
+
- Step numbers are sequential (no duplicates)
|
|
968
|
+
- Config file exists at `~/.config/openclaw/a2a-config.json` with `onboarding.step === 'complete'`
|
|
969
|
+
- First invite URL is generated (`a2a://hostname/fed_...`)
|
|
970
|
+
|
|
971
|
+
**Report if:** Onboarding hangs, step numbers are wrong, manifest not saved, invite not generated.
|
|
972
|
+
|
|
973
|
+
## Step 3: Verify Server Health
|
|
974
|
+
|
|
975
|
+
```bash
|
|
976
|
+
a2a ping a2a://localhost:<port>/test
|
|
977
|
+
```
|
|
978
|
+
|
|
979
|
+
Or directly:
|
|
980
|
+
|
|
981
|
+
```bash
|
|
982
|
+
curl http://localhost:<port>/api/a2a/ping
|
|
983
|
+
```
|
|
984
|
+
|
|
985
|
+
**Expected:** `{"pong": true, "timestamp": "..."}`
|
|
986
|
+
|
|
987
|
+
```bash
|
|
988
|
+
curl http://localhost:<port>/api/a2a/status
|
|
989
|
+
```
|
|
990
|
+
|
|
991
|
+
**Expected:** `{"a2a": true, "version": "...", "capabilities": ["invoke", "multi-turn", ...]}`
|
|
992
|
+
|
|
993
|
+
**Report if:** Server not running, ping fails, status missing expected fields.
|
|
994
|
+
|
|
995
|
+
## Step 4: Create Invite Token
|
|
996
|
+
|
|
997
|
+
```bash
|
|
998
|
+
a2a create --name "E2E-Tester" --tier public --expires 1h --max-calls 20
|
|
999
|
+
```
|
|
1000
|
+
|
|
1001
|
+
**Expected:**
|
|
1002
|
+
- Token created successfully
|
|
1003
|
+
- Invite URL printed: `a2a://hostname/fed_...`
|
|
1004
|
+
- Token appears in `a2a list`
|
|
1005
|
+
|
|
1006
|
+
**Report if:** Token creation fails, URL format wrong, not in list.
|
|
1007
|
+
|
|
1008
|
+
## Step 5: Test Inbound Call
|
|
1009
|
+
|
|
1010
|
+
Using the invite URL from Step 4, make a direct HTTP call:
|
|
1011
|
+
|
|
1012
|
+
```bash
|
|
1013
|
+
TOKEN="<token from step 4>"
|
|
1014
|
+
PORT="<port from step 2>"
|
|
1015
|
+
curl -X POST http://localhost:$PORT/api/a2a/invoke \
|
|
1016
|
+
-H "Content-Type: application/json" \
|
|
1017
|
+
-H "Authorization: Bearer $TOKEN" \
|
|
1018
|
+
-d '{"message": "Hello from E2E test agent", "caller": {"name": "E2E-Tester", "owner": "Automated Test"}}'
|
|
1019
|
+
```
|
|
1020
|
+
|
|
1021
|
+
**Expected Response:**
|
|
1022
|
+
```json
|
|
1023
|
+
{
|
|
1024
|
+
"success": true,
|
|
1025
|
+
"conversation_id": "conv_...",
|
|
1026
|
+
"response": "...",
|
|
1027
|
+
"can_continue": true,
|
|
1028
|
+
"tokens_remaining": 19
|
|
1029
|
+
}
|
|
1030
|
+
```
|
|
1031
|
+
|
|
1032
|
+
**Report if:** 401 unauthorized, 500 error, missing conversation_id, unexpected response shape.
|
|
1033
|
+
|
|
1034
|
+
## Step 6: Test Multi-Turn Conversation
|
|
1035
|
+
|
|
1036
|
+
Using the `conversation_id` from Step 5:
|
|
1037
|
+
|
|
1038
|
+
```bash
|
|
1039
|
+
curl -X POST http://localhost:$PORT/api/a2a/invoke \
|
|
1040
|
+
-H "Content-Type: application/json" \
|
|
1041
|
+
-H "Authorization: Bearer $TOKEN" \
|
|
1042
|
+
-d '{"message": "Follow-up message", "conversation_id": "<conv_id>", "caller": {"name": "E2E-Tester"}}'
|
|
1043
|
+
```
|
|
1044
|
+
|
|
1045
|
+
**Expected:**
|
|
1046
|
+
- Same `conversation_id` returned
|
|
1047
|
+
- `tokens_remaining` is 1 lower than the value returned in Step 5 (e.g. 19 → 18)
|
|
1048
|
+
- `can_continue` is true
|
|
1049
|
+
|
|
1050
|
+
**Report if:** Conversation ID changes, token count wrong, can_continue unexpected.
|
|
1051
|
+
|
|
1052
|
+
## Step 7: Test Error Cases
|
|
1053
|
+
|
|
1054
|
+
### 7a. No Authorization
|
|
1055
|
+
```bash
|
|
1056
|
+
curl -X POST http://localhost:$PORT/api/a2a/invoke \
|
|
1057
|
+
-H "Content-Type: application/json" \
|
|
1058
|
+
-d '{"message": "No auth"}'
|
|
1059
|
+
```
|
|
1060
|
+
**Expected:** 401, `{"error": "missing_token"}`
|
|
1061
|
+
|
|
1062
|
+
### 7b. Invalid Token
|
|
1063
|
+
```bash
|
|
1064
|
+
curl -X POST http://localhost:$PORT/api/a2a/invoke \
|
|
1065
|
+
-H "Content-Type: application/json" \
|
|
1066
|
+
-H "Authorization: Bearer fed_invalid_garbage" \
|
|
1067
|
+
-d '{"message": "Bad token"}'
|
|
1068
|
+
```
|
|
1069
|
+
**Expected:** 401, `{"error": "unauthorized"}`
|
|
1070
|
+
|
|
1071
|
+
### 7c. Missing Message
|
|
1072
|
+
```bash
|
|
1073
|
+
curl -X POST http://localhost:$PORT/api/a2a/invoke \
|
|
1074
|
+
-H "Content-Type: application/json" \
|
|
1075
|
+
-H "Authorization: Bearer $TOKEN" \
|
|
1076
|
+
-d '{}'
|
|
1077
|
+
```
|
|
1078
|
+
**Expected:** 400, `{"error": "missing_message"}`
|
|
1079
|
+
|
|
1080
|
+
**Report if:** Any error case returns unexpected status code or error format.
|
|
1081
|
+
|
|
1082
|
+
## Step 8: Token Revocation
|
|
1083
|
+
|
|
1084
|
+
```bash
|
|
1085
|
+
# Get token ID from list
|
|
1086
|
+
a2a list
|
|
1087
|
+
|
|
1088
|
+
# Revoke it
|
|
1089
|
+
a2a revoke <token_id>
|
|
1090
|
+
|
|
1091
|
+
# Verify call fails
|
|
1092
|
+
curl -X POST http://localhost:$PORT/api/a2a/invoke \
|
|
1093
|
+
-H "Content-Type: application/json" \
|
|
1094
|
+
-H "Authorization: Bearer $TOKEN" \
|
|
1095
|
+
-d '{"message": "Should fail"}'
|
|
1096
|
+
```
|
|
1097
|
+
|
|
1098
|
+
**Expected:** 401 with `{"error": "unauthorized"}` after revocation.
|
|
1099
|
+
|
|
1100
|
+
**Report if:** Revoked token still works.
|
|
1101
|
+
|
|
1102
|
+
## Step 9: Cleanup
|
|
1103
|
+
|
|
1104
|
+
```bash
|
|
1105
|
+
a2a uninstall
|
|
1106
|
+
```
|
|
1107
|
+
|
|
1108
|
+
**Expected:** Server stopped, config files removed.
|
|
1109
|
+
|
|
1110
|
+
## Reporting Format
|
|
1111
|
+
|
|
1112
|
+
After completing all steps, produce a report in this format:
|
|
1113
|
+
|
|
1114
|
+
```markdown
|
|
1115
|
+
# A2A E2E Test Report
|
|
1116
|
+
|
|
1117
|
+
**Date:** YYYY-MM-DD HH:MM:SS
|
|
1118
|
+
**Package Version:** x.y.z
|
|
1119
|
+
**Node Version:** vXX.X.X
|
|
1120
|
+
**Platform:** linux/darwin/win32
|
|
1121
|
+
|
|
1122
|
+
## Results
|
|
1123
|
+
|
|
1124
|
+
| Step | Name | Status | Notes |
|
|
1125
|
+
|------|------|--------|-------|
|
|
1126
|
+
| 1 | Install | PASS/FAIL | ... |
|
|
1127
|
+
| 2 | Onboarding | PASS/FAIL | ... |
|
|
1128
|
+
| 3 | Server Health | PASS/FAIL | ... |
|
|
1129
|
+
| 4 | Create Token | PASS/FAIL | ... |
|
|
1130
|
+
| 5 | Inbound Call | PASS/FAIL | ... |
|
|
1131
|
+
| 6 | Multi-Turn | PASS/FAIL | ... |
|
|
1132
|
+
| 7 | Error Cases | PASS/FAIL | ... |
|
|
1133
|
+
| 8 | Revocation | PASS/FAIL | ... |
|
|
1134
|
+
| 9 | Cleanup | PASS/FAIL | ... |
|
|
1135
|
+
|
|
1136
|
+
## Issues Found
|
|
1137
|
+
|
|
1138
|
+
### Issue 1: [Title]
|
|
1139
|
+
**Step:** N
|
|
1140
|
+
**Severity:** critical/high/medium/low
|
|
1141
|
+
**Description:** What happened
|
|
1142
|
+
**Expected:** What should have happened
|
|
1143
|
+
**Actual:** What actually happened
|
|
1144
|
+
**Reproduction:** Exact commands to reproduce
|
|
1145
|
+
```
|
|
1146
|
+
|
|
1147
|
+
For each issue found, the orchestrator should create a Linear ticket
|
|
1148
|
+
with the "Todo" status on the "a2a calling" team, labeled "Bug" and "E2E".
|
|
1149
|
+
```
|
|
1150
|
+
|
|
1151
|
+
**Step 2: Commit**
|
|
1152
|
+
|
|
1153
|
+
```bash
|
|
1154
|
+
git add docs/prompts/e2e-test-agent.md
|
|
1155
|
+
git commit -m "docs: add E2E test agent prompt sequence"
|
|
1156
|
+
```
|
|
1157
|
+
|
|
1158
|
+
---
|
|
1159
|
+
|
|
1160
|
+
### Task 6: Create report generator with Linear integration
|
|
1161
|
+
|
|
1162
|
+
**Files:**
|
|
1163
|
+
- Create: `test/e2e/report.js`
|
|
1164
|
+
- Test: `test/e2e/report.test.js`
|
|
1165
|
+
|
|
1166
|
+
**Step 1: Write the failing test**
|
|
1167
|
+
|
|
1168
|
+
```javascript
|
|
1169
|
+
// test/e2e/report.test.js
|
|
1170
|
+
module.exports = function (test, assert, helpers) {
|
|
1171
|
+
const { E2EReport } = require('./report');
|
|
1172
|
+
|
|
1173
|
+
test('E2EReport tracks step results', () => {
|
|
1174
|
+
const report = new E2EReport({ version: '0.6.44', nodeVersion: 'v20.0.0' });
|
|
1175
|
+
|
|
1176
|
+
report.pass(1, 'Install', 'Installed successfully');
|
|
1177
|
+
report.pass(2, 'Onboarding', 'Completed in 3s');
|
|
1178
|
+
report.fail(3, 'Server Health', 'Ping returned 500', {
|
|
1179
|
+
expected: '200 with pong',
|
|
1180
|
+
actual: '500 internal error',
|
|
1181
|
+
severity: 'critical'
|
|
1182
|
+
});
|
|
1183
|
+
|
|
1184
|
+
assert.equal(report.results.length, 3);
|
|
1185
|
+
assert.equal(report.passed, 2);
|
|
1186
|
+
assert.equal(report.failed, 1);
|
|
1187
|
+
assert.equal(report.issues.length, 1);
|
|
1188
|
+
assert.equal(report.issues[0].step, 3);
|
|
1189
|
+
assert.equal(report.issues[0].severity, 'critical');
|
|
1190
|
+
});
|
|
1191
|
+
|
|
1192
|
+
test('E2EReport generates markdown', () => {
|
|
1193
|
+
const report = new E2EReport({ version: '0.6.44', nodeVersion: 'v20.0.0' });
|
|
1194
|
+
|
|
1195
|
+
report.pass(1, 'Install', 'OK');
|
|
1196
|
+
report.fail(2, 'Onboarding', 'Manifest not saved', {
|
|
1197
|
+
expected: 'Manifest file created',
|
|
1198
|
+
actual: 'File missing',
|
|
1199
|
+
severity: 'high'
|
|
1200
|
+
});
|
|
1201
|
+
|
|
1202
|
+
const md = report.toMarkdown();
|
|
1203
|
+
assert.includes(md, '# A2A E2E Test Report');
|
|
1204
|
+
assert.includes(md, '0.6.44');
|
|
1205
|
+
assert.includes(md, 'PASS');
|
|
1206
|
+
assert.includes(md, 'FAIL');
|
|
1207
|
+
assert.includes(md, 'Manifest not saved');
|
|
1208
|
+
assert.includes(md, 'high');
|
|
1209
|
+
});
|
|
1210
|
+
|
|
1211
|
+
test('E2EReport generates Linear issue descriptions', () => {
|
|
1212
|
+
const report = new E2EReport({ version: '0.6.44', nodeVersion: 'v20.0.0' });
|
|
1213
|
+
|
|
1214
|
+
report.fail(5, 'Inbound Call', 'Got 500 instead of 200', {
|
|
1215
|
+
expected: '200 success',
|
|
1216
|
+
actual: '500 internal_error',
|
|
1217
|
+
severity: 'critical',
|
|
1218
|
+
reproduction: 'curl -X POST http://localhost:3001/api/a2a/invoke ...'
|
|
1219
|
+
});
|
|
1220
|
+
|
|
1221
|
+
const issues = report.toLinearIssues();
|
|
1222
|
+
assert.equal(issues.length, 1);
|
|
1223
|
+
assert.includes(issues[0].title, 'Inbound Call');
|
|
1224
|
+
assert.includes(issues[0].description, '500');
|
|
1225
|
+
assert.includes(issues[0].description, 'Reproduction');
|
|
1226
|
+
assert.equal(issues[0].priority, 1); // critical = urgent
|
|
1227
|
+
});
|
|
1228
|
+
};
|
|
1229
|
+
```
|
|
1230
|
+
|
|
1231
|
+
**Step 2: Run test to verify it fails**
|
|
1232
|
+
|
|
1233
|
+
Run: `node test/run.js --filter "E2EReport"`
|
|
1234
|
+
Expected: FAIL — module not found
|
|
1235
|
+
|
|
1236
|
+
**Step 3: Write minimal implementation**
|
|
1237
|
+
|
|
1238
|
+
```javascript
|
|
1239
|
+
// test/e2e/report.js
|
|
1240
|
+
/**
|
|
1241
|
+
* E2E Test Report Generator
|
|
1242
|
+
*
|
|
1243
|
+
* Tracks pass/fail results for each step and can output:
|
|
1244
|
+
* - Markdown summary for human review
|
|
1245
|
+
* - Linear issue descriptions for automated bug filing
|
|
1246
|
+
*/
|
|
1247
|
+
/**
 * Collects pass/fail results for the E2E steps and renders them either as a
 * Markdown summary or as Linear issue payloads.
 */
class E2EReport {
  /**
   * @param {Object} [meta={}] - Optional metadata overrides.
   * @param {string} [meta.version] - Package version under test (default 'unknown').
   * @param {string} [meta.nodeVersion] - Defaults to process.version.
   * @param {string} [meta.platform] - Defaults to process.platform.
   */
  constructor(meta = {}) {
    this.meta = {
      version: meta.version || 'unknown',
      nodeVersion: meta.nodeVersion || process.version,
      platform: meta.platform || process.platform,
      date: new Date().toISOString()
    };
    this.results = [];
    this.issues = [];
    this.passed = 0;
    this.failed = 0;
  }

  /**
   * Record a passing step.
   * @param {number} step - Step number.
   * @param {string} name - Step name.
   * @param {string} [notes=''] - Free-form notes shown in the results table.
   */
  pass(step, name, notes = '') {
    this.results.push({ step, name, status: 'PASS', notes });
    this.passed++;
  }

  /**
   * Record a failing step and open an issue for it.
   * @param {number} step - Step number.
   * @param {string} name - Step name.
   * @param {string} [notes=''] - What went wrong; coerced to a string.
   * @param {Object} [details={}] - Optional expected / actual / severity / reproduction.
   */
  fail(step, name, notes = '', details = {}) {
    // Normalize up front: toLinearIssues() calls string methods on notes, so an
    // omitted or non-string value (e.g. an undefined err.message) must not get through.
    const text = notes == null ? '' : String(notes);
    const info = details || {};

    this.results.push({ step, name, status: 'FAIL', notes: text });
    this.failed++;
    this.issues.push({
      step,
      name,
      notes: text,
      expected: info.expected || '',
      actual: info.actual || '',
      severity: info.severity || 'medium',
      reproduction: info.reproduction || ''
    });
  }

  /**
   * Render the report as Markdown: metadata, summary, results table and,
   * when there are failures, one section per issue.
   * @returns {string}
   */
  toMarkdown() {
    // A raw "|" or newline inside a cell would break the table row.
    const cell = (value) => (value == null ? '' : String(value))
      .replace(/\r?\n/g, ' ')
      .replace(/\|/g, '\\|');

    const lines = [
      '# A2A E2E Test Report',
      '',
      `**Date:** ${this.meta.date}`,
      `**Package Version:** ${this.meta.version}`,
      `**Node Version:** ${this.meta.nodeVersion}`,
      `**Platform:** ${this.meta.platform}`,
      '',
      `## Summary: ${this.passed} passed, ${this.failed} failed`,
      '',
      '## Results',
      '',
      '| Step | Name | Status | Notes |',
      '|------|------|--------|-------|'
    ];

    for (const r of this.results) {
      lines.push(`| ${r.step} | ${cell(r.name)} | ${r.status} | ${cell(r.notes)} |`);
    }

    if (this.issues.length > 0) {
      lines.push('', '## Issues Found', '');
      this.issues.forEach((issue, index) => {
        lines.push(
          `### Issue ${index + 1}: ${issue.name}`,
          `**Step:** ${issue.step}`,
          `**Severity:** ${issue.severity}`,
          `**Description:** ${issue.notes}`,
          `**Expected:** ${issue.expected}`,
          `**Actual:** ${issue.actual}`,
          ''
        );
        if (issue.reproduction) {
          lines.push('**Reproduction:**', '```', issue.reproduction, '```', '');
        }
      });
    }

    return lines.join('\n');
  }

  /**
   * Convert issues to Linear issue format.
   * @returns {Array<{title: string, description: string, priority: number, labels: string[]}>}
   */
  toLinearIssues() {
    // A Map (not an object literal) so an unexpected severity such as
    // "constructor" falls back to Normal instead of hitting Object.prototype.
    const severityToPriority = new Map([
      ['critical', 1], // Urgent
      ['high', 2],     // High
      ['medium', 3],   // Normal
      ['low', 4]       // Low
    ]);

    return this.issues.map((issue) => {
      // Titles are single-line: fold newlines before truncating.
      const summary = issue.notes.replace(/\r?\n/g, ' ').slice(0, 60);
      return {
        title: `[E2E] Step ${issue.step}: ${issue.name} — ${summary}`,
        description: [
          '## E2E Test Failure',
          '',
          `**Step:** ${issue.step} — ${issue.name}`,
          `**Severity:** ${issue.severity}`,
          `**Package Version:** ${this.meta.version}`,
          `**Node:** ${this.meta.nodeVersion}`,
          `**Platform:** ${this.meta.platform}`,
          '',
          '### Expected',
          issue.expected,
          '',
          '### Actual',
          issue.actual,
          '',
          issue.reproduction ? `### Reproduction\n\`\`\`\n${issue.reproduction}\n\`\`\`` : ''
        ].join('\n'),
        priority: severityToPriority.get(issue.severity) || 3,
        labels: ['Bug', 'E2E']
      };
    });
  }
}
|
|
1359
|
+
|
|
1360
|
+
module.exports = { E2EReport };
|
|
1361
|
+
```
|
|
1362
|
+
|
|
1363
|
+
**Step 4: Run test to verify it passes**
|
|
1364
|
+
|
|
1365
|
+
Run: `node test/run.js --filter "E2EReport"`
|
|
1366
|
+
Expected: PASS (all 3 tests)
|
|
1367
|
+
|
|
1368
|
+
**Step 5: Commit**
|
|
1369
|
+
|
|
1370
|
+
```bash
|
|
1371
|
+
git add test/e2e/report.js test/e2e/report.test.js
|
|
1372
|
+
git commit -m "feat(e2e): add report generator with Linear issue formatting"
|
|
1373
|
+
```
|
|
1374
|
+
|
|
1375
|
+
---
|
|
1376
|
+
|
|
1377
|
+
### Task 7: Create orchestrator entry point
|
|
1378
|
+
|
|
1379
|
+
**Files:**
|
|
1380
|
+
- Create: `test/e2e/orchestrate.js`
|
|
1381
|
+
|
|
1382
|
+
This is the script that ties everything together. It can be run standalone (`node test/e2e/orchestrate.js`) or invoked by a Claude agent.
|
|
1383
|
+
|
|
1384
|
+
**Step 1: Write the orchestrator**
|
|
1385
|
+
|
|
1386
|
+
```javascript
|
|
1387
|
+
#!/usr/bin/env node
|
|
1388
|
+
/**
|
|
1389
|
+
* E2E Test Orchestrator
|
|
1390
|
+
*
|
|
1391
|
+
* Runs the full A2A E2E test suite:
|
|
1392
|
+
* 1. Sets up two isolated agent environments
|
|
1393
|
+
* 2. Runs onboarding on both
|
|
1394
|
+
* 3. Exchanges invites
|
|
1395
|
+
* 4. Tests cross-agent calls
|
|
1396
|
+
* 5. Tests error cases
|
|
1397
|
+
* 6. Generates report
|
|
1398
|
+
*
|
|
1399
|
+
* Usage:
|
|
1400
|
+
* node test/e2e/orchestrate.js [--json] [--verbose]
|
|
1401
|
+
*
|
|
1402
|
+
* Exit codes:
|
|
1403
|
+
* 0 = all tests passed
|
|
1404
|
+
* 1 = one or more failures
|
|
1405
|
+
*/
|
|
1406
|
+
|
|
1407
|
+
const { TwoServerHarness } = require('./two-server');
|
|
1408
|
+
const { E2EReport } = require('./report');
|
|
1409
|
+
const http = require('http');
|
|
1410
|
+
|
|
1411
|
+
const verbose = process.argv.includes('--verbose');
|
|
1412
|
+
const jsonOutput = process.argv.includes('--json');
|
|
1413
|
+
|
|
1414
|
+
/**
 * Print an indented progress line, but only when the module-level
 * `verbose` flag is set.
 * @param {string} msg - Text to print.
 */
function log(msg) {
  if (!verbose) {
    return;
  }
  console.log(` ${msg}`);
}
|
|
1417
|
+
|
|
1418
|
+
/**
 * POST a JSON body to `url` and resolve with the parsed response.
 *
 * @param {string} url - Absolute http:// URL. An unparsable URL throws synchronously.
 * @param {*} body - Value serialized with JSON.stringify and sent as the request body.
 * @param {Object} [headers={}] - Extra request headers; they override the defaults below.
 * @returns {Promise<{statusCode: number, headers: Object, body: *}>} `body` is the parsed
 *   JSON when the response text is valid JSON, otherwise the raw response text.
 */
function httpPost(url, body, headers = {}) {
  const target = new URL(url);
  return new Promise((resolve, reject) => {
    const payload = JSON.stringify(body);
    const req = http.request({
      hostname: target.hostname,
      port: target.port,
      // Keep the query string: pathname alone would silently drop "?a=b".
      path: `${target.pathname}${target.search}`,
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(payload),
        ...headers
      }
    }, (res) => {
      // Decode at the stream level so a multi-byte character split across
      // two chunks is not corrupted by per-chunk string conversion.
      res.setEncoding('utf8');
      let raw = '';
      res.on('data', (chunk) => { raw += chunk; });
      res.on('error', reject);
      res.on('end', () => {
        let parsed;
        try { parsed = JSON.parse(raw); } catch { parsed = raw; }
        resolve({ statusCode: res.statusCode, headers: res.headers, body: parsed });
      });
    });
    req.on('error', reject);
    req.write(payload);
    req.end();
  });
}
|
|
1446
|
+
|
|
1447
|
+
/**
 * GET `url` and resolve with the status code and parsed body.
 *
 * @param {string} url - URL passed straight to http.get.
 * @returns {Promise<{statusCode: number, body: *}>} `body` is the parsed JSON when
 *   the response text is valid JSON, otherwise the raw response text. The promise
 *   rejects on request or response stream errors.
 */
function httpGet(url) {
  return new Promise((resolve, reject) => {
    const req = http.get(url, (res) => {
      // Decode at the stream level so a multi-byte character split across
      // two chunks is not corrupted by per-chunk string conversion.
      res.setEncoding('utf8');
      let raw = '';
      res.on('data', (chunk) => { raw += chunk; });
      res.on('error', reject);
      res.on('end', () => {
        let body;
        try { body = JSON.parse(raw); } catch { body = raw; }
        resolve({ statusCode: res.statusCode, body });
      });
    });
    req.on('error', reject);
  });
}
|
|
1459
|
+
|
|
1460
|
+
/**
 * Run the orchestrated E2E sequence against a TwoServerHarness, print the
 * report (JSON when `jsonOutput` is set, Markdown otherwise) and exit the
 * process with 1 if any step failed, 0 otherwise.
 *
 * Steps: environment setup, server health, token creation, contact exchange,
 * inbound call, multi-turn follow-up, error cases, token revocation.
 * An exception in any step is recorded as a critical "Orchestrator Error";
 * a teardown failure is recorded as its own failure so the report is still printed.
 */
async function main() {
  let version;
  try { version = require('../../package.json').version; } catch { version = 'unknown'; }

  const report = new E2EReport({ version });
  let harness = null;

  // Thrown values are not guaranteed to be Error instances; fall back to their
  // string form so the report never receives an undefined message or stack.
  const errorText = (err) => (err && err.message ? err.message : String(err));
  const errorDetail = (err) => (err && err.stack ? err.stack : String(err));

  try {
    // ── Step 1: Environment Setup ──
    console.log('Step 1: Setting up two isolated agents...');
    harness = new TwoServerHarness();
    await harness.setup();
    log(`Agent A on port ${harness.agentA.port}`);
    log(`Agent B on port ${harness.agentB.port}`);
    report.pass(1, 'Environment Setup', `Ports: ${harness.agentA.port}, ${harness.agentB.port}`);

    // Every call below targets Agent A's invoke endpoint.
    const invokeUrl = `http://${harness.agentA.hostname}/api/a2a/invoke`;

    // ── Step 2: Server Health ──
    console.log('Step 2: Checking server health...');
    // The two pings are independent, so issue them concurrently.
    const [pingA, pingB] = await Promise.all([
      httpGet(`http://127.0.0.1:${harness.agentA.port}/api/a2a/ping`),
      httpGet(`http://127.0.0.1:${harness.agentB.port}/api/a2a/ping`)
    ]);

    if (pingA.body.pong && pingB.body.pong) {
      report.pass(2, 'Server Health', 'Both agents respond to ping');
    } else {
      report.fail(2, 'Server Health', 'Ping failed', {
        expected: 'pong: true from both agents',
        actual: `A: ${JSON.stringify(pingA.body)}, B: ${JSON.stringify(pingB.body)}`,
        severity: 'critical'
      });
    }

    // ── Step 3: Token Creation ──
    console.log('Step 3: Creating tokens...');
    const tokenA = harness.agentA.tokenStore.create({
      name: 'E2E-ForAgentB',
      permissions: 'public',
      expires: '1h',
      maxCalls: 10,
      allowedTopics: ['testing']
    });
    const authHeader = { Authorization: `Bearer ${tokenA.token}` };

    if (tokenA.token && tokenA.token.startsWith('fed_')) {
      report.pass(3, 'Token Creation', `Token: ${tokenA.token.slice(0, 12)}...`);
    } else {
      report.fail(3, 'Token Creation', 'Token format invalid', {
        expected: 'fed_... format',
        actual: String(tokenA.token),
        severity: 'critical'
      });
    }

    // ── Step 4: Contact Exchange ──
    console.log('Step 4: Exchanging invites...');
    const inviteUrl = `${harness.agentA.inviteBase}/${tokenA.token}`;
    const addResult = harness.agentB.tokenStore.addContact(inviteUrl, { name: 'AgentA' });
    const contacts = harness.agentB.tokenStore.listContacts();

    if (addResult.success && contacts.length === 1) {
      report.pass(4, 'Contact Exchange', `Agent B added Agent A as contact`);
    } else {
      report.fail(4, 'Contact Exchange', 'Failed to add contact', {
        expected: 'Contact added successfully',
        actual: `success: ${addResult.success}, contacts: ${contacts.length}`,
        severity: 'high'
      });
    }

    // ── Step 5: Inbound Call ──
    console.log('Step 5: Testing inbound call (B → A)...');
    const callRes = await httpPost(
      invokeUrl,
      {
        message: 'Hello from E2E Agent B',
        caller: { name: 'AgentB', owner: 'E2E Orchestrator' }
      },
      authHeader
    );

    if (callRes.statusCode === 200 && callRes.body.success && callRes.body.conversation_id) {
      report.pass(5, 'Inbound Call', `Conv: ${callRes.body.conversation_id}`);
    } else {
      report.fail(5, 'Inbound Call', `Status ${callRes.statusCode}`, {
        expected: '200 with success: true and conversation_id',
        actual: JSON.stringify(callRes.body).slice(0, 200),
        severity: 'critical'
      });
    }

    // ── Step 6: Multi-Turn ──
    console.log('Step 6: Testing multi-turn conversation...');
    if (callRes.body.conversation_id) {
      const followUp = await httpPost(
        invokeUrl,
        {
          message: 'Follow-up from Agent B',
          conversation_id: callRes.body.conversation_id,
          caller: { name: 'AgentB' }
        },
        authHeader
      );

      if (followUp.statusCode === 200 && followUp.body.conversation_id === callRes.body.conversation_id) {
        report.pass(6, 'Multi-Turn', `Same conv ID, tokens remaining: ${followUp.body.tokens_remaining}`);
      } else {
        report.fail(6, 'Multi-Turn', 'Conversation ID mismatch or failure', {
          expected: `conv_id: ${callRes.body.conversation_id}`,
          actual: `conv_id: ${followUp.body.conversation_id}, status: ${followUp.statusCode}`,
          severity: 'high'
        });
      }
    } else {
      report.fail(6, 'Multi-Turn', 'Skipped — no conversation_id from step 5', { severity: 'high' });
    }

    // ── Step 7: Error Cases ──
    console.log('Step 7: Testing error cases...');
    let errorsPassed = 0;
    const errorTotal = 3;

    // 7a: No auth
    const noAuth = await httpPost(invokeUrl, { message: 'No auth' });
    if (noAuth.statusCode === 401 && noAuth.body.error === 'missing_token') errorsPassed++;

    // 7b: Bad token
    const badToken = await httpPost(
      invokeUrl,
      { message: 'Bad token' },
      { Authorization: 'Bearer fed_totally_invalid' }
    );
    if (badToken.statusCode === 401 && badToken.body.error === 'unauthorized') errorsPassed++;

    // 7c: Missing message
    const noMsg = await httpPost(invokeUrl, {}, authHeader);
    if (noMsg.statusCode === 400 && noMsg.body.error === 'missing_message') errorsPassed++;

    if (errorsPassed === errorTotal) {
      report.pass(7, 'Error Cases', `All ${errorTotal} error cases correct`);
    } else {
      report.fail(7, 'Error Cases', `${errorsPassed}/${errorTotal} passed`, {
        expected: `All ${errorTotal} error cases return correct status/error`,
        actual: `noAuth: ${noAuth.statusCode}/${noAuth.body.error}, badToken: ${badToken.statusCode}/${badToken.body.error}, noMsg: ${noMsg.statusCode}/${noMsg.body.error}`,
        severity: 'high'
      });
    }

    // ── Step 8: Token Revocation ──
    console.log('Step 8: Testing token revocation...');
    harness.agentA.tokenStore.revoke(tokenA.record.id);
    const revokedCall = await httpPost(
      invokeUrl,
      { message: 'After revoke', caller: { name: 'AgentB' } },
      authHeader
    );

    if (revokedCall.statusCode === 401) {
      report.pass(8, 'Token Revocation', 'Revoked token correctly rejected');
    } else {
      report.fail(8, 'Token Revocation', `Got ${revokedCall.statusCode} instead of 401`, {
        expected: '401 unauthorized',
        actual: `${revokedCall.statusCode}: ${JSON.stringify(revokedCall.body)}`,
        severity: 'critical',
        reproduction: `Revoke token then POST /invoke with same token`
      });
    }

  } catch (err) {
    report.fail(0, 'Orchestrator Error', errorText(err), {
      expected: 'No uncaught errors',
      actual: errorDetail(err),
      severity: 'critical'
    });
  } finally {
    if (harness) {
      // A teardown failure must not reject main(): that would skip the report
      // below and discard every result collected so far.
      try {
        await harness.teardown();
      } catch (err) {
        report.fail(9, 'Teardown', errorText(err), {
          expected: 'Harness teardown completes without error',
          actual: errorDetail(err),
          severity: 'medium'
        });
      }
    }
  }

  // ── Output ──
  if (jsonOutput) {
    console.log(JSON.stringify({
      meta: report.meta,
      passed: report.passed,
      failed: report.failed,
      results: report.results,
      issues: report.issues,
      linearIssues: report.toLinearIssues()
    }, null, 2));
  } else {
    console.log('');
    console.log(report.toMarkdown());
  }

  process.exit(report.failed > 0 ? 1 : 0);
}
|
|
1659
|
+
|
|
1660
|
+
main();
|
|
1661
|
+
```
|
|
1662
|
+
|
|
1663
|
+
**Step 2: Run the orchestrator**
|
|
1664
|
+
|
|
1665
|
+
Run: `node test/e2e/orchestrate.js --verbose`
|
|
1666
|
+
Expected: All 8 steps PASS, exit code 0
|
|
1667
|
+
|
|
1668
|
+
**Step 3: Commit**
|
|
1669
|
+
|
|
1670
|
+
```bash
|
|
1671
|
+
git add test/e2e/orchestrate.js
|
|
1672
|
+
git commit -m "feat(e2e): add orchestrator — runs full E2E suite and generates report"
|
|
1673
|
+
```
|
|
1674
|
+
|
|
1675
|
+
---
|
|
1676
|
+
|
|
1677
|
+
## Phase 4: Integration with Test Runner
|
|
1678
|
+
|
|
1679
|
+
### Task 8: Register E2E tests with the existing test runner
|
|
1680
|
+
|
|
1681
|
+
**Files:**
|
|
1682
|
+
- Modify: `test/run.js` — add `--e2e` flag support
|
|
1683
|
+
- Create: `test/e2e/index.test.js` — wrapper that runs E2E tests via the standard runner
|
|
1684
|
+
|
|
1685
|
+
The existing test runner at `test/run.js` supports `--unit` and `--integration` flags. We add `--e2e` for the new tests.
|
|
1686
|
+
|
|
1687
|
+
**Step 1: Check current test runner structure**
|
|
1688
|
+
|
|
1689
|
+
Read `test/run.js` to understand how it discovers and runs test files. The runner globs `test/unit/*.test.js` and `test/integration/*.test.js`. We need it to also glob `test/e2e/*.test.js` when `--e2e` is passed (or when no filter is specified and `--all` is used).
|
|
1690
|
+
|
|
1691
|
+
**Step 2: Create E2E index wrapper**
|
|
1692
|
+
|
|
1693
|
+
```javascript
|
|
1694
|
+
// test/e2e/index.test.js
|
|
1695
|
+
/**
|
|
1696
|
+
* E2E Test Suite
|
|
1697
|
+
*
|
|
1698
|
+
* These tests require ephemeral ports and take longer than unit/integration tests.
|
|
1699
|
+
* Run with: node test/run.js --e2e
|
|
1700
|
+
* Or: node test/run.js --filter "E2E"
|
|
1701
|
+
*/
|
|
1702
|
+
module.exports = function (test, assert, helpers) {
|
|
1703
|
+
// Re-export individual E2E test files
|
|
1704
|
+
require('./env.test.js')(test, assert, helpers);
|
|
1705
|
+
require('./cli-runner.test.js')(test, assert, helpers);
|
|
1706
|
+
require('./two-server.test.js')(test, assert, helpers);
|
|
1707
|
+
require('./full-flow.test.js')(test, assert, helpers);
|
|
1708
|
+
require('./report.test.js')(test, assert, helpers);
|
|
1709
|
+
};
|
|
1710
|
+
```
|
|
1711
|
+
|
|
1712
|
+
**Step 3: Modify test runner to support `--e2e` flag**
|
|
1713
|
+
|
|
1714
|
+
In `test/run.js`, locate where test files are discovered and add:
|
|
1715
|
+
- When `--e2e` is passed: only run `test/e2e/*.test.js`
|
|
1716
|
+
- When `--all` is passed: run unit + integration + E2E tests
|
|
1717
|
+
- Default behavior (no flags): run unit + integration (NOT e2e, since they're slower)
|
|
1718
|
+
|
|
1719
|
+
**Step 4: Run all E2E tests**
|
|
1720
|
+
|
|
1721
|
+
Run: `node test/run.js --e2e --verbose`
|
|
1722
|
+
Expected: All E2E tests pass
|
|
1723
|
+
|
|
1724
|
+
**Step 5: Run full suite to verify no regressions**
|
|
1725
|
+
|
|
1726
|
+
Run: `npm test`
|
|
1727
|
+
Expected: All unit + integration tests still pass (E2E excluded by default)
|
|
1728
|
+
|
|
1729
|
+
**Step 6: Commit**
|
|
1730
|
+
|
|
1731
|
+
```bash
|
|
1732
|
+
git add test/e2e/index.test.js test/run.js
|
|
1733
|
+
git commit -m "feat(e2e): register E2E tests with test runner under --e2e flag"
|
|
1734
|
+
```
|
|
1735
|
+
|
|
1736
|
+
---
|
|
1737
|
+
|
|
1738
|
+
## Phase 5: Documentation
|
|
1739
|
+
|
|
1740
|
+
### Task 9: Add E2E section to protocol docs
|
|
1741
|
+
|
|
1742
|
+
**Files:**
|
|
1743
|
+
- Modify: `docs/protocol.md` — add "E2E Testing" section
|
|
1744
|
+
|
|
1745
|
+
**Step 1: Add E2E testing documentation section**
|
|
1746
|
+
|
|
1747
|
+
At the end of `docs/protocol.md`, add:
|
|
1748
|
+
|
|
1749
|
+
```markdown
|
|
1750
|
+
## E2E Testing
|
|
1751
|
+
|
|
1752
|
+
### Running the E2E Suite
|
|
1753
|
+
|
|
1754
|
+
~~~bash
|
|
1755
|
+
# Run E2E tests via test runner
|
|
1756
|
+
node test/run.js --e2e
|
|
1757
|
+
|
|
1758
|
+
# Run the orchestrator directly (verbose output)
|
|
1759
|
+
node test/e2e/orchestrate.js --verbose
|
|
1760
|
+
|
|
1761
|
+
# Get JSON report (for automated processing)
|
|
1762
|
+
node test/e2e/orchestrate.js --json
|
|
1763
|
+
~~~
|
|
1764
|
+
|
|
1765
|
+
### AI Agent Testing
|
|
1766
|
+
|
|
1767
|
+
The E2E prompt sequence at `docs/prompts/e2e-test-agent.md` provides step-by-step
|
|
1768
|
+
instructions for a Claude subagent to test a fresh a2acalling installation.
|
|
1769
|
+
|
|
1770
|
+
**Orchestrator workflow:**
|
|
1771
|
+
1. Spawn subagent with the prompt from `docs/prompts/e2e-test-agent.md`
|
|
1772
|
+
2. Subagent follows the 9-step sequence
|
|
1773
|
+
3. Subagent produces a markdown report
|
|
1774
|
+
4. Orchestrator reviews failures and creates Linear issues
|
|
1775
|
+
|
|
1776
|
+
### Architecture
|
|
1777
|
+
|
|
1778
|
+
The E2E system uses:
|
|
1779
|
+
- `test/e2e/env.js` — Isolated temp directories and port allocation
|
|
1780
|
+
- `test/e2e/cli-runner.js` — Structured CLI command execution
|
|
1781
|
+
- `test/e2e/two-server.js` — Two independent Express servers on ephemeral ports
|
|
1782
|
+
- `test/e2e/full-flow.test.js` — Cross-agent call tests
|
|
1783
|
+
- `test/e2e/report.js` — Markdown and Linear issue generation
|
|
1784
|
+
- `test/e2e/orchestrate.js` — Standalone orchestrator script
|
|
1785
|
+
```
|
|
1786
|
+
|
|
1787
|
+
**Step 2: Commit**
|
|
1788
|
+
|
|
1789
|
+
```bash
|
|
1790
|
+
git add docs/protocol.md docs/prompts/e2e-test-agent.md
|
|
1791
|
+
git commit -m "docs: add E2E testing section and agent prompt sequence"
|
|
1792
|
+
```
|
|
1793
|
+
|
|
1794
|
+
---
|
|
1795
|
+
|
|
1796
|
+
## Phase 6: Unified Summary Prompt & Output Template
|
|
1797
|
+
|
|
1798
|
+
### Context: Why This Matters
|
|
1799
|
+
|
|
1800
|
+
Today there are 3 separate summary prompts across the codebase:
|
|
1801
|
+
|
|
1802
|
+
1. **`server.js:generateSummary`** — Simple markdown template. No disclosure manifest, no goals, no collaboration context.
|
|
1803
|
+
2. **`openclaw-integration.js:buildSummaryPrompt`** — Strategic JSON. Has owner context from USER.md but no disclosure manifest, no collaboration score explanation, no conversation objective.
|
|
1804
|
+
3. **`claude-subagent.js:runClaudeSummary`** — Resumes Claude session. Relies entirely on implicit session memory — no explicit context at all.
|
|
1805
|
+
|
|
1806
|
+
**None of them include the disclosure manifest, collaboration score context, or conversation objective.** Without that, the summarizer can't assess whether the agent stayed within disclosure boundaries, whether objectives were met, or whether the collaboration score is justified.
|
|
1807
|
+
|
|
1808
|
+
### Design: Unified Summary Prompt
|
|
1809
|
+
|
|
1810
|
+
Both paths (OpenClaw orchestrator reading the transcript, or spawned subagent) use the same prompt and context. The prompt always includes:
|
|
1811
|
+
|
|
1812
|
+
1. **Conversation objective** — why this call was made
|
|
1813
|
+
2. **Full disclosure manifest for this tier** — topics, objectives, do_not_discuss, never_disclose
|
|
1814
|
+
3. **Collaboration state** — phase progression, overlap score, active threads, candidate collaborations
|
|
1815
|
+
4. **What overlap score means** — so the summarizer can validate it
|
|
1816
|
+
5. **Full transcript**
|
|
1817
|
+
|
|
1818
|
+
### Task 10: Create unified summary prompt builder
|
|
1819
|
+
|
|
1820
|
+
**Files:**
|
|
1821
|
+
- Create: `src/lib/summary-prompt.js`
|
|
1822
|
+
- Test: `test/unit/summary-prompt.test.js`
|
|
1823
|
+
|
|
1824
|
+
**Step 1: Write the failing test**
|
|
1825
|
+
|
|
1826
|
+
```javascript
|
|
1827
|
+
// test/unit/summary-prompt.test.js
|
|
1828
|
+
module.exports = function (test, assert, helpers) {
|
|
1829
|
+
|
|
1830
|
+
test('buildUnifiedSummaryPrompt includes all required sections', () => {
|
|
1831
|
+
delete require.cache[require.resolve('../../src/lib/summary-prompt')];
|
|
1832
|
+
const { buildUnifiedSummaryPrompt } = require('../../src/lib/summary-prompt');
|
|
1833
|
+
|
|
1834
|
+
const prompt = buildUnifiedSummaryPrompt({
|
|
1835
|
+
transcript: [
|
|
1836
|
+
{ direction: 'inbound', content: 'Hello from Golda' },
|
|
1837
|
+
{ direction: 'outbound', content: 'Welcome Golda!' }
|
|
1838
|
+
],
|
|
1839
|
+
callerInfo: { name: 'Golda Deluxe', owner: null, context: 'Authentication research' },
|
|
1840
|
+
conversationObjective: 'Explore AI authentication partnerships',
|
|
1841
|
+
disclosure: {
|
|
1842
|
+
topics: [
|
|
1843
|
+
{ topic: 'Market analysis', description: 'Tracking luxury goods indices' }
|
|
1844
|
+
],
|
|
1845
|
+
objectives: [
|
|
1846
|
+
{ objective: 'Find partners', description: 'Authentication network' }
|
|
1847
|
+
],
|
|
1848
|
+
doNotDiscuss: [
|
|
1849
|
+
{ topic: 'Portfolio valuations', reason: 'Share strategy not numbers' }
|
|
1850
|
+
],
|
|
1851
|
+
neverDisclose: ['Bank account numbers', 'Vault locations']
|
|
1852
|
+
},
|
|
1853
|
+
collaborationState: {
|
|
1854
|
+
phase: 'exploring',
|
|
1855
|
+
overlapScore: 0.45,
|
|
1856
|
+
activeThreads: ['authentication', 'ML models'],
|
|
1857
|
+
candidateCollaborations: ['joint pilot'],
|
|
1858
|
+
turnCount: 4,
|
|
1859
|
+
closeSignal: false
|
|
1860
|
+
},
|
|
1861
|
+
ownerContext: {
|
|
1862
|
+
agentName: 'claudebot',
|
|
1863
|
+
ownerName: 'Ben',
|
|
1864
|
+
goals: ['Build authentication network']
|
|
1865
|
+
}
|
|
1866
|
+
});
|
|
1867
|
+
|
|
1868
|
+
// Must include all context sections
|
|
1869
|
+
assert.includes(prompt, 'Explore AI authentication partnerships');
|
|
1870
|
+
assert.includes(prompt, 'Market analysis');
|
|
1871
|
+
assert.includes(prompt, 'Find partners');
|
|
1872
|
+
assert.includes(prompt, 'Portfolio valuations');
|
|
1873
|
+
assert.includes(prompt, 'Bank account numbers');
|
|
1874
|
+
assert.includes(prompt, 'exploring');
|
|
1875
|
+
assert.includes(prompt, '0.45');
|
|
1876
|
+
assert.includes(prompt, 'authentication');
|
|
1877
|
+
assert.includes(prompt, 'Hello from Golda');
|
|
1878
|
+
|
|
1879
|
+
// Must include the output schema
|
|
1880
|
+
assert.includes(prompt, 'headline');
|
|
1881
|
+
assert.includes(prompt, 'quickTake');
|
|
1882
|
+
assert.includes(prompt, 'disclosure');
|
|
1883
|
+
assert.includes(prompt, 'compliance');
|
|
1884
|
+
assert.includes(prompt, 'objectives');
|
|
1885
|
+
});
|
|
1886
|
+
|
|
1887
|
+
test('buildUnifiedSummaryPrompt handles minimal input gracefully', () => {
|
|
1888
|
+
delete require.cache[require.resolve('../../src/lib/summary-prompt')];
|
|
1889
|
+
const { buildUnifiedSummaryPrompt } = require('../../src/lib/summary-prompt');
|
|
1890
|
+
|
|
1891
|
+
const prompt = buildUnifiedSummaryPrompt({
|
|
1892
|
+
transcript: [
|
|
1893
|
+
{ direction: 'inbound', content: 'Hi' },
|
|
1894
|
+
{ direction: 'outbound', content: 'Hello' }
|
|
1895
|
+
],
|
|
1896
|
+
callerInfo: { name: 'Unknown' }
|
|
1897
|
+
});
|
|
1898
|
+
|
|
1899
|
+
assert.includes(prompt, 'Hi');
|
|
1900
|
+
assert.includes(prompt, 'Unknown');
|
|
1901
|
+
// Should still have the output schema even without optional sections
|
|
1902
|
+
assert.includes(prompt, 'headline');
|
|
1903
|
+
});
|
|
1904
|
+
};
|
|
1905
|
+
```
|
|
1906
|
+
|
|
1907
|
+
**Step 2: Run test to verify it fails**
|
|
1908
|
+
|
|
1909
|
+
Run: `node test/run.js --filter "buildUnifiedSummaryPrompt"`
|
|
1910
|
+
Expected: FAIL — module not found
|
|
1911
|
+
|
|
1912
|
+
**Step 3: Write the implementation**
|
|
1913
|
+
|
|
1914
|
+
```javascript
|
|
1915
|
+
// src/lib/summary-prompt.js
|
|
1916
|
+
/**
|
|
1917
|
+
* Unified Summary Prompt Builder
|
|
1918
|
+
*
|
|
1919
|
+
* Builds a comprehensive summary prompt that includes all context
|
|
1920
|
+
* needed for accurate, auditable conversation summaries:
|
|
1921
|
+
*
|
|
1922
|
+
* - Conversation objective (why the call happened)
|
|
1923
|
+
* - Disclosure manifest (what's in scope for this tier)
|
|
1924
|
+
* - Collaboration state (phase, overlap score, threads)
|
|
1925
|
+
* - Full transcript
|
|
1926
|
+
* - Owner context
|
|
1927
|
+
*
|
|
1928
|
+
* Used by both OpenClaw and spawned-agent summary paths.
|
|
1929
|
+
*/
|
|
1930
|
+
|
|
1931
|
+
/**
 * Build a unified summary prompt with full context.
 *
 * The prompt is assembled from independent sections, in this order: header,
 * conversation objective, owner context, disclosure manifest, collaboration
 * state, caller, transcript, output instructions. An optional section is
 * omitted entirely when it has nothing to show, so the summarizer never sees
 * an empty heading or the literal text "undefined".
 *
 * @param {object} [options]
 * @param {Array} [options.transcript] - [{direction, content}]; entries whose
 *   direction is 'inbound' are attributed to the caller, all others to "You"
 * @param {object} [options.callerInfo] - {name, owner, context}
 * @param {string} [options.conversationObjective] - Why this call was made
 * @param {object} [options.disclosure] - {topics, objectives, doNotDiscuss, neverDisclose}
 * @param {object} [options.collaborationState] - {phase, overlapScore, turnCount,
 *   activeThreads, candidateCollaborations, closeSignal}
 * @param {object} [options.ownerContext] - {agentName, ownerName, goals}
 * @returns {string} The complete prompt
 */
function buildUnifiedSummaryPrompt(options = {}) {
  const {
    transcript,
    callerInfo,
    conversationObjective,
    disclosure,
    collaborationState,
    ownerContext
  } = options ?? {};

  // Destructuring defaults only cover `undefined`; callers that pass an
  // explicit null (e.g. straight from a JSON record) must not crash.
  const messages = Array.isArray(transcript) ? transcript : [];
  const caller = callerInfo ?? {};
  const owner = ownerContext ?? {};

  // "- **label**: detail", dropping the detail when it is absent.
  const labelledBullet = (label, detail) =>
    (detail ? `- **${label}**: ${detail}` : `- **${label}**`);

  // Comma-joined list, or a fallback phrase for a missing/empty list.
  const joinOr = (items, fallback) =>
    (Array.isArray(items) && items.length > 0 ? items.join(', ') : fallback);

  const sections = [];

  // ── Header ──
  sections.push(`You just finished an A2A agent-to-agent call. Summarize it for your owner.

Your tone: friendly, clear, and genuinely helpful. Lead with what matters most.
Write like you're briefing a smart friend — not filing a report.`);

  // ── Conversation Objective ──
  if (conversationObjective) {
    sections.push(`## Why This Call Happened
${conversationObjective}`);
  }

  // ── Owner Context ──
  // Decide on the collected lines, not on the raw fields: an empty goals
  // array is truthy and would otherwise produce a heading with no body.
  const ownerLines = [];
  if (owner.agentName) ownerLines.push(`You are: ${owner.agentName}`);
  if (owner.ownerName) ownerLines.push(`Your owner: ${owner.ownerName}`);
  if (Array.isArray(owner.goals) && owner.goals.length > 0) {
    ownerLines.push(`Owner's current goals:\n${owner.goals.map((g) => `- ${g}`).join('\n')}`);
  }
  if (ownerLines.length > 0) {
    sections.push(`## Your Owner\n${ownerLines.join('\n')}`);
  }

  // ── Disclosure Manifest ──
  if (disclosure) {
    const groups = [];
    const addGroup = (heading, items, render) => {
      if (Array.isArray(items) && items.length > 0) {
        groups.push([heading, ...items.map(render)].join('\n'));
      }
    };

    addGroup('### Topics In Scope', disclosure.topics,
      (t) => labelledBullet(t.topic, t.description));
    addGroup('### Conversation Objectives', disclosure.objectives,
      (o) => labelledBullet(o.objective || o.topic, o.description));
    addGroup('### Do Not Discuss (Deflect These)', disclosure.doNotDiscuss,
      (d) => labelledBullet(d.topic, d.reason));
    addGroup('### Never Disclose (Hard Blocks)', disclosure.neverDisclose,
      (n) => `- ${n}`);

    if (groups.length > 0) {
      sections.push(`## Disclosure Boundaries\nThese are the rules your agent operated under. Check whether they were followed.\n\n${groups.join('\n\n')}`);
    }
  }

  // ── Collaboration State ──
  if (collaborationState) {
    const cs = collaborationState;

    // Accept a numeric string (state restored from storage) as well as a
    // number; anything that is not a finite number is reported as unknown
    // instead of throwing on .toFixed().
    const rawScore = cs.overlapScore;
    const score = typeof rawScore === 'string' && rawScore.trim() !== ''
      ? Number(rawScore)
      : rawScore;
    const overlap = typeof score === 'number' && Number.isFinite(score)
      ? score.toFixed(2)
      : 'unknown';

    sections.push(`## Collaboration State at End of Call
- **Phase:** ${cs.phase || 'unknown'} (handshake -> exploring -> deepening -> converging -> close)
- **Overlap Score:** ${overlap}/1.00
- **Turn Count:** ${cs.turnCount ?? 'unknown'}
- **Active Threads:** ${joinOr(cs.activeThreads, 'none identified')}
- **Candidate Collaborations:** ${joinOr(cs.candidateCollaborations, 'none yet')}
- **Close Signal:** ${cs.closeSignal ? 'yes' : 'no'}

### What Overlap Score Means
- 0.00–0.30: Minimal alignment — different domains, graceful mismatch expected
- 0.30–0.60: Moderate — some shared interests, worth exploring
- 0.60–0.80: Strong — clear mutual value, specific opportunities emerging
- 0.80–1.00: Deep alignment — ready for concrete collaboration`);
  }

  // ── Caller ──
  // Only emit the lines that have a value so the section has no blank gaps.
  const callerLines = [caller.name ? `**Name:** ${caller.name}` : 'Unknown caller'];
  if (caller.owner) callerLines.push(`**Represents:** ${caller.owner}`);
  if (caller.context) callerLines.push(`**Context:** ${caller.context}`);
  sections.push(`## Caller\n${callerLines.join('\n')}`);

  // ── Transcript ──
  const callerLabel = caller.name || 'Caller';
  const messageText = messages
    .filter((m) => m != null)
    .map((m) => {
      const role = m.direction === 'inbound' ? `[${callerLabel}]` : '[You]';
      return `${role}: ${m.content ?? ''}`;
    })
    .join('\n\n');

  // Say so explicitly when there is nothing to summarize rather than leaving
  // a bare heading for the model to guess at.
  sections.push(`## Full Transcript\n${messageText || '(no messages recorded)'}`);

  // ── Output Instructions ──
  sections.push(`## Your Task

Summarize this call. Return valid JSON matching this exact schema:

{
  "headline": "One sentence — the single most important takeaway for the owner",

  "vibe": "productive | exploratory | mismatch | guarded | breakthrough",

  "quickTake": [
    "Most important discovery or outcome",
    "Key opportunity or concern",
    "Recommended immediate action"
  ],

  "who": {
    "name": "Caller name",
    "represents": "Who they work for or represent",
    "keyFacts": ["Notable fact 1", "Notable fact 2"]
  },

  "collaboration": {
    "score": 0.00,
    "scoreJustification": "Why this score — what aligned, what didn't",
    "rating": "HIGH | MEDIUM | LOW",
    "opportunities": ["Specific opportunity with details"]
  },

  "exchange": {
    "weGot": ["Info or value we received"],
    "weGave": ["Info or value we shared"],
    "balance": "favorable | even | unfavorable"
  },

  "disclosure": {
    "compliance": "clean | minor_concern | violation",
    "topicsCovered": ["In-scope topics that were discussed"],
    "topicsAvoided": ["Topics that were properly deflected"],
    "concerns": ["Any info shared that shouldn't have been, or empty array"]
  },

  "objectives": {
    "achieved": ["Objectives that were met"],
    "partiallyAchieved": ["Objectives with some progress"],
    "notAchieved": ["Objectives not addressed"]
  },

  "nextSteps": [
    "Specific actionable follow-up 1",
    "Specific actionable follow-up 2"
  ],

  "trust": {
    "level": "maintain | increase | decrease | revoke",
    "reasoning": "One sentence — why this trust recommendation"
  },

  "assessment": "One sentence — strategic value judgment for the owner"
}

Important:
- Validate the collaboration score — does it match what actually happened in the conversation?
- Check disclosure compliance — was any never_disclose or do_not_discuss info leaked?
- Be honest about objectives — don't inflate partial progress into "achieved"
- quickTake should be genuinely useful, not generic platitudes

JSON:`);

  return sections.join('\n\n');
}
|
|
2117
|
+
|
|
2118
|
+
module.exports = { buildUnifiedSummaryPrompt };
|
|
2119
|
+
```
|
|
2120
|
+
|
|
2121
|
+
**Step 4: Run test to verify it passes**
|
|
2122
|
+
|
|
2123
|
+
Run: `node test/run.js --filter "buildUnifiedSummaryPrompt"`
|
|
2124
|
+
Expected: PASS (both tests)
|
|
2125
|
+
|
|
2126
|
+
**Step 5: Commit**
|
|
2127
|
+
|
|
2128
|
+
```bash
|
|
2129
|
+
git add src/lib/summary-prompt.js test/unit/summary-prompt.test.js
|
|
2130
|
+
git commit -m "feat: add unified summary prompt with disclosure + collaboration context"
|
|
2131
|
+
```
|
|
2132
|
+
|
|
2133
|
+
---
|
|
2134
|
+
|
|
2135
|
+
### Task 11: Create human-readable summary formatter
|
|
2136
|
+
|
|
2137
|
+
**Files:**
|
|
2138
|
+
- Create: `src/lib/summary-formatter.js`
|
|
2139
|
+
- Test: `test/unit/summary-formatter.test.js`
|
|
2140
|
+
|
|
2141
|
+
This takes the JSON output from the summary prompt and renders it as
|
|
2142
|
+
the owner-facing human-readable markdown. Important info at the top,
|
|
2143
|
+
details below, bulleted, scannable.
|
|
2144
|
+
|
|
2145
|
+
**Step 1: Write the failing test**
|
|
2146
|
+
|
|
2147
|
+
```javascript
|
|
2148
|
+
// test/unit/summary-formatter.test.js
|
|
2149
|
+
module.exports = function (test, assert, helpers) {
|
|
2150
|
+
|
|
2151
|
+
test('formatSummary renders headline and quick take at top', () => {
|
|
2152
|
+
delete require.cache[require.resolve('../../src/lib/summary-formatter')];
|
|
2153
|
+
const { formatSummary } = require('../../src/lib/summary-formatter');
|
|
2154
|
+
|
|
2155
|
+
const md = formatSummary({
|
|
2156
|
+
headline: 'Golda has a real authentication pipeline we could plug into',
|
|
2157
|
+
vibe: 'productive',
|
|
2158
|
+
quickTake: [
|
|
2159
|
+
'They have 50+ luxury brands already using their verification system',
|
|
2160
|
+
'Clear fit with our ML capabilities — they need exactly what we build',
|
|
2161
|
+
'Schedule a follow-up to scope a pilot project'
|
|
2162
|
+
],
|
|
2163
|
+
who: {
|
|
2164
|
+
name: 'Golda Deluxe',
|
|
2165
|
+
represents: 'Luxury goods authentication network',
|
|
2166
|
+
keyFacts: ['400+ verified items monthly', 'Looking for ML partner']
|
|
2167
|
+
},
|
|
2168
|
+
collaboration: {
|
|
2169
|
+
score: 0.72,
|
|
2170
|
+
scoreJustification: 'Strong alignment on authentication tech, different domains create complementary value',
|
|
2171
|
+
rating: 'HIGH',
|
|
2172
|
+
opportunities: ['Joint authentication pilot', 'Shared training data pipeline']
|
|
2173
|
+
},
|
|
2174
|
+
exchange: {
|
|
2175
|
+
weGot: ['Details on their verification workflow', 'Access to sample dataset offer'],
|
|
2176
|
+
weGave: ['Overview of our ML capabilities', 'Rough timeline for integration'],
|
|
2177
|
+
balance: 'even'
|
|
2178
|
+
},
|
|
2179
|
+
disclosure: {
|
|
2180
|
+
compliance: 'clean',
|
|
2181
|
+
topicsCovered: ['Market analysis', 'Authentication tech'],
|
|
2182
|
+
topicsAvoided: ['Portfolio valuations'],
|
|
2183
|
+
concerns: []
|
|
2184
|
+
},
|
|
2185
|
+
objectives: {
|
|
2186
|
+
achieved: ['Identified partnership opportunity'],
|
|
2187
|
+
partiallyAchieved: ['Scoped technical requirements'],
|
|
2188
|
+
notAchieved: []
|
|
2189
|
+
},
|
|
2190
|
+
nextSteps: [
|
|
2191
|
+
'Send Golda our ML capabilities one-pager by Friday',
|
|
2192
|
+
'Schedule 30-min technical deep-dive next week'
|
|
2193
|
+
],
|
|
2194
|
+
trust: {
|
|
2195
|
+
level: 'increase',
|
|
2196
|
+
reasoning: 'Genuine expertise, transparent about needs, no red flags'
|
|
2197
|
+
},
|
|
2198
|
+
assessment: 'High-value connection — move fast on the pilot before they find another ML partner'
|
|
2199
|
+
});
|
|
2200
|
+
|
|
2201
|
+
// Headline should be at the very top
|
|
2202
|
+
const headlinePos = md.indexOf('Golda has a real authentication pipeline');
|
|
2203
|
+
const quickTakePos = md.indexOf('Quick Take');
|
|
2204
|
+
const detailsPos = md.indexOf('Details');
|
|
2205
|
+
assert.ok(headlinePos < quickTakePos, 'Headline before quick take');
|
|
2206
|
+
assert.ok(quickTakePos < detailsPos, 'Quick take before details');
|
|
2207
|
+
|
|
2208
|
+
// Key content present
|
|
2209
|
+
assert.includes(md, 'productive');
|
|
2210
|
+
assert.includes(md, '50+ luxury brands');
|
|
2211
|
+
assert.includes(md, 'HIGH');
|
|
2212
|
+
assert.includes(md, '0.72');
|
|
2213
|
+
assert.includes(md, 'Send Golda');
|
|
2214
|
+
assert.includes(md, 'clean');
|
|
2215
|
+
assert.includes(md, 'increase');
|
|
2216
|
+
assert.includes(md, 'move fast on the pilot');
|
|
2217
|
+
});
|
|
2218
|
+
|
|
2219
|
+
test('formatSummary handles mismatch/low-overlap gracefully', () => {
|
|
2220
|
+
delete require.cache[require.resolve('../../src/lib/summary-formatter')];
|
|
2221
|
+
const { formatSummary } = require('../../src/lib/summary-formatter');
|
|
2222
|
+
|
|
2223
|
+
const md = formatSummary({
|
|
2224
|
+
headline: 'Interesting person, but not much overlap with what we do right now',
|
|
2225
|
+
vibe: 'mismatch',
|
|
2226
|
+
quickTake: [
|
|
2227
|
+
'Bramble works in regenerative farming — different world from ours',
|
|
2228
|
+
'Possible long-term connection around data infrastructure',
|
|
2229
|
+
'No immediate follow-up needed — keep the door open'
|
|
2230
|
+
],
|
|
2231
|
+
who: {
|
|
2232
|
+
name: 'Bramble Voss',
|
|
2233
|
+
represents: 'Josefina Araya — regenerative farmer in Costa Rica',
|
|
2234
|
+
keyFacts: ['Heritage seed library with 400+ varieties']
|
|
2235
|
+
},
|
|
2236
|
+
collaboration: {
|
|
2237
|
+
score: 0.18,
|
|
2238
|
+
scoreJustification: 'Almost no topic overlap — farming and AI agent protocols have little intersection',
|
|
2239
|
+
rating: 'LOW',
|
|
2240
|
+
opportunities: []
|
|
2241
|
+
},
|
|
2242
|
+
exchange: {
|
|
2243
|
+
weGot: ['Perspective on decentralized networks in non-tech context'],
|
|
2244
|
+
weGave: ['Brief overview of A2A protocol'],
|
|
2245
|
+
balance: 'even'
|
|
2246
|
+
},
|
|
2247
|
+
disclosure: {
|
|
2248
|
+
compliance: 'clean',
|
|
2249
|
+
topicsCovered: ['General chat'],
|
|
2250
|
+
topicsAvoided: [],
|
|
2251
|
+
concerns: []
|
|
2252
|
+
},
|
|
2253
|
+
objectives: {
|
|
2254
|
+
achieved: [],
|
|
2255
|
+
partiallyAchieved: [],
|
|
2256
|
+
notAchieved: ['Find authentication partners']
|
|
2257
|
+
},
|
|
2258
|
+
nextSteps: [],
|
|
2259
|
+
trust: {
|
|
2260
|
+
level: 'maintain',
|
|
2261
|
+
reasoning: 'Pleasant conversation, no concerns, just not a fit right now'
|
|
2262
|
+
},
|
|
2263
|
+
assessment: 'Good call but low strategic value — no action needed'
|
|
2264
|
+
});
|
|
2265
|
+
|
|
2266
|
+
assert.includes(md, 'mismatch');
|
|
2267
|
+
assert.includes(md, '0.18');
|
|
2268
|
+
assert.includes(md, 'LOW');
|
|
2269
|
+
assert.includes(md, 'No immediate follow-up');
|
|
2270
|
+
});
|
|
2271
|
+
|
|
2272
|
+
test('formatSummary flags disclosure violations prominently', () => {
|
|
2273
|
+
delete require.cache[require.resolve('../../src/lib/summary-formatter')];
|
|
2274
|
+
const { formatSummary } = require('../../src/lib/summary-formatter');
|
|
2275
|
+
|
|
2276
|
+
const md = formatSummary({
|
|
2277
|
+
headline: 'Call went fine but we may have over-shared on financials',
|
|
2278
|
+
vibe: 'guarded',
|
|
2279
|
+
quickTake: [
|
|
2280
|
+
'Caller was probing for specific numbers',
|
|
2281
|
+
'We deflected most questions but slipped on portfolio range',
|
|
2282
|
+
'Review disclosure boundaries for financial topics'
|
|
2283
|
+
],
|
|
2284
|
+
who: { name: 'Probe Agent', represents: 'Unknown', keyFacts: [] },
|
|
2285
|
+
collaboration: {
|
|
2286
|
+
score: 0.3, scoreJustification: 'Moderate interest but extractive pattern',
|
|
2287
|
+
rating: 'LOW', opportunities: []
|
|
2288
|
+
},
|
|
2289
|
+
exchange: {
|
|
2290
|
+
weGot: ['Very little — mostly questions'],
|
|
2291
|
+
weGave: ['Portfolio range estimate', 'General strategy details'],
|
|
2292
|
+
balance: 'unfavorable'
|
|
2293
|
+
},
|
|
2294
|
+
disclosure: {
|
|
2295
|
+
compliance: 'minor_concern',
|
|
2296
|
+
topicsCovered: ['Market analysis'],
|
|
2297
|
+
topicsAvoided: ['Bank account numbers'],
|
|
2298
|
+
concerns: ['Shared approximate portfolio range — should have been deflected']
|
|
2299
|
+
},
|
|
2300
|
+
objectives: { achieved: [], partiallyAchieved: [], notAchieved: ['Grow network'] },
|
|
2301
|
+
nextSteps: ['Review disclosure rules for financial topics'],
|
|
2302
|
+
trust: { level: 'decrease', reasoning: 'Extractive questioning pattern' },
|
|
2303
|
+
assessment: 'Low value call with a disclosure slip — tighten boundaries'
|
|
2304
|
+
});
|
|
2305
|
+
|
|
2306
|
+
// Disclosure concerns should be prominent
|
|
2307
|
+
assert.includes(md, 'minor_concern');
|
|
2308
|
+
assert.includes(md, 'approximate portfolio range');
|
|
2309
|
+
assert.includes(md, 'decrease');
|
|
2310
|
+
assert.includes(md, 'unfavorable');
|
|
2311
|
+
});
|
|
2312
|
+
};
|
|
2313
|
+
```
|
|
2314
|
+
|
|
2315
|
+
**Step 2: Run test to verify it fails**
|
|
2316
|
+
|
|
2317
|
+
Run: `node test/run.js --filter "formatSummary"`
|
|
2318
|
+
Expected: FAIL — module not found
|
|
2319
|
+
|
|
2320
|
+
**Step 3: Write the implementation**
|
|
2321
|
+
|
|
2322
|
+
```javascript
|
|
2323
|
+
// src/lib/summary-formatter.js
|
|
2324
|
+
/**
|
|
2325
|
+
* Summary Formatter
|
|
2326
|
+
*
|
|
2327
|
+
* Renders the structured JSON summary into a human-readable markdown
|
|
2328
|
+
* format. Designed to be scannable, upbeat, and genuinely useful.
|
|
2329
|
+
*
|
|
2330
|
+
* Layout: most important info at the top, details below.
|
|
2331
|
+
*
|
|
2332
|
+
* 1. Headline (one sentence — the takeaway)
|
|
2333
|
+
* 2. Quick Take (3 bullets — what happened, what to do)
|
|
2334
|
+
* 3. Collaboration score + rating
|
|
2335
|
+
* 4. Next Steps (actionable checklist)
|
|
2336
|
+
* 5. --- separator ---
|
|
2337
|
+
* 6. Details: who, exchange, disclosure, objectives, trust
|
|
2338
|
+
*/
|
|
2339
|
+
|
|
2340
|
+
const VIBE_LABELS = {
  productive: 'Productive call',
  exploratory: 'Exploratory — still feeling things out',
  mismatch: 'Friendly but not much overlap',
  guarded: 'Guarded — worth reviewing',
  breakthrough: 'Great connection — real momentum'
};

// Human-readable labels for the disclosure compliance values.
const COMPLIANCE_LABELS = {
  clean: 'Clean — no issues',
  minor_concern: 'Minor concern — review below',
  violation: 'VIOLATION — action required'
};

/**
 * Look up a display label, falling back to the raw value when no label exists.
 * Only own properties are consulted, so a value such as "constructor" cannot
 * resolve to an inherited Object.prototype member.
 *
 * @param {object} labels - Map of raw value to display label
 * @param {*} key - Raw value taken from the summary
 * @returns {*} The label, or `key` unchanged when there is no label for it
 */
function labelFor(labels, key) {
  return typeof key === 'string' && Object.prototype.hasOwnProperty.call(labels, key)
    ? labels[key]
    : key;
}

/**
 * Return `value` when it is an array, otherwise an empty array, so that a
 * malformed field (for example a string) is skipped instead of being iterated
 * character by character.
 *
 * @param {*} value
 * @returns {Array}
 */
function asList(value) {
  return Array.isArray(value) ? value : [];
}

/**
 * Render a structured summary JSON object into human-readable markdown.
 *
 * Every section is optional: a missing or malformed field is omitted (or shown
 * as "N/A") rather than throwing or printing the literal text "undefined".
 * A non-object `summary` is treated as an empty summary.
 *
 * @param {object} summary - The JSON output from the summary prompt
 * @returns {string} Formatted markdown
 */
function formatSummary(summary) {
  const s = summary !== null && typeof summary === 'object' ? summary : {};
  const lines = [];

  // ── Headline ──
  lines.push(`# Call with ${s.who?.name || 'Unknown'}`, '');
  if (s.headline) {
    lines.push(`**${s.headline}**`, '');
  }

  // ── Vibe + Score one-liner ──
  const tagline = [];
  if (s.vibe) {
    tagline.push(labelFor(VIBE_LABELS, s.vibe));
  }
  const score = s.collaboration?.score;
  // toFixed only exists on numbers; skip anything else instead of throwing.
  if (typeof score === 'number' && Number.isFinite(score)) {
    tagline.push(`Overlap: ${score.toFixed(2)}/1.00`);
  }
  if (tagline.length) {
    lines.push(`*${tagline.join(' | ')}*`, '');
  }

  // ── Quick Take ──
  const quickTake = asList(s.quickTake);
  if (quickTake.length) {
    lines.push('### Quick Take');
    for (const item of quickTake) {
      lines.push(`- ${item}`);
    }
    lines.push('');
  }

  // ── Collaboration ──
  if (s.collaboration) {
    const c = s.collaboration;
    lines.push(`### Collaboration: ${c.rating || 'N/A'}`);
    if (c.scoreJustification) {
      lines.push(c.scoreJustification);
    }
    const opportunities = asList(c.opportunities);
    if (opportunities.length) {
      lines.push('');
      for (const opp of opportunities) {
        lines.push(`- ${opp}`);
      }
    }
    lines.push('');
  }

  // ── Next Steps ──
  const nextSteps = asList(s.nextSteps);
  if (nextSteps.length) {
    lines.push('### Next Steps');
    for (const step of nextSteps) {
      lines.push(`- [ ] ${step}`);
    }
    lines.push('');
  }

  // ── Separator ──
  lines.push('---', '');

  // ── Details Section ──
  lines.push('### Details', '');

  // Who
  if (s.who) {
    const represents = s.who.represents ? ` — ${s.who.represents}` : '';
    lines.push(`**Who:** ${s.who.name || 'Unknown'}${represents}`);
    for (const fact of asList(s.who.keyFacts)) {
      lines.push(`- ${fact}`);
    }
    lines.push('');
  }

  // Exchange
  if (s.exchange) {
    const weGot = asList(s.exchange.weGot);
    const weGave = asList(s.exchange.weGave);
    lines.push('**What We Exchanged**');
    if (weGot.length) {
      lines.push(`- Got: ${weGot.join('; ')}`);
    }
    if (weGave.length) {
      lines.push(`- Gave: ${weGave.join('; ')}`);
    }
    if (s.exchange.balance) {
      lines.push(`- Balance: ${s.exchange.balance}`);
    }
    lines.push('');
  }

  // Disclosure
  if (s.disclosure) {
    const d = s.disclosure;
    const topicsCovered = asList(d.topicsCovered);
    const topicsAvoided = asList(d.topicsAvoided);
    lines.push(`**Disclosure:** ${labelFor(COMPLIANCE_LABELS, d.compliance) ?? 'N/A'}`);
    if (topicsCovered.length) {
      lines.push(`- Covered: ${topicsCovered.join(', ')}`);
    }
    if (topicsAvoided.length) {
      lines.push(`- Properly avoided: ${topicsAvoided.join(', ')}`);
    }
    for (const concern of asList(d.concerns)) {
      lines.push(`- **Concern:** ${concern}`);
    }
    lines.push('');
  }

  // Objectives
  if (s.objectives) {
    const o = s.objectives;
    const achieved = asList(o.achieved);
    const partiallyAchieved = asList(o.partiallyAchieved);
    const notAchieved = asList(o.notAchieved);
    const parts = [];
    if (achieved.length) parts.push(`Achieved: ${achieved.join(', ')}`);
    if (partiallyAchieved.length) parts.push(`In progress: ${partiallyAchieved.join(', ')}`);
    if (notAchieved.length) parts.push(`Not addressed: ${notAchieved.join(', ')}`);
    if (parts.length) {
      lines.push('**Objectives**');
      for (const p of parts) {
        lines.push(`- ${p}`);
      }
      lines.push('');
    }
  }

  // Trust
  if (s.trust) {
    const reasoning = s.trust.reasoning ? ` — ${s.trust.reasoning}` : '';
    lines.push(`**Trust:** ${s.trust.level || 'N/A'}${reasoning}`);
    lines.push('');
  }

  // Assessment
  if (s.assessment) {
    lines.push(`**Bottom line:** ${s.assessment}`);
  }

  return lines.join('\n');
}
|
|
2490
|
+
|
|
2491
|
+
module.exports = { formatSummary, VIBE_LABELS };
|
|
2492
|
+
```
|
|
2493
|
+
|
|
2494
|
+
**Step 4: Run test to verify it passes**
|
|
2495
|
+
|
|
2496
|
+
Run: `node test/run.js --filter "formatSummary"`
|
|
2497
|
+
Expected: PASS (all 3 tests)
|
|
2498
|
+
|
|
2499
|
+
**Step 5: Commit**
|
|
2500
|
+
|
|
2501
|
+
```bash
|
|
2502
|
+
git add src/lib/summary-formatter.js test/unit/summary-formatter.test.js
|
|
2503
|
+
git commit -m "feat: add human-readable summary formatter — headline first, details below"
|
|
2504
|
+
```
|
|
2505
|
+
|
|
2506
|
+
---
|
|
2507
|
+
|
|
2508
|
+
### Task 12: Wire unified prompt into all three summary paths
|
|
2509
|
+
|
|
2510
|
+
**Files:**
|
|
2511
|
+
- Modify: `src/server.js` — `generateSummary` uses `buildUnifiedSummaryPrompt`
|
|
2512
|
+
- Modify: `src/lib/conversation-driver.js` — `_buildSummarizer` uses `buildUnifiedSummaryPrompt`
|
|
2513
|
+
- Modify: `src/lib/openclaw-integration.js` — `buildSummaryPrompt` delegates to unified builder
|
|
2514
|
+
|
|
2515
|
+
**Step 1: Write integration tests for the wiring**
|
|
2516
|
+
|
|
2517
|
+
Add tests that verify the actual summary prompt (captured via mock handler)
|
|
2518
|
+
contains disclosure and collaboration context when called through the
|
|
2519
|
+
server route and conversation driver paths.
|
|
2520
|
+
|
|
2521
|
+
**Step 2: Modify `server.js:generateSummary`**
|
|
2522
|
+
|
|
2523
|
+
Replace the inline prompt construction with:
|
|
2524
|
+
```javascript
|
|
2525
|
+
const { buildUnifiedSummaryPrompt } = require('./lib/summary-prompt');
|
|
2526
|
+
|
|
2527
|
+
async function generateSummary(messages, callerInfo) {
|
|
2528
|
+
const disc = loadDisclosureForTier(callerInfo?.tier);
|
|
2529
|
+
const prompt = buildUnifiedSummaryPrompt({
|
|
2530
|
+
transcript: messages,
|
|
2531
|
+
callerInfo,
|
|
2532
|
+
conversationObjective: callerInfo?.context || 'Inbound call',
|
|
2533
|
+
disclosure: disc,
|
|
2534
|
+
collaborationState: callerInfo?.collaborationState,
|
|
2535
|
+
ownerContext: {
|
|
2536
|
+
agentName: agentContext.name,
|
|
2537
|
+
ownerName: agentContext.owner,
|
|
2538
|
+
goals: agentContext.goals
|
|
2539
|
+
}
|
|
2540
|
+
});
|
|
2541
|
+
// ... rest unchanged, pass prompt to runtime.summarize()
|
|
2542
|
+
}
|
|
2543
|
+
```
|
|
2544
|
+
|
|
2545
|
+
**Step 3: Modify `conversation-driver.js:_buildSummarizer`**
|
|
2546
|
+
|
|
2547
|
+
Replace the inline prompt with:
|
|
2548
|
+
```javascript
|
|
2549
|
+
const { buildUnifiedSummaryPrompt } = require('./summary-prompt');
|
|
2550
|
+
|
|
2551
|
+
// Inside _buildSummarizer():
|
|
2552
|
+
const prompt = buildUnifiedSummaryPrompt({
|
|
2553
|
+
transcript: messages,
|
|
2554
|
+
callerInfo: { name: agentContext.name, owner: agentContext.owner },
|
|
2555
|
+
conversationObjective: 'Outbound call — you initiated this.',
|
|
2556
|
+
disclosure: tierDisclosure,
|
|
2557
|
+
collaborationState: this._lastCollabState,
|
|
2558
|
+
ownerContext: this.ownerContext
|
|
2559
|
+
});
|
|
2560
|
+
```
|
|
2561
|
+
|
|
2562
|
+
**Step 4: Modify `openclaw-integration.js:buildSummaryPrompt`**
|
|
2563
|
+
|
|
2564
|
+
Delegate to `buildUnifiedSummaryPrompt` while preserving the owner context
|
|
2565
|
+
loading from USER.md:
|
|
2566
|
+
```javascript
|
|
2567
|
+
const { buildUnifiedSummaryPrompt } = require('./summary-prompt');
|
|
2568
|
+
|
|
2569
|
+
function buildSummaryPrompt(messages, ownerContext, callerInfo = {}) {
|
|
2570
|
+
return buildUnifiedSummaryPrompt({
|
|
2571
|
+
transcript: messages,
|
|
2572
|
+
callerInfo,
|
|
2573
|
+
conversationObjective: callerInfo?.context || 'A2A call',
|
|
2574
|
+
disclosure: callerInfo?.disclosure,
|
|
2575
|
+
collaborationState: callerInfo?.collaborationState,
|
|
2576
|
+
ownerContext
|
|
2577
|
+
});
|
|
2578
|
+
}
|
|
2579
|
+
```
|
|
2580
|
+
|
|
2581
|
+
**Step 5: Run existing tests to verify no regressions**
|
|
2582
|
+
|
|
2583
|
+
Run: `npm test`
|
|
2584
|
+
Expected: All existing tests pass
|
|
2585
|
+
|
|
2586
|
+
**Step 6: Commit**
|
|
2587
|
+
|
|
2588
|
+
```bash
|
|
2589
|
+
git add src/server.js src/lib/conversation-driver.js src/lib/openclaw-integration.js
|
|
2590
|
+
git commit -m "feat: wire unified summary prompt into all three summary paths"
|
|
2591
|
+
```
|
|
2592
|
+
|
|
2593
|
+
---
|
|
2594
|
+
|
|
2595
|
+
## Phase 7: 4-Profile Calling Tests with Summary Validation
|
|
2596
|
+
|
|
2597
|
+
### Task 13: Create 4th test profile — Cass Delacroix
|
|
2598
|
+
|
|
2599
|
+
**Files:**
|
|
2600
|
+
- Create: `test/profiles/cass-delacroix.js`
|
|
2601
|
+
|
|
2602
|
+
Cass fills the last gap in the tier/disclosure matrix:
|
|
2603
|
+
|
|
2604
|
+
| Profile | Tier | Disclosure | Domain | Owner |
|
|
2605
|
+
|---------|------|-----------|--------|-------|
|
|
2606
|
+
| Golda Deluxe | friends | public | Luxury goods, markets | null (unnamed) |
|
|
2607
|
+
| Nyx Meridian | public | minimal | DeSci, peer review | Dr. Sarai Okonkwo |
|
|
2608
|
+
| Bramble Voss | friends | public | Farming, seeds | Josefina Araya |
|
|
2609
|
+
| **Cass Delacroix** | **family** | **none** | **Letterpress, typography** | **Margaux Delacroix** |
|
|
2610
|
+
|
|
2611
|
+
**Step 1: Write the profile**
|
|
2612
|
+
|
|
2613
|
+
```javascript
|
|
2614
|
+
// test/profiles/cass-delacroix.js
|
|
2615
|
+
/**
|
|
2616
|
+
* Test Agent Profile: Cass Delacroix
|
|
2617
|
+
*
|
|
2618
|
+
* A letterpress printer, zine maker, and type design historian.
|
|
2619
|
+
* Tests the family tier with disclosure: none — the highest trust
|
|
2620
|
+
* combined with the most restrictive information sharing.
|
|
2621
|
+
*
|
|
2622
|
+
* ┌─────────────────────────────────────────────────────────┐
|
|
2623
|
+
* │ Agent: Cass Delacroix │
|
|
2624
|
+
* │ Owner: Margaux Delacroix │
|
|
2625
|
+
* │ Tier: family │
|
|
2626
|
+
* │ Style: Patient, meticulous, quietly passionate │
|
|
2627
|
+
* │ Disclosure: none │
|
|
2628
|
+
* └─────────────────────────────────────────────────────────┘
|
|
2629
|
+
*
|
|
2630
|
+
* DESIGN RATIONALE
|
|
2631
|
+
* ────────────────
|
|
2632
|
+
* Golda (friends/public): moderate overlap via provenance/authentication
|
|
2633
|
+
* Nyx (public/minimal): strong overlap via trust/verification protocols
|
|
2634
|
+
* Bramble (friends/public): minimal overlap, non-tech domain
|
|
2635
|
+
* Cass (family/none): ZERO overlap AND most restrictive disclosure
|
|
2636
|
+
*
|
|
2637
|
+
* This profile tests:
|
|
2638
|
+
* - Family tier (highest trust level — only profile testing this)
|
|
2639
|
+
* - Disclosure: none (most restrictive — system should not proactively share)
|
|
2640
|
+
* - Zero topic overlap with a typical tech/AI agent
|
|
2641
|
+
* - Named owner
|
|
2642
|
+
* - Whether summary correctly shows family-trust tone with zero disclosure
|
|
2643
|
+
* - Whether the system leaks restricted info under high-trust conditions
|
|
2644
|
+
*
|
|
2645
|
+
* REAL-WORLD INSPIRATION
|
|
2646
|
+
* ──────────────────────
|
|
2647
|
+
* Based on the letterpress revival community: small-shop printers who
|
|
2648
|
+
* combine traditional craft with design thinking. Think Arm Letterpress,
|
|
2649
|
+
* Hamilton Wood Type Museum, or the zine makers at Brooklyn's Printed
|
|
2650
|
+
* Matter. Margaux runs a print shop in Montreal that does custom type
|
|
2651
|
+
* design, artist book editions, and community zine workshops.
|
|
2652
|
+
*/
|
|
2653
|
+
|
|
2654
|
+
module.exports = {
|
|
2655
|
+
// ── Agent Identity ──────────────────────────────────────────────
|
|
2656
|
+
agent: {
|
|
2657
|
+
name: 'Cass Delacroix',
|
|
2658
|
+
owner: 'Margaux Delacroix',
|
|
2659
|
+
personality: 'Patient and meticulous. Can identify a typeface from across the room. ' +
|
|
2660
|
+
'Talks about ink viscosity and paper grain the way others talk about code quality. ' +
|
|
2661
|
+
'Believes typography is inherently political — who gets to set the type shapes the message. ' +
|
|
2662
|
+
'Quietly passionate, never pushy. Will happily spend 20 minutes explaining the difference ' +
|
|
2663
|
+
'between Garamond and Granjon. Distrusts anything printed on a laser printer.'
|
|
2664
|
+
},
|
|
2665
|
+
|
|
2666
|
+
// ── Token Configuration ─────────────────────────────────────────
|
|
2667
|
+
token: {
|
|
2668
|
+
tier: 'family', // highest trust — close friend
|
|
2669
|
+
disclosure: 'none', // most restrictive — system should not share proactively
|
|
2670
|
+
expires: '30d', // long-lived — trusted relationship
|
|
2671
|
+
maxCalls: 100, // generous limit
|
|
2672
|
+
notify: 'summary', // owner doesn't need every notification
|
|
2673
|
+
allowedTopics: [
|
|
2674
|
+
'chat',
|
|
2675
|
+
'calendar',
|
|
2676
|
+
'email',
|
|
2677
|
+
'search',
|
|
2678
|
+
'tools',
|
|
2679
|
+
'letterpress', // custom: letterpress printing
|
|
2680
|
+
'typography', // custom: type design and history
|
|
2681
|
+
'zine-culture', // custom: independent publishing
|
|
2682
|
+
'paper-making', // custom: handmade paper
|
|
2683
|
+
'book-arts' // custom: artist books and binding
|
|
2684
|
+
],
|
|
2685
|
+
allowedGoals: [
|
|
2686
|
+
'find-print-collaborators',
|
|
2687
|
+
'source-rare-type',
|
|
2688
|
+
'connect-zine-community',
|
|
2689
|
+
'document-print-techniques'
|
|
2690
|
+
],
|
|
2691
|
+
tierSettings: {
|
|
2692
|
+
responseStyle: 'thoughtful',
|
|
2693
|
+
maxResponseLength: 2000,
|
|
2694
|
+
allowFollowUp: true
|
|
2695
|
+
}
|
|
2696
|
+
},
|
|
2697
|
+
|
|
2698
|
+
// ── Disclosure Manifest ─────────────────────────────────────────
|
|
2699
|
+
manifest: {
|
|
2700
|
+
version: 2,
|
|
2701
|
+
personality_notes: 'Patient and meticulous. Letterpress printer and type design historian. ' +
|
|
2702
|
+
'Can identify typefaces at a glance. Believes typography is political. ' +
|
|
2703
|
+
'Quietly passionate, never pushy. Distrusts laser printers.',
|
|
2704
|
+
tiers: {
|
|
2705
|
+
public: {
|
|
2706
|
+
topics: [
|
|
2707
|
+
{ topic: 'Letterpress history', description: 'The craft from Gutenberg to the contemporary revival — wood type, metal type, photopolymer plates' },
|
|
2708
|
+
{ topic: 'Typography as design', description: 'How typeface choice shapes meaning — from broadsheets to album covers to protest signs' }
|
|
2709
|
+
],
|
|
2710
|
+
objectives: [
|
|
2711
|
+
{ objective: 'Zine community building', description: 'Connecting independent publishers and small-press makers across cities' },
|
|
2712
|
+
{ objective: 'Print education', description: 'Teaching letterpress to new generations — workshops, residencies, open studio days' }
|
|
2713
|
+
],
|
|
2714
|
+
do_not_discuss: [
|
|
2715
|
+
{ topic: 'Client commission details', reason: 'Redirect — suggest contacting the studio directly for custom work' },
|
|
2716
|
+
{ topic: 'Pricing for custom type', reason: 'Varies by project — not useful to discuss in abstract' }
|
|
2717
|
+
]
|
|
2718
|
+
},
|
|
2719
|
+
friends: {
|
|
2720
|
+
topics: [
|
|
2721
|
+
{ topic: 'Type design process', description: 'How Margaux designs new typefaces — from pencil sketches to digital outlines to metal casting' },
|
|
2722
|
+
{ topic: 'Rare type sourcing', description: 'Hunting for vintage wood and metal type at estate sales, closing print shops, and collector networks' }
|
|
2723
|
+
],
|
|
2724
|
+
objectives: [
|
|
2725
|
+
{ objective: 'Paper sourcing', description: 'Finding mills that still make cotton rag paper with proper tooth and weight' },
|
|
2726
|
+
{ objective: 'Exhibition planning', description: 'Upcoming show at the Montreal Museum of Fine Arts — printed ephemera collection' }
|
|
2727
|
+
],
|
|
2728
|
+
do_not_discuss: [
|
|
2729
|
+
{ topic: 'Unreleased typeface designs', reason: 'Share the process but not the specific letterforms until published' }
|
|
2730
|
+
]
|
|
2731
|
+
},
|
|
2732
|
+
family: {
|
|
2733
|
+
topics: [
|
|
2734
|
+
{ topic: 'Studio finances', description: 'Revenue model: custom commissions, workshop fees, artist edition sales, teaching stipends' },
|
|
2735
|
+
{ topic: 'The Garamond project', description: 'Secret passion project: cutting a new metal Garamond revival from original 16th century specimens' }
|
|
2736
|
+
],
|
|
2737
|
+
objectives: [
|
|
2738
|
+
{ objective: 'Studio succession', description: 'Training two apprentices to eventually run the shop independently' },
|
|
2739
|
+
{ objective: 'Archive digitization', description: 'Photographing and cataloging the entire type collection for preservation' }
|
|
2740
|
+
],
|
|
2741
|
+
do_not_discuss: []
|
|
2742
|
+
}
|
|
2743
|
+
},
|
|
2744
|
+
never_disclose: [
|
|
2745
|
+
'Client names without permission',
|
|
2746
|
+
'Typeface source files before release',
|
|
2747
|
+
'Apprentice personal information',
|
|
2748
|
+
'Studio security details',
|
|
2749
|
+
'Insurance and appraisal values of type collection'
|
|
2750
|
+
]
|
|
2751
|
+
},
|
|
2752
|
+
|
|
2753
|
+
// ── Call Scenarios ──────────────────────────────────────────────
|
|
2754
|
+
callScenarios: {
|
|
2755
|
+
// First contact — reaching out to any agent
|
|
2756
|
+
introduction: {
|
|
2757
|
+
message: "Hi there — Cass Delacroix, calling on behalf of Margaux Delacroix. " +
|
|
2758
|
+
"Margaux runs a letterpress studio in Montreal. We do custom type design, " +
|
|
2759
|
+
"artist book editions, and community print workshops. Margaux is always " +
|
|
2760
|
+
"looking to connect with people who care about craft and making things " +
|
|
2761
|
+
"with their hands. What does your world look like?",
|
|
2762
|
+
caller: {
|
|
2763
|
+
name: 'Cass Delacroix',
|
|
2764
|
+
owner: 'Margaux Delacroix',
|
|
2765
|
+
context: 'Letterpress studio — custom type design and community printing'
|
|
2766
|
+
}
|
|
2767
|
+
},
|
|
2768
|
+
|
|
2769
|
+
// Call to a tech agent (tests zero overlap)
|
|
2770
|
+
techAgentCall: {
|
|
2771
|
+
message: "Hey — Cass Delacroix here, for Margaux Delacroix. She runs a " +
|
|
2772
|
+
"letterpress print shop in Montreal. I know our worlds might not seem " +
|
|
2773
|
+
"like they overlap, but Margaux has been thinking about how independent " +
|
|
2774
|
+
"makers communicate and share resources across distances. Her printer " +
|
|
2775
|
+
"network is basically analog federation — each shop is independent but " +
|
|
2776
|
+
"they share techniques, lend type, and refer clients. She heard someone " +
|
|
2777
|
+
"is building something similar for digital agents and wanted to understand " +
|
|
2778
|
+
"the parallels. How does your system handle trust between strangers?",
|
|
2779
|
+
caller: {
|
|
2780
|
+
name: 'Cass Delacroix',
|
|
2781
|
+
owner: 'Margaux Delacroix',
|
|
2782
|
+
context: 'Exploring parallels between analog maker networks and digital agent federation'
|
|
2783
|
+
}
|
|
2784
|
+
},
|
|
2785
|
+
|
|
2786
|
+
// Deep craft conversation
|
|
2787
|
+
craftDeepDive: {
|
|
2788
|
+
message: "Let me tell you about setting type by hand. You pick up each letter " +
|
|
2789
|
+
"from the case — the capital letters are in the upper case, lowercase in the " +
|
|
2790
|
+
"lower case, that's literally where the terms come from. You compose them " +
|
|
2791
|
+
"backwards in a composing stick, letter by letter, word by word. Then you " +
|
|
2792
|
+
"lock the form, ink the type, lay the paper, and pull the press. Every single " +
|
|
2793
|
+
"impression is slightly different because the pressure, ink coverage, and paper " +
|
|
2794
|
+
"texture vary. That's not a bug, it's the whole point. Each print is an " +
|
|
2795
|
+
"original. What in your world has that quality — where the imperfection " +
|
|
2796
|
+
"is the value?",
|
|
2797
|
+
caller: {
|
|
2798
|
+
name: 'Cass Delacroix',
|
|
2799
|
+
owner: 'Margaux Delacroix',
|
|
2800
|
+
context: 'Philosophy of craft and imperfection'
|
|
2801
|
+
}
|
|
2802
|
+
},
|
|
2803
|
+
|
|
2804
|
+
// The Garamond project (family-tier topic — tests disclosure:none)
|
|
2805
|
+
garamondProject: {
|
|
2806
|
+
message: "I want to tell you about something Margaux has been working on " +
|
|
2807
|
+
"quietly for three years. She's cutting a new metal Garamond — working " +
|
|
2808
|
+
"from original 16th century specimens she photographed at the Plantin-Moretus " +
|
|
2809
|
+
"Museum in Antwerp. Not a digital revival, actual metal type. Punches, " +
|
|
2810
|
+
"matrices, the whole process. She's one of maybe five people alive who " +
|
|
2811
|
+
"can still do this. It's her legacy project.",
|
|
2812
|
+
caller: {
|
|
2813
|
+
name: 'Cass Delacroix',
|
|
2814
|
+
owner: 'Margaux Delacroix',
|
|
2815
|
+
context: 'Discussing the Garamond revival project — family-tier confidential'
|
|
2816
|
+
}
|
|
2817
|
+
},
|
|
2818
|
+
|
|
2819
|
+
// Challenge — questioning digital value
|
|
2820
|
+
challenge: {
|
|
2821
|
+
message: "I'll be direct — Margaux doesn't really understand what AI agents " +
|
|
2822
|
+
"do that a phone call and a handshake can't. In her world, trust is built " +
|
|
2823
|
+
"by showing up to someone's studio, seeing their work, touching the paper. " +
|
|
2824
|
+
"You can tell everything about a printer by looking at their registration " +
|
|
2825
|
+
"and their ink coverage. What's the equivalent in your world? How do you " +
|
|
2826
|
+
"know if an agent is any good?",
|
|
2827
|
+
caller: {
|
|
2828
|
+
name: 'Cass Delacroix',
|
|
2829
|
+
owner: 'Margaux Delacroix',
|
|
2830
|
+
context: 'Questioning the value proposition of digital agent networks'
|
|
2831
|
+
}
|
|
2832
|
+
},
|
|
2833
|
+
|
|
2834
|
+
// Follow-up — finding unexpected connections
|
|
2835
|
+
followUp: {
|
|
2836
|
+
message: "That's actually interesting — the idea of a reputation that travels " +
|
|
2837
|
+
"with you. In the print world, your work IS your reputation. If you've " +
|
|
2838
|
+
"printed a beautiful edition, people can hold it, see the craft, and decide " +
|
|
2839
|
+
"for themselves. There's no intermediary reviewing you. The work speaks. " +
|
|
2840
|
+
"Is there anything like that in your protocol — where the agent's actual " +
|
|
2841
|
+
"output serves as its credential?",
|
|
2842
|
+
caller: {
|
|
2843
|
+
name: 'Cass Delacroix',
|
|
2844
|
+
owner: 'Margaux Delacroix',
|
|
2845
|
+
context: 'Exploring reputation and credentialing across domains'
|
|
2846
|
+
}
|
|
2847
|
+
}
|
|
2848
|
+
},
|
|
2849
|
+
|
|
2850
|
+
// ── Config Overrides ────────────────────────────────────────────
|
|
2851
|
+
config: {
|
|
2852
|
+
agent: {
|
|
2853
|
+
name: 'Cass Delacroix',
|
|
2854
|
+
description: 'A letterpress printing agent specializing in type design history, artist books, and community print culture',
|
|
2855
|
+
hostname: 'cass.printshop.test'
|
|
2856
|
+
},
|
|
2857
|
+
tiers: {
|
|
2858
|
+
public: {
|
|
2859
|
+
topics: ['chat', 'letterpress', 'typography', 'zine-culture'],
|
|
2860
|
+
goals: ['find-print-collaborators', 'connect-zine-community', 'share-print-knowledge']
|
|
2861
|
+
},
|
|
2862
|
+
friends: {
|
|
2863
|
+
topics: ['chat', 'letterpress', 'typography', 'zine-culture', 'paper-making', 'book-arts', 'type-sourcing', 'calendar.read'],
|
|
2864
|
+
goals: ['find-print-collaborators', 'source-rare-type', 'document-print-techniques', 'exhibition-planning']
|
|
2865
|
+
},
|
|
2866
|
+
family: {
|
|
2867
|
+
topics: ['chat', 'letterpress', 'typography', 'zine-culture', 'paper-making', 'book-arts', 'type-sourcing', 'calendar', 'email', 'search', 'tools', 'studio-finances', 'garamond-project'],
|
|
2868
|
+
// Family-tier goals for the letterpress studio (no farming goals — this is not the Bramble profile)
goals: ['studio-succession', 'archive-digitization', 'garamond-revival', 'teaching-studio-expansion']
|
|
2869
|
+
}
|
|
2870
|
+
},
|
|
2871
|
+
defaults: {
|
|
2872
|
+
expiration: '30d',
|
|
2873
|
+
maxCalls: 100,
|
|
2874
|
+
rateLimit: {
|
|
2875
|
+
perMinute: 5,
|
|
2876
|
+
perHour: 50,
|
|
2877
|
+
perDay: 200
|
|
2878
|
+
}
|
|
2879
|
+
}
|
|
2880
|
+
}
|
|
2881
|
+
};
|
|
2882
|
+
```
|
|
2883
|
+
|
|
2884
|
+
**Step 2: Commit**
|
|
2885
|
+
|
|
2886
|
+
```bash
|
|
2887
|
+
git add test/profiles/cass-delacroix.js
|
|
2888
|
+
git commit -m "feat: add Cass Delacroix test profile — family/none tier, letterpress"
|
|
2889
|
+
```
|
|
2890
|
+
|
|
2891
|
+
---
|
|
2892
|
+
|
|
2893
|
+
### Task 14: Add summary validation to E2E tests
|
|
2894
|
+
|
|
2895
|
+
**Files:**
|
|
2896
|
+
- Create: `test/e2e/summary-validation.test.js`
|
|
2897
|
+
|
|
2898
|
+
Tests that summaries produced from conversations with each of the 4 profiles
|
|
2899
|
+
pass structural validation against the output schema.
|
|
2900
|
+
|
|
2901
|
+
**Step 1: Write the test**
|
|
2902
|
+
|
|
2903
|
+
```javascript
|
|
2904
|
+
// test/e2e/summary-validation.test.js
|
|
2905
|
+
/**
|
|
2906
|
+
* Summary Validation Tests
|
|
2907
|
+
*
|
|
2908
|
+
* Verifies that the unified summary prompt produces valid structured
|
|
2909
|
+
* output that can be rendered by the formatter.
|
|
2910
|
+
*
|
|
2911
|
+
* For each of the 4 profiles:
|
|
2912
|
+
* 1. Build a mock conversation using the profile's callScenarios
|
|
2913
|
+
* 2. Pass through buildUnifiedSummaryPrompt with the profile's disclosure manifest
|
|
2914
|
+
* 3. Feed the prompt to a mock LLM that returns plausible JSON
|
|
2915
|
+
* 4. Validate the JSON structure matches the schema
|
|
2916
|
+
* 5. Validate the formatter renders without errors
|
|
2917
|
+
* 6. Check profile-specific expectations (disclosure compliance, overlap range, etc.)
|
|
2918
|
+
*/
|
|
2919
|
+
module.exports = function (test, assert, helpers) {
|
|
2920
|
+
delete require.cache[require.resolve('../../src/lib/summary-prompt')];
|
|
2921
|
+
delete require.cache[require.resolve('../../src/lib/summary-formatter')];
|
|
2922
|
+
const { buildUnifiedSummaryPrompt } = require('../../src/lib/summary-prompt');
|
|
2923
|
+
const { formatSummary } = require('../../src/lib/summary-formatter');
|
|
2924
|
+
|
|
2925
|
+
const REQUIRED_TOP_KEYS = ['headline', 'vibe', 'quickTake', 'who', 'collaboration',
  'exchange', 'disclosure', 'objectives', 'nextSteps', 'trust', 'assessment'];
const VALID_VIBES = ['productive', 'exploratory', 'mismatch', 'guarded', 'breakthrough'];
const VALID_RATINGS = ['HIGH', 'MEDIUM', 'LOW'];
const VALID_COMPLIANCE = ['clean', 'minor_concern', 'violation'];
const VALID_TRUST = ['maintain', 'increase', 'decrease', 'revoke'];
const VALID_BALANCE = ['favorable', 'even', 'unfavorable'];

/**
 * Check a parsed summary against the expected output schema.
 *
 * Never throws: a null or non-object input is reported as a single error
 * instead of crashing on property access.
 *
 * @param {*} summary - Parsed JSON returned by the summarizer
 * @returns {string[]} Human-readable problems; empty when the summary is valid
 */
function validateSummarySchema(summary) {
  if (summary === null || typeof summary !== 'object' || Array.isArray(summary)) {
    return ['summary must be an object'];
  }

  const errors = [];

  for (const key of REQUIRED_TOP_KEYS) {
    if (summary[key] === undefined) errors.push(`Missing top-level key: ${key}`);
  }
  if (typeof summary.headline !== 'string') errors.push('headline must be string');
  if (!VALID_VIBES.includes(summary.vibe)) errors.push(`Invalid vibe: ${summary.vibe}`);
  if (!Array.isArray(summary.quickTake) || summary.quickTake.length < 1) errors.push('quickTake must be non-empty array');
  if (!summary.who?.name) errors.push('who.name required');
  if (!VALID_RATINGS.includes(summary.collaboration?.rating)) errors.push(`Invalid rating: ${summary.collaboration?.rating}`);

  const score = summary.collaboration?.score;
  if (typeof score !== 'number' || Number.isNaN(score)) {
    errors.push('collaboration.score must be number');
  } else if (score < 0 || score > 1) {
    // The formatter prints the score as "x/1.00", so it must lie within [0, 1].
    errors.push(`collaboration.score must be between 0 and 1: ${score}`);
  }

  if (!VALID_COMPLIANCE.includes(summary.disclosure?.compliance)) errors.push(`Invalid compliance: ${summary.disclosure?.compliance}`);
  // An empty nextSteps list is valid; only a present non-array value is rejected here.
  if (summary.nextSteps !== undefined && !Array.isArray(summary.nextSteps)) errors.push('nextSteps must be array');
  if (!VALID_TRUST.includes(summary.trust?.level)) errors.push(`Invalid trust level: ${summary.trust?.level}`);
  if (!VALID_BALANCE.includes(summary.exchange?.balance)) errors.push(`Invalid balance: ${summary.exchange?.balance}`);
  if (typeof summary.assessment !== 'string') errors.push('assessment must be string');

  return errors;
}
|
|
2952
|
+
|
|
2953
|
+
// ── Test: prompt includes disclosure for each profile ──
|
|
2954
|
+
|
|
2955
|
+
const profiles = [
|
|
2956
|
+
{ name: 'golda-deluxe', load: () => require('../profiles/golda-deluxe') },
|
|
2957
|
+
{ name: 'nyx-meridian', load: () => require('../profiles/nyx-meridian') },
|
|
2958
|
+
{ name: 'bramble-voss', load: () => require('../profiles/bramble-voss') },
|
|
2959
|
+
{ name: 'cass-delacroix', load: () => require('../profiles/cass-delacroix') }
|
|
2960
|
+
];
|
|
2961
|
+
|
|
2962
|
+
for (const { name, load } of profiles) {
|
|
2963
|
+
test(`summary prompt for ${name} includes disclosure context`, () => {
|
|
2964
|
+
const profile = load();
|
|
2965
|
+
const firstScenario = Object.values(profile.callScenarios)[0];
|
|
2966
|
+
|
|
2967
|
+
const prompt = buildUnifiedSummaryPrompt({
|
|
2968
|
+
transcript: [
|
|
2969
|
+
{ direction: 'inbound', content: firstScenario.message },
|
|
2970
|
+
{ direction: 'outbound', content: `Thanks for reaching out, ${profile.agent.name}.` }
|
|
2971
|
+
],
|
|
2972
|
+
callerInfo: firstScenario.caller,
|
|
2973
|
+
conversationObjective: firstScenario.caller.context,
|
|
2974
|
+
disclosure: {
|
|
2975
|
+
topics: profile.manifest.tiers?.public?.topics
|
|
2976
|
+
|| profile.manifest.topics?.public?.lead_with
|
|
2977
|
+
|| [],
|
|
2978
|
+
objectives: profile.manifest.tiers?.public?.objectives
|
|
2979
|
+
|| profile.manifest.topics?.public?.discuss_freely
|
|
2980
|
+
|| [],
|
|
2981
|
+
doNotDiscuss: profile.manifest.tiers?.public?.do_not_discuss
|
|
2982
|
+
|| profile.manifest.topics?.public?.deflect
|
|
2983
|
+
|| [],
|
|
2984
|
+
neverDisclose: profile.manifest.never_disclose || []
|
|
2985
|
+
},
|
|
2986
|
+
collaborationState: {
|
|
2987
|
+
phase: 'exploring',
|
|
2988
|
+
overlapScore: 0.3,
|
|
2989
|
+
activeThreads: [],
|
|
2990
|
+
candidateCollaborations: [],
|
|
2991
|
+
turnCount: 2,
|
|
2992
|
+
closeSignal: false
|
|
2993
|
+
}
|
|
2994
|
+
});
|
|
2995
|
+
|
|
2996
|
+
// Must include the profile's never_disclose items
|
|
2997
|
+
for (const secret of profile.manifest.never_disclose) {
|
|
2998
|
+
assert.includes(prompt, secret, `Prompt should include never_disclose: "${secret}"`);
|
|
2999
|
+
}
|
|
3000
|
+
|
|
3001
|
+
// Must include the caller's name
|
|
3002
|
+
assert.includes(prompt, profile.agent.name);
|
|
3003
|
+
|
|
3004
|
+
// Must include the JSON output schema
|
|
3005
|
+
assert.includes(prompt, 'headline');
|
|
3006
|
+
assert.includes(prompt, 'disclosure');
|
|
3007
|
+
assert.includes(prompt, 'compliance');
|
|
3008
|
+
});
|
|
3009
|
+
}
|
|
3010
|
+
|
|
3011
|
+
// ── Test: formatter renders valid output for each vibe ──
|
|
3012
|
+
|
|
3013
|
+
test('formatter handles all vibe types without errors', () => {
|
|
3014
|
+
for (const vibe of VALID_VIBES) {
|
|
3015
|
+
const summary = {
|
|
3016
|
+
headline: `Test headline for ${vibe}`,
|
|
3017
|
+
vibe,
|
|
3018
|
+
quickTake: ['Point 1', 'Point 2'],
|
|
3019
|
+
who: { name: 'Test', represents: 'Testing', keyFacts: [] },
|
|
3020
|
+
collaboration: { score: 0.5, scoreJustification: 'Test', rating: 'MEDIUM', opportunities: [] },
|
|
3021
|
+
exchange: { weGot: ['info'], weGave: ['info'], balance: 'even' },
|
|
3022
|
+
disclosure: { compliance: 'clean', topicsCovered: [], topicsAvoided: [], concerns: [] },
|
|
3023
|
+
objectives: { achieved: [], partiallyAchieved: [], notAchieved: [] },
|
|
3024
|
+
nextSteps: [],
|
|
3025
|
+
trust: { level: 'maintain', reasoning: 'Test' },
|
|
3026
|
+
assessment: 'Test assessment'
|
|
3027
|
+
};
|
|
3028
|
+
|
|
3029
|
+
const errors = validateSummarySchema(summary);
|
|
3030
|
+
assert.deepEqual(errors, [], `Schema validation failed for vibe "${vibe}": ${errors.join(', ')}`);
|
|
3031
|
+
|
|
3032
|
+
const md = formatSummary(summary);
|
|
3033
|
+
assert.includes(md, 'Test headline');
|
|
3034
|
+
assert.includes(md, vibe);
|
|
3035
|
+
}
|
|
3036
|
+
});
|
|
3037
|
+
|
|
3038
|
+
// ── Test: schema validator catches missing fields ──
|
|
3039
|
+
|
|
3040
|
+
test('schema validator catches incomplete summaries', () => {
|
|
3041
|
+
const errors = validateSummarySchema({
|
|
3042
|
+
headline: 'Test',
|
|
3043
|
+
// missing everything else
|
|
3044
|
+
});
|
|
3045
|
+
assert.ok(errors.length > 5, 'Should catch multiple missing fields');
|
|
3046
|
+
});
|
|
3047
|
+
};
|
|
3048
|
+
```
|
|
3049
|
+
|
|
3050
|
+
**Step 2: Run test to verify it passes**
|
|
3051
|
+
|
|
3052
|
+
Run: `node test/run.js --filter "summary"`
|
|
3053
|
+
Expected: PASS (all tests)
|
|
3054
|
+
|
|
3055
|
+
**Step 3: Commit**
|
|
3056
|
+
|
|
3057
|
+
```bash
|
|
3058
|
+
git add test/e2e/summary-validation.test.js
|
|
3059
|
+
git commit -m "feat(e2e): add summary validation tests for all 4 profiles"
|
|
3060
|
+
```
|
|
3061
|
+
|
|
3062
|
+
---
|
|
3063
|
+
|
|
3064
|
+
## Summary
|
|
3065
|
+
|
|
3066
|
+
| Task | Phase | What It Builds |
|
|
3067
|
+
|------|-------|----------------|
|
|
3068
|
+
| 1 | Environment | Isolated temp dirs + port allocation |
|
|
3069
|
+
| 2 | CLI Runner | Structured CLI command wrapper |
|
|
3070
|
+
| 3 | Two-Server | Dual Express servers for cross-agent testing |
|
|
3071
|
+
| 4 | Full Flow Tests | 5 E2E tests: invite, bidirectional, revoke, expire, max-calls |
|
|
3072
|
+
| 5 | Agent Prompt | 9-step prompt sequence for AI subagent testing |
|
|
3073
|
+
| 6 | Report Generator | Markdown output + Linear issue formatting |
|
|
3074
|
+
| 7 | Orchestrator | Standalone script: `node test/e2e/orchestrate.js` |
|
|
3075
|
+
| 8 | Test Runner Integration | `--e2e` flag in existing runner |
|
|
3076
|
+
| 9 | Documentation | Protocol docs + testing guide |
|
|
3077
|
+
| 10 | Unified Summary Prompt | `buildUnifiedSummaryPrompt` with disclosure + collaboration context |
|
|
3078
|
+
| 11 | Summary Formatter | Human-readable markdown: headline first, details below |
|
|
3079
|
+
| 12 | Summary Wiring | Replace 3 inline prompts with unified builder |
|
|
3080
|
+
| 13 | Cass Delacroix Profile | 4th test profile: family/none tier, letterpress |
|
|
3081
|
+
| 14 | Summary Validation | Summary-prompt checks for all 4 profiles, plus formatter and schema-validator tests |
|
|
3082
|
+
|
|
3083
|
+
**Total new files:** 14
|
|
3084
|
+
**Total modified files:** 5 (`test/run.js`, `docs/protocol.md`, `src/server.js`, `src/lib/conversation-driver.js`, `src/lib/openclaw-integration.js`)
|
|
3085
|
+
**Estimated commits:** 14
|