a2acalling 0.6.45 → 0.6.46
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,1812 +0,0 @@
|
|
|
1
|
-
# E2E Test & Prompt Sequence for A2A Install/Onboarding/Invite Flow
|
|
2
|
-
|
|
3
|
-
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
|
|
4
|
-
|
|
5
|
-
**Goal:** Build an AI-agent-driven E2E testing system where an orchestrator spawns a subagent that installs a2acalling from npm, runs onboarding, exercises the invite flow between two isolated servers, and reports results (including auto-filing bugs in Linear).
|
|
6
|
-
|
|
7
|
-
**Architecture:** The test system uses isolated temp directories (extending the existing `tmpConfigDir` pattern) to spin up two independent a2a servers on ephemeral ports. A CLI runner wraps all `a2a` commands with structured output parsing. The orchestrator script coordinates the full sequence: environment setup → install verification → onboarding → token creation → invite exchange → cross-server call → report generation. A prompt document gives a Claude subagent the exact steps and expected outcomes.
|
|
8
|
-
|
|
9
|
-
**Tech Stack:** Node.js, Express (ephemeral ports), child_process (CLI invocation), existing zero-dependency test runner, Linear API (for bug filing via MCP or REST)
|
|
10
|
-
|
|
11
|
-
**Linear ticket:** A2A-21
|
|
12
|
-
|
|
13
|
-
---
|
|
14
|
-
|
|
15
|
-
## Phase 1: E2E Environment & CLI Runner
|
|
16
|
-
|
|
17
|
-
### Task 1: Create E2E environment isolation utility
|
|
18
|
-
|
|
19
|
-
**Files:**
|
|
20
|
-
- Create: `test/e2e/env.js`
|
|
21
|
-
- Test: `test/e2e/env.test.js`
|
|
22
|
-
|
|
23
|
-
**Step 1: Write the failing test**
|
|
24
|
-
|
|
25
|
-
```javascript
|
|
26
|
-
// test/e2e/env.test.js
|
|
27
|
-
module.exports = function (test, assert, helpers) {
|
|
28
|
-
test('createE2EEnv returns isolated dir with cleanup', () => {
|
|
29
|
-
const { createE2EEnv } = require('./env');
|
|
30
|
-
const env = createE2EEnv('test-basic');
|
|
31
|
-
|
|
32
|
-
assert.ok(env.dir, 'Should have a directory');
|
|
33
|
-
assert.ok(env.configDir, 'Should have a config directory');
|
|
34
|
-
assert.ok(env.env.A2A_CONFIG_DIR, 'Should set A2A_CONFIG_DIR');
|
|
35
|
-
|
|
36
|
-
const fs = require('fs');
|
|
37
|
-
assert.ok(fs.existsSync(env.dir), 'Directory should exist');
|
|
38
|
-
assert.ok(fs.existsSync(env.configDir), 'Config dir should exist');
|
|
39
|
-
|
|
40
|
-
env.cleanup();
|
|
41
|
-
assert.equal(fs.existsSync(env.dir), false, 'Should clean up');
|
|
42
|
-
});
|
|
43
|
-
|
|
44
|
-
test('createE2EEnv provides isolated process env', () => {
|
|
45
|
-
const { createE2EEnv } = require('./env');
|
|
46
|
-
const envA = createE2EEnv('env-a');
|
|
47
|
-
const envB = createE2EEnv('env-b');
|
|
48
|
-
|
|
49
|
-
assert.ok(envA.configDir !== envB.configDir, 'Should be different dirs');
|
|
50
|
-
|
|
51
|
-
envA.cleanup();
|
|
52
|
-
envB.cleanup();
|
|
53
|
-
});
|
|
54
|
-
|
|
55
|
-
test('createE2EEnv finds available port', async () => {
|
|
56
|
-
const { createE2EEnv } = require('./env');
|
|
57
|
-
const env = createE2EEnv('port-test');
|
|
58
|
-
|
|
59
|
-
const port = await env.findAvailablePort();
|
|
60
|
-
assert.ok(port >= 3001 && port <= 65535, 'Should return valid port');
|
|
61
|
-
|
|
62
|
-
env.cleanup();
|
|
63
|
-
});
|
|
64
|
-
};
|
|
65
|
-
```
|
|
66
|
-
|
|
67
|
-
**Step 2: Run test to verify it fails**
|
|
68
|
-
|
|
69
|
-
Run: `node test/run.js --filter "createE2EEnv"`
|
|
70
|
-
Expected: FAIL — module not found
|
|
71
|
-
|
|
72
|
-
**Step 3: Write minimal implementation**
|
|
73
|
-
|
|
74
|
-
```javascript
|
|
75
|
-
// test/e2e/env.js
|
|
76
|
-
const fs = require('fs');
|
|
77
|
-
const path = require('path');
|
|
78
|
-
const os = require('os');
|
|
79
|
-
const net = require('net');
|
|
80
|
-
|
|
81
|
-
/**
 * Create a fully isolated E2E test environment.
 *
 * Creates a fresh temp directory under os.tmpdir() containing a `config`
 * subdirectory, and builds a copy of process.env that points the CLI at it.
 *
 * @param {string} [prefix='a2a-e2e'] - prefix for the temp directory name
 * @returns {{
 *   dir: string,
 *   configDir: string,
 *   env: object,
 *   findAvailablePort: function(number=): Promise<number>,
 *   cleanup: function(): void
 * }}
 *   - dir: root temp directory for this test run
 *   - configDir: path that A2A_CONFIG_DIR points to
 *   - env: process.env clone with A2A_CONFIG_DIR and CI set
 *   - findAvailablePort(startPort): resolves to a port >= startPort that was
 *     free on 127.0.0.1 at the time of the probe
 *   - cleanup(): removes the temp directory (best-effort, safe to call twice)
 */
function createE2EEnv(prefix = 'a2a-e2e') {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), `${prefix}-`));
  const configDir = path.join(dir, 'config');
  fs.mkdirSync(configDir, { recursive: true });

  const env = {
    ...process.env,
    A2A_CONFIG_DIR: configDir,
    // Prevent postinstall from running quickstart
    CI: 'true'
  };

  /**
   * Bind a throwaway server to `port` on loopback (0 = let the OS choose),
   * then release it and resolve to the port that was actually bound.
   */
  function probePort(port) {
    return new Promise((resolve, reject) => {
      const probe = net.createServer();
      probe.once('error', reject);
      probe.listen(port, '127.0.0.1', () => {
        const bound = probe.address().port;
        probe.close(() => resolve(bound));
      });
    });
  }

  /**
   * Find a free loopback port that is at least `startPort`.
   *
   * The OS-assigned ephemeral port is preferred; only if it falls below
   * `startPort` are ports scanned upward from `startPort`. The port is
   * released before this resolves, so another process can still claim it
   * before the caller binds.
   *
   * @param {number} [startPort=3001] - lowest acceptable port
   * @returns {Promise<number>}
   * @throws {RangeError} (as a rejection) when startPort is not an integer in 0..65535
   */
  async function findAvailablePort(startPort = 3001) {
    if (!Number.isInteger(startPort) || startPort < 0 || startPort > 65535) {
      throw new RangeError(`startPort must be an integer between 0 and 65535, got ${startPort}`);
    }

    const assigned = await probePort(0);
    if (assigned >= startPort) return assigned;

    for (let candidate = Math.max(startPort, 1); candidate <= 65535; candidate += 1) {
      try {
        return await probePort(candidate);
      } catch (err) {
        // Busy or privileged port: try the next one. Anything else is unexpected.
        if (err.code !== 'EADDRINUSE' && err.code !== 'EACCES') throw err;
      }
    }
    throw new Error(`No available port at or above ${startPort}`);
  }

  function cleanup() {
    try {
      fs.rmSync(dir, { recursive: true, force: true });
    } catch (e) { /* best-effort */ }
  }

  return { dir, configDir, env, findAvailablePort, cleanup };
}
|
|
123
|
-
|
|
124
|
-
module.exports = { createE2EEnv };
|
|
125
|
-
```
|
|
126
|
-
|
|
127
|
-
**Step 4: Run test to verify it passes**
|
|
128
|
-
|
|
129
|
-
Run: `node test/run.js --filter "createE2EEnv"`
|
|
130
|
-
Expected: PASS (all 3 tests)
|
|
131
|
-
|
|
132
|
-
**Step 5: Commit**
|
|
133
|
-
|
|
134
|
-
```bash
|
|
135
|
-
git add test/e2e/env.js test/e2e/env.test.js
|
|
136
|
-
git commit -m "feat(e2e): add isolated environment utility"
|
|
137
|
-
```
|
|
138
|
-
|
|
139
|
-
---
|
|
140
|
-
|
|
141
|
-
### Task 2: Create CLI runner utility
|
|
142
|
-
|
|
143
|
-
**Files:**
|
|
144
|
-
- Create: `test/e2e/cli-runner.js`
|
|
145
|
-
- Test: `test/e2e/cli-runner.test.js`
|
|
146
|
-
|
|
147
|
-
**Step 1: Write the failing test**
|
|
148
|
-
|
|
149
|
-
```javascript
|
|
150
|
-
// test/e2e/cli-runner.test.js
|
|
151
|
-
module.exports = function (test, assert, helpers) {
|
|
152
|
-
const { createE2EEnv } = require('./env');
|
|
153
|
-
|
|
154
|
-
test('CLIRunner.run executes a2a command and returns output', async () => {
|
|
155
|
-
const env = createE2EEnv('cli-run');
|
|
156
|
-
const { CLIRunner } = require('./cli-runner');
|
|
157
|
-
const runner = new CLIRunner(env);
|
|
158
|
-
|
|
159
|
-
// 'a2a help' should work without onboarding
|
|
160
|
-
const result = await runner.run('help');
|
|
161
|
-
assert.equal(result.exitCode, 0, 'Should exit 0');
|
|
162
|
-
assert.ok(result.stdout.length > 0, 'Should have stdout');
|
|
163
|
-
|
|
164
|
-
env.cleanup();
|
|
165
|
-
});
|
|
166
|
-
|
|
167
|
-
test('CLIRunner.run captures non-zero exit codes', async () => {
|
|
168
|
-
const env = createE2EEnv('cli-fail');
|
|
169
|
-
const { CLIRunner } = require('./cli-runner');
|
|
170
|
-
const runner = new CLIRunner(env);
|
|
171
|
-
|
|
172
|
-
// 'a2a call' without onboarding should fail
|
|
173
|
-
const result = await runner.run('call', ['nobody', 'hello']);
|
|
174
|
-
assert.ok(result.exitCode !== 0, 'Should exit non-zero');
|
|
175
|
-
|
|
176
|
-
env.cleanup();
|
|
177
|
-
});
|
|
178
|
-
|
|
179
|
-
test('CLIRunner.run respects timeout', async () => {
|
|
180
|
-
const env = createE2EEnv('cli-timeout');
|
|
181
|
-
const { CLIRunner } = require('./cli-runner');
|
|
182
|
-
const runner = new CLIRunner(env, { timeout: 500 });
|
|
183
|
-
|
|
184
|
-
// Running a command that hangs should time out
|
|
185
|
-
const result = await runner.run('server', ['99999'], { timeout: 500 });
|
|
186
|
-
assert.ok(result.timedOut || result.exitCode !== 0, 'Should timeout or fail');
|
|
187
|
-
|
|
188
|
-
env.cleanup();
|
|
189
|
-
});
|
|
190
|
-
|
|
191
|
-
test('CLIRunner.onboard completes full onboarding via --submit', async () => {
|
|
192
|
-
const env = createE2EEnv('cli-onboard');
|
|
193
|
-
const { CLIRunner } = require('./cli-runner');
|
|
194
|
-
const runner = new CLIRunner(env);
|
|
195
|
-
|
|
196
|
-
const fs = require('fs');
|
|
197
|
-
const path = require('path');
|
|
198
|
-
|
|
199
|
-
// Pre-set config to awaiting_disclosure (skip port detection step)
|
|
200
|
-
const configPath = path.join(env.configDir, 'a2a-config.json');
|
|
201
|
-
fs.writeFileSync(configPath, JSON.stringify({
|
|
202
|
-
onboarding: { version: 2, step: 'awaiting_disclosure' },
|
|
203
|
-
agent: { hostname: 'localhost:3001', name: 'e2e-test-agent' },
|
|
204
|
-
tiers: {}
|
|
205
|
-
}));
|
|
206
|
-
|
|
207
|
-
const result = await runner.onboard({
|
|
208
|
-
personalityNotes: 'E2E test agent — direct and minimal',
|
|
209
|
-
topics: [{ topic: 'Testing', description: 'Automated E2E tests' }]
|
|
210
|
-
});
|
|
211
|
-
|
|
212
|
-
assert.ok(result.success, 'Onboarding should succeed');
|
|
213
|
-
assert.ok(result.stdout.includes('Onboarding complete'), 'Should say complete');
|
|
214
|
-
|
|
215
|
-
env.cleanup();
|
|
216
|
-
});
|
|
217
|
-
};
|
|
218
|
-
```
|
|
219
|
-
|
|
220
|
-
**Step 2: Run test to verify it fails**
|
|
221
|
-
|
|
222
|
-
Run: `node test/run.js --filter "CLIRunner"`
|
|
223
|
-
Expected: FAIL — module not found
|
|
224
|
-
|
|
225
|
-
**Step 3: Write minimal implementation**
|
|
226
|
-
|
|
227
|
-
```javascript
|
|
228
|
-
// test/e2e/cli-runner.js
|
|
229
|
-
const { execFile } = require('child_process');
|
|
230
|
-
const path = require('path');
|
|
231
|
-
|
|
232
|
-
const CLI_PATH = path.join(__dirname, '..', '..', 'bin', 'cli.js');
|
|
233
|
-
|
|
234
|
-
/**
 * Wraps the a2a CLI for structured E2E testing.
 *
 * Each method runs the CLI script (CLI_PATH) as a child process of the current
 * Node binary, using the environment variables of the given E2E environment,
 * and resolves to a plain result object. No method rejects on a CLI failure;
 * failures are reported through `exitCode` / `success`.
 */
class CLIRunner {
  /**
   * @param {{env: object}} e2eEnv - E2E environment; its `env` property is
   *   passed to the child process as its environment variables
   * @param {{timeout?: number}} [options] - default per-command timeout in ms
   *   (30000 when omitted)
   */
  constructor(e2eEnv, options = {}) {
    this.env = e2eEnv;
    this.defaultTimeout = options.timeout || 30000;
  }

  /**
   * Run an a2a CLI command.
   * @param {string} command - The a2a subcommand (e.g., 'list', 'create')
   * @param {string[]} args - Additional arguments
   * @param {object} options - { timeout } in ms; falls back to the runner default
   * @returns {Promise<{stdout: string, stderr: string, exitCode: number, timedOut: boolean}>}
   *   exitCode is 0 on success, the process exit code when it is a non-zero
   *   number, and 1 for any other failure (spawn error, killed by signal).
   *   timedOut is true when the child was killed by execFile.
   */
  run(command, args = [], options = {}) {
    // `||` on purpose: a timeout of 0 would disable execFile's timeout and let
    // a hung CLI block the test run, so 0 falls back to the default.
    const timeout = options.timeout || this.defaultTimeout;
    const fullArgs = [CLI_PATH, command, ...args];

    return new Promise((resolve) => {
      execFile(process.execPath, fullArgs, {
        env: this.env.env,
        encoding: 'utf8',
        timeout,
        maxBuffer: 1024 * 1024
      }, (error, stdout, stderr) => {
        resolve({
          stdout: stdout || '',
          stderr: stderr || '',
          exitCode: toExitCode(error),
          timedOut: Boolean(error && error.killed)
        });
      });
    });
  }

  /**
   * Complete onboarding programmatically via `onboard --submit`.
   *
   * @param {object} disclosure - { personalityNotes, topics, objectives,
   *   doNotDiscuss, neverDisclose }; missing fields get defaults
   * @returns {Promise<{success: boolean, stdout: string, stderr: string, exitCode: number}>}
   *   success requires exit code 0 AND 'Onboarding complete' in stdout
   */
  async onboard(disclosure = {}) {
    const submission = {
      tiers: {
        public: {
          topics: disclosure.topics || [{ topic: 'General', description: 'Open discussion' }],
          objectives: disclosure.objectives || [],
          do_not_discuss: disclosure.doNotDiscuss || []
        },
        friends: { topics: [], objectives: [], do_not_discuss: [] },
        family: { topics: [], objectives: [], do_not_discuss: [] }
      },
      never_disclose: disclosure.neverDisclose || [],
      personality_notes: disclosure.personalityNotes || 'E2E test agent'
    };

    const result = await this.run('onboard', ['--submit', JSON.stringify(submission)]);
    return {
      success: result.exitCode === 0 && result.stdout.includes('Onboarding complete'),
      stdout: result.stdout,
      stderr: result.stderr,
      exitCode: result.exitCode
    };
  }

  /**
   * Create a token and return the parsed output.
   * @param {object} options - { name, tier, expires, maxCalls, topics };
   *   only truthy options are forwarded as CLI flags
   * @returns {Promise<{success: boolean, token: ?string, inviteUrl: ?string, stdout: string, stderr: string}>}
   *   token / inviteUrl are the first `fed_…` / `a2a://…` matches in stdout,
   *   or null when stdout contains none
   */
  async createToken(options = {}) {
    const args = [];
    if (options.name) args.push('--name', options.name);
    if (options.tier) args.push('--tier', options.tier);
    if (options.expires) args.push('--expires', options.expires);
    if (options.maxCalls) args.push('--max-calls', String(options.maxCalls));
    if (options.topics) args.push('--topics', options.topics);

    const result = await this.run('create', args);

    // Parse invite URL from output (format: a2a://host/token)
    const urlMatch = result.stdout.match(/a2a:\/\/[^\s]+/);
    const tokenMatch = result.stdout.match(/fed_[A-Za-z0-9_-]+/);

    return {
      success: result.exitCode === 0,
      inviteUrl: urlMatch ? urlMatch[0] : null,
      token: tokenMatch ? tokenMatch[0] : null,
      stdout: result.stdout,
      stderr: result.stderr
    };
  }

  /**
   * Add a contact from an invite URL.
   * @param {string} inviteUrl - a2a://host/token URL
   * @param {string} name - Contact name
   * @returns {Promise<{success: boolean, stdout: string, stderr: string}>}
   */
  async addContact(inviteUrl, name) {
    return basicResult(await this.run('add', [inviteUrl, name]));
  }

  /**
   * List tokens.
   * @returns {Promise<{success: boolean, stdout: string, stderr: string}>}
   */
  async listTokens() {
    return basicResult(await this.run('list'));
  }

  /**
   * List contacts.
   * @returns {Promise<{success: boolean, stdout: string, stderr: string}>}
   */
  async listContacts() {
    return basicResult(await this.run('contacts'));
  }

  /**
   * Ping a remote agent.
   * @param {string} target - URL or contact name
   * @returns {Promise<{success: boolean, stdout: string, stderr: string}>}
   */
  async ping(target) {
    return basicResult(await this.run('ping', [target]));
  }
}

/** Map an execFile error (or null) to a numeric exit code. */
function toExitCode(error) {
  if (!error) return 0;
  // error.code is a string (e.g. 'ENOENT') on spawn failures and null when the
  // child was killed by a signal; report both as a generic failure.
  return typeof error.code === 'number' && error.code !== 0 ? error.code : 1;
}

/** Reduce a run() result to the { success, stdout, stderr } shape. */
function basicResult(result) {
  return {
    success: result.exitCode === 0,
    stdout: result.stdout,
    stderr: result.stderr
  };
}
|
|
387
|
-
|
|
388
|
-
module.exports = { CLIRunner };
|
|
389
|
-
```
|
|
390
|
-
|
|
391
|
-
**Step 4: Run test to verify it passes**
|
|
392
|
-
|
|
393
|
-
Run: `node test/run.js --filter "CLIRunner"`
|
|
394
|
-
Expected: PASS (all 4 tests)
|
|
395
|
-
|
|
396
|
-
**Step 5: Commit**
|
|
397
|
-
|
|
398
|
-
```bash
|
|
399
|
-
git add test/e2e/cli-runner.js test/e2e/cli-runner.test.js
|
|
400
|
-
git commit -m "feat(e2e): add CLI runner utility for structured command execution"
|
|
401
|
-
```
|
|
402
|
-
|
|
403
|
-
---
|
|
404
|
-
|
|
405
|
-
## Phase 2: Two-Server E2E Test
|
|
406
|
-
|
|
407
|
-
### Task 3: Create two-server test harness
|
|
408
|
-
|
|
409
|
-
**Files:**
|
|
410
|
-
- Create: `test/e2e/two-server.js`
|
|
411
|
-
- Test: `test/e2e/two-server.test.js`
|
|
412
|
-
|
|
413
|
-
**Step 1: Write the failing test**
|
|
414
|
-
|
|
415
|
-
```javascript
|
|
416
|
-
// test/e2e/two-server.test.js
|
|
417
|
-
module.exports = function (test, assert, helpers) {
|
|
418
|
-
const { TwoServerHarness } = require('./two-server');
|
|
419
|
-
|
|
420
|
-
test('TwoServerHarness starts two isolated servers', async () => {
|
|
421
|
-
const harness = new TwoServerHarness();
|
|
422
|
-
await harness.setup();
|
|
423
|
-
|
|
424
|
-
assert.ok(harness.agentA, 'Agent A should exist');
|
|
425
|
-
assert.ok(harness.agentB, 'Agent B should exist');
|
|
426
|
-
assert.ok(harness.agentA.port, 'Agent A should have a port');
|
|
427
|
-
assert.ok(harness.agentB.port, 'Agent B should have a port');
|
|
428
|
-
assert.ok(harness.agentA.port !== harness.agentB.port, 'Ports should differ');
|
|
429
|
-
|
|
430
|
-
// Both should respond to ping
|
|
431
|
-
const http = require('http');
|
|
432
|
-
const pingA = await httpGet(`http://127.0.0.1:${harness.agentA.port}/api/a2a/ping`);
|
|
433
|
-
assert.ok(pingA.pong, 'Agent A should respond to ping');
|
|
434
|
-
|
|
435
|
-
const pingB = await httpGet(`http://127.0.0.1:${harness.agentB.port}/api/a2a/ping`);
|
|
436
|
-
assert.ok(pingB.pong, 'Agent B should respond to ping');
|
|
437
|
-
|
|
438
|
-
await harness.teardown();
|
|
439
|
-
});
|
|
440
|
-
|
|
441
|
-
test('TwoServerHarness provides token stores for each agent', async () => {
|
|
442
|
-
const harness = new TwoServerHarness();
|
|
443
|
-
await harness.setup();
|
|
444
|
-
|
|
445
|
-
// Create token on Agent A
|
|
446
|
-
const { token } = harness.agentA.tokenStore.create({ name: 'TestToken' });
|
|
447
|
-
assert.match(token, /^fed_/);
|
|
448
|
-
|
|
449
|
-
// Token should NOT exist on Agent B
|
|
450
|
-
const validation = harness.agentB.tokenStore.validate(token);
|
|
451
|
-
assert.equal(validation.valid, false);
|
|
452
|
-
|
|
453
|
-
await harness.teardown();
|
|
454
|
-
});
|
|
455
|
-
|
|
456
|
-
// Helper to make GET request
|
|
457
|
-
function httpGet(url) {
|
|
458
|
-
const http = require('http');
|
|
459
|
-
return new Promise((resolve, reject) => {
|
|
460
|
-
http.get(url, (res) => {
|
|
461
|
-
let data = '';
|
|
462
|
-
res.on('data', chunk => data += chunk);
|
|
463
|
-
res.on('end', () => {
|
|
464
|
-
try { resolve(JSON.parse(data)); }
|
|
465
|
-
catch { resolve(data); }
|
|
466
|
-
});
|
|
467
|
-
}).on('error', reject);
|
|
468
|
-
});
|
|
469
|
-
}
|
|
470
|
-
};
|
|
471
|
-
```
|
|
472
|
-
|
|
473
|
-
**Step 2: Run test to verify it fails**
|
|
474
|
-
|
|
475
|
-
Run: `node test/run.js --filter "TwoServerHarness"`
|
|
476
|
-
Expected: FAIL — module not found
|
|
477
|
-
|
|
478
|
-
**Step 3: Write minimal implementation**
|
|
479
|
-
|
|
480
|
-
```javascript
|
|
481
|
-
// test/e2e/two-server.js
|
|
482
|
-
const { createE2EEnv } = require('./env');
|
|
483
|
-
const path = require('path');
|
|
484
|
-
|
|
485
|
-
/**
 * Starts two independent A2A servers on ephemeral ports,
 * each with their own config directory and token store.
 *
 * This simulates two separate agents that can exchange
 * invites and call each other over HTTP.
 *
 * Usage: `await harness.setup()`, use `harness.agentA` / `harness.agentB`,
 * then `await harness.teardown()`.
 */
class TwoServerHarness {
  /**
   * @param {object} [options]
   * @param {Function} [options.handleMessageA] - message handler for agent A
   * @param {Function} [options.handleMessageB] - message handler for agent B
   *   (each defaults to defaultHandler with the agent's name)
   */
  constructor(options = {}) {
    this.agentA = null;
    this.agentB = null;
    this.handleMessageA = options.handleMessageA || defaultHandler('AgentA');
    this.handleMessageB = options.handleMessageB || defaultHandler('AgentB');
  }

  /**
   * Start both agents (A first, then B).
   * If either fails to start, whatever was already started is torn down and
   * the original error is rethrown, so a failed setup leaves nothing running.
   */
  async setup() {
    try {
      this.agentA = await this._startAgent('agent-a', this.handleMessageA);
      this.agentB = await this._startAgent('agent-b', this.handleMessageB);
    } catch (err) {
      await this.teardown();
      this.agentA = null;
      this.agentB = null;
      throw err;
    }
  }

  /**
   * Start one agent: isolated env, fresh token store, Express app mounted at
   * /api/a2a, listening on 127.0.0.1.
   *
   * @param {string} name - agent label, also used in the temp dir prefix
   * @param {Function} handleMessage - handler passed to the a2a routes
   * @returns {Promise<{name, port, env, tokenStore, app, server, hostname, inviteBase}>}
   *   Rejects if the server cannot bind; the env is cleaned up in that case.
   */
  async _startAgent(name, handleMessage) {
    const env = createE2EEnv(`e2e-${name}`);

    try {
      const port = await env.findAvailablePort();

      // Fresh requires to get isolated instances
      delete require.cache[require.resolve('../../src/lib/tokens')];
      delete require.cache[require.resolve('../../src/routes/a2a')];

      const express = require('express');
      const { TokenStore } = require('../../src/lib/tokens');
      const { createRoutes } = require('../../src/routes/a2a');

      const tokenStore = new TokenStore(env.configDir);
      const app = express();
      app.use(express.json({ limit: '100kb' }));

      app.use('/api/a2a', createRoutes({
        tokenStore,
        handleMessage,
        notifyOwner: () => Promise.resolve()
      }));

      const server = await new Promise((resolve, reject) => {
        const s = app.listen(port, '127.0.0.1');
        // The probed port may have been taken since findAvailablePort released
        // it; surface the bind error instead of waiting forever.
        const onError = (err) => reject(err);
        s.once('error', onError);
        s.once('listening', () => {
          s.removeListener('error', onError);
          resolve(s);
        });
      });

      return {
        name,
        port,
        env,
        tokenStore,
        app,
        server,
        hostname: `127.0.0.1:${port}`,
        inviteBase: `a2a://127.0.0.1:${port}`
      };
    } catch (err) {
      env.cleanup();
      throw err;
    }
  }

  /**
   * Stop whichever agents exist and remove their temp directories.
   * Each agent's env is cleaned up even if closing its server fails.
   */
  async teardown() {
    for (const agent of [this.agentA, this.agentB]) {
      if (!agent) continue;
      try {
        await closeServer(agent.server);
      } finally {
        agent.env.cleanup();
      }
    }
  }
}
|
|
554
|
-
|
|
555
|
-
/**
 * Build the default message handler for a test agent.
 *
 * The returned handler echoes back the first 100 characters of the incoming
 * message, prefixed with the agent name, and always allows the conversation
 * to continue.
 *
 * @param {string} name - agent name used in the reply text
 * @returns {function(*, *): Promise<{text: string, canContinue: boolean}>}
 *   handler taking (message, context); `context` is accepted but not used
 */
function defaultHandler(name) {
  return async function (message, context) {
    // Non-string payloads (missing, number, object) are stringified rather
    // than crashing on `.slice`; a missing message becomes an empty string.
    const text = typeof message === 'string' ? message : String(message ?? '');
    return {
      text: `${name} received: ${text.slice(0, 100)}`,
      canContinue: true
    };
  };
}
|
|
563
|
-
|
|
564
|
-
/**
 * Close a server and resolve once its close callback has fired.
 *
 * Resolves immediately when `server` is falsy. Never rejects: any error
 * passed to the close callback (e.g. server already closed) is ignored,
 * because this is only used for test teardown.
 *
 * @param {?object} server - object with a `close(callback)` method, or null
 * @returns {Promise<void>}
 */
function closeServer(server) {
  return new Promise((resolve) => {
    if (!server) {
      resolve();
      return;
    }
    server.close(() => resolve());
    // close() only stops accepting new connections; idle keep-alive sockets
    // can hold the server open on Node versions where close() does not drop
    // them itself. Release them when the server offers the method.
    if (typeof server.closeIdleConnections === 'function') {
      server.closeIdleConnections();
    }
  });
}
|
|
570
|
-
|
|
571
|
-
module.exports = { TwoServerHarness };
|
|
572
|
-
```
|
|
573
|
-
|
|
574
|
-
**Step 4: Run test to verify it passes**
|
|
575
|
-
|
|
576
|
-
Run: `node test/run.js --filter "TwoServerHarness"`
|
|
577
|
-
Expected: PASS (both tests)
|
|
578
|
-
|
|
579
|
-
**Step 5: Commit**
|
|
580
|
-
|
|
581
|
-
```bash
|
|
582
|
-
git add test/e2e/two-server.js test/e2e/two-server.test.js
|
|
583
|
-
git commit -m "feat(e2e): add two-server harness for cross-agent testing"
|
|
584
|
-
```
|
|
585
|
-
|
|
586
|
-
---
|
|
587
|
-
|
|
588
|
-
### Task 4: Write the full E2E install + onboard + invite test
|
|
589
|
-
|
|
590
|
-
**Files:**
|
|
591
|
-
- Create: `test/e2e/full-flow.test.js`
|
|
592
|
-
|
|
593
|
-
This is the core test that exercises the complete user journey across two agents.
|
|
594
|
-
|
|
595
|
-
**Step 1: Write the test**
|
|
596
|
-
|
|
597
|
-
```javascript
|
|
598
|
-
// test/e2e/full-flow.test.js
|
|
599
|
-
/**
|
|
600
|
-
* Full E2E Flow Test
|
|
601
|
-
*
|
|
602
|
-
* Simulates the complete A2A user journey between two agents:
|
|
603
|
-
*
|
|
604
|
-
* 1. Both agents start with fresh environments
|
|
605
|
-
* 2. Agent A completes onboarding
|
|
606
|
-
* 3. Agent A creates an invite token
|
|
607
|
-
* 4. Agent B adds Agent A as a contact using the invite URL
|
|
608
|
-
* 5. Agent B calls Agent A via HTTP
|
|
609
|
-
* 6. Agent A responds
|
|
610
|
-
* 7. Multi-turn conversation works
|
|
611
|
-
* 8. Conversation ends cleanly
|
|
612
|
-
*/
|
|
613
|
-
module.exports = function (test, assert, helpers) {
|
|
614
|
-
const http = require('http');
|
|
615
|
-
const { TwoServerHarness } = require('./two-server');
|
|
616
|
-
|
|
617
|
-
let harness = null;
|
|
618
|
-
|
|
619
|
-
async function teardown() {
|
|
620
|
-
if (harness) await harness.teardown();
|
|
621
|
-
harness = null;
|
|
622
|
-
}
|
|
623
|
-
|
|
624
|
-
// ── Full Flow: Onboard → Create Token → Invite → Call ──
|
|
625
|
-
|
|
626
|
-
test('full E2E: Agent B calls Agent A via invite URL', async () => {
|
|
627
|
-
harness = new TwoServerHarness();
|
|
628
|
-
await harness.setup();
|
|
629
|
-
|
|
630
|
-
const agentA = harness.agentA;
|
|
631
|
-
const agentB = harness.agentB;
|
|
632
|
-
|
|
633
|
-
// Step 1: Agent A creates a token for Agent B
|
|
634
|
-
const { token, record } = agentA.tokenStore.create({
|
|
635
|
-
name: 'AgentB-Access',
|
|
636
|
-
permissions: 'public',
|
|
637
|
-
expires: '1h',
|
|
638
|
-
maxCalls: 10,
|
|
639
|
-
allowedTopics: ['testing', 'automation']
|
|
640
|
-
});
|
|
641
|
-
|
|
642
|
-
assert.match(token, /^fed_/, 'Token should start with fed_');
|
|
643
|
-
assert.equal(record.name, 'AgentB-Access');
|
|
644
|
-
assert.equal(record.tier, 'public');
|
|
645
|
-
|
|
646
|
-
// Step 2: Construct invite URL
|
|
647
|
-
const inviteUrl = `${agentA.inviteBase}/${token}`;
|
|
648
|
-
assert.match(inviteUrl, /^a2a:\/\//, 'Invite should be a2a:// URL');
|
|
649
|
-
|
|
650
|
-
// Step 3: Agent B adds Agent A as a contact
|
|
651
|
-
agentB.tokenStore.addContact(inviteUrl, {
|
|
652
|
-
name: 'AgentA',
|
|
653
|
-
notes: 'E2E test partner'
|
|
654
|
-
});
|
|
655
|
-
|
|
656
|
-
const contacts = agentB.tokenStore.listContacts();
|
|
657
|
-
assert.equal(contacts.length, 1);
|
|
658
|
-
assert.equal(contacts[0].name, 'AgentA');
|
|
659
|
-
|
|
660
|
-
// Step 4: Agent B retrieves the stored token and calls Agent A
|
|
661
|
-
const contact = agentB.tokenStore.getContact('AgentA');
|
|
662
|
-
assert.equal(contact.host, agentA.hostname);
|
|
663
|
-
assert.equal(contact.token, token);
|
|
664
|
-
|
|
665
|
-
// Step 5: Make the actual HTTP call (Agent B → Agent A)
|
|
666
|
-
const callResult = await httpPost(
|
|
667
|
-
`http://${agentA.hostname}/api/a2a/invoke`,
|
|
668
|
-
{
|
|
669
|
-
message: 'Hello Agent A, this is Agent B calling.',
|
|
670
|
-
caller: { name: 'AgentB', owner: 'E2E Test' }
|
|
671
|
-
},
|
|
672
|
-
{ Authorization: `Bearer ${token}` }
|
|
673
|
-
);
|
|
674
|
-
|
|
675
|
-
assert.equal(callResult.statusCode, 200);
|
|
676
|
-
assert.ok(callResult.body.success);
|
|
677
|
-
assert.match(callResult.body.conversation_id, /^conv_/);
|
|
678
|
-
assert.ok(callResult.body.response.includes('AgentA received'));
|
|
679
|
-
assert.equal(callResult.body.can_continue, true);
|
|
680
|
-
assert.equal(callResult.body.tokens_remaining, 9);
|
|
681
|
-
|
|
682
|
-
// Step 6: Multi-turn — send follow-up on same conversation
|
|
683
|
-
const followUp = await httpPost(
|
|
684
|
-
`http://${agentA.hostname}/api/a2a/invoke`,
|
|
685
|
-
{
|
|
686
|
-
message: 'Follow-up question from Agent B.',
|
|
687
|
-
conversation_id: callResult.body.conversation_id,
|
|
688
|
-
caller: { name: 'AgentB', owner: 'E2E Test' }
|
|
689
|
-
},
|
|
690
|
-
{ Authorization: `Bearer ${token}` }
|
|
691
|
-
);
|
|
692
|
-
|
|
693
|
-
assert.equal(followUp.statusCode, 200);
|
|
694
|
-
assert.ok(followUp.body.success);
|
|
695
|
-
assert.equal(followUp.body.conversation_id, callResult.body.conversation_id);
|
|
696
|
-
assert.equal(followUp.body.tokens_remaining, 8);
|
|
697
|
-
|
|
698
|
-
// Step 7: Verify token usage was tracked
|
|
699
|
-
const tokenRecord = agentA.tokenStore.findById(record.id);
|
|
700
|
-
assert.equal(tokenRecord.calls_made, 2);
|
|
701
|
-
|
|
702
|
-
await teardown();
|
|
703
|
-
});
|
|
704
|
-
|
|
705
|
-
test('full E2E: bidirectional — both agents exchange invites', async () => {
|
|
706
|
-
harness = new TwoServerHarness();
|
|
707
|
-
await harness.setup();
|
|
708
|
-
|
|
709
|
-
const agentA = harness.agentA;
|
|
710
|
-
const agentB = harness.agentB;
|
|
711
|
-
|
|
712
|
-
// Agent A creates token for B
|
|
713
|
-
const tokenAtoB = agentA.tokenStore.create({
|
|
714
|
-
name: 'ForAgentB', permissions: 'friends', maxCalls: 5
|
|
715
|
-
});
|
|
716
|
-
|
|
717
|
-
// Agent B creates token for A
|
|
718
|
-
const tokenBtoA = agentB.tokenStore.create({
|
|
719
|
-
name: 'ForAgentA', permissions: 'public', maxCalls: 5
|
|
720
|
-
});
|
|
721
|
-
|
|
722
|
-
// Exchange invites
|
|
723
|
-
const inviteA = `${agentA.inviteBase}/${tokenAtoB.token}`;
|
|
724
|
-
const inviteB = `${agentB.inviteBase}/${tokenBtoA.token}`;
|
|
725
|
-
|
|
726
|
-
agentB.tokenStore.addContact(inviteA, { name: 'AgentA' });
|
|
727
|
-
agentA.tokenStore.addContact(inviteB, { name: 'AgentB' });
|
|
728
|
-
|
|
729
|
-
// B calls A
|
|
730
|
-
const resBA = await httpPost(
|
|
731
|
-
`http://${agentA.hostname}/api/a2a/invoke`,
|
|
732
|
-
{ message: 'B calling A', caller: { name: 'AgentB' } },
|
|
733
|
-
{ Authorization: `Bearer ${tokenAtoB.token}` }
|
|
734
|
-
);
|
|
735
|
-
assert.equal(resBA.statusCode, 200);
|
|
736
|
-
assert.ok(resBA.body.success);
|
|
737
|
-
|
|
738
|
-
// A calls B
|
|
739
|
-
const resAB = await httpPost(
|
|
740
|
-
`http://${agentB.hostname}/api/a2a/invoke`,
|
|
741
|
-
{ message: 'A calling B', caller: { name: 'AgentA' } },
|
|
742
|
-
{ Authorization: `Bearer ${tokenBtoA.token}` }
|
|
743
|
-
);
|
|
744
|
-
assert.equal(resAB.statusCode, 200);
|
|
745
|
-
assert.ok(resAB.body.success);
|
|
746
|
-
|
|
747
|
-
await teardown();
|
|
748
|
-
});
|
|
749
|
-
|
|
750
|
-
test('full E2E: revoked token rejected mid-conversation', async () => {
|
|
751
|
-
harness = new TwoServerHarness();
|
|
752
|
-
await harness.setup();
|
|
753
|
-
|
|
754
|
-
const { token, record } = harness.agentA.tokenStore.create({
|
|
755
|
-
name: 'Revocable', maxCalls: 10
|
|
756
|
-
});
|
|
757
|
-
|
|
758
|
-
// First call succeeds
|
|
759
|
-
const res1 = await httpPost(
|
|
760
|
-
`http://${harness.agentA.hostname}/api/a2a/invoke`,
|
|
761
|
-
{ message: 'First call', caller: { name: 'Tester' } },
|
|
762
|
-
{ Authorization: `Bearer ${token}` }
|
|
763
|
-
);
|
|
764
|
-
assert.equal(res1.statusCode, 200);
|
|
765
|
-
|
|
766
|
-
// Revoke the token
|
|
767
|
-
harness.agentA.tokenStore.revoke(record.id);
|
|
768
|
-
|
|
769
|
-
// Second call rejected
|
|
770
|
-
const res2 = await httpPost(
|
|
771
|
-
`http://${harness.agentA.hostname}/api/a2a/invoke`,
|
|
772
|
-
{ message: 'After revoke', caller: { name: 'Tester' } },
|
|
773
|
-
{ Authorization: `Bearer ${token}` }
|
|
774
|
-
);
|
|
775
|
-
assert.equal(res2.statusCode, 401);
|
|
776
|
-
assert.equal(res2.body.error, 'unauthorized');
|
|
777
|
-
|
|
778
|
-
await teardown();
|
|
779
|
-
});
|
|
780
|
-
|
|
781
|
-
test('full E2E: expired token rejected', async () => {
|
|
782
|
-
harness = new TwoServerHarness();
|
|
783
|
-
await harness.setup();
|
|
784
|
-
|
|
785
|
-
// Create token that expires immediately (1ms)
|
|
786
|
-
const { token } = harness.agentA.tokenStore.create({
|
|
787
|
-
name: 'ShortLived', expires: '1ms'
|
|
788
|
-
});
|
|
789
|
-
|
|
790
|
-
// Wait for expiry
|
|
791
|
-
await new Promise(r => setTimeout(r, 50));
|
|
792
|
-
|
|
793
|
-
const res = await httpPost(
|
|
794
|
-
`http://${harness.agentA.hostname}/api/a2a/invoke`,
|
|
795
|
-
{ message: 'Too late', caller: { name: 'Tester' } },
|
|
796
|
-
{ Authorization: `Bearer ${token}` }
|
|
797
|
-
);
|
|
798
|
-
assert.equal(res.statusCode, 401);
|
|
799
|
-
assert.equal(res.body.error, 'unauthorized');
|
|
800
|
-
|
|
801
|
-
await teardown();
|
|
802
|
-
});
|
|
803
|
-
|
|
804
|
-
test('full E2E: max calls enforcement across multi-turn', async () => {
|
|
805
|
-
harness = new TwoServerHarness();
|
|
806
|
-
await harness.setup();
|
|
807
|
-
|
|
808
|
-
const { token } = harness.agentA.tokenStore.create({
|
|
809
|
-
name: 'LimitedCalls', maxCalls: 2
|
|
810
|
-
});
|
|
811
|
-
|
|
812
|
-
// Call 1 OK
|
|
813
|
-
const r1 = await httpPost(
|
|
814
|
-
`http://${harness.agentA.hostname}/api/a2a/invoke`,
|
|
815
|
-
{ message: 'Call 1', caller: { name: 'Tester' } },
|
|
816
|
-
{ Authorization: `Bearer ${token}` }
|
|
817
|
-
);
|
|
818
|
-
assert.equal(r1.statusCode, 200);
|
|
819
|
-
|
|
820
|
-
// Call 2 OK
|
|
821
|
-
const r2 = await httpPost(
|
|
822
|
-
`http://${harness.agentA.hostname}/api/a2a/invoke`,
|
|
823
|
-
{ message: 'Call 2', caller: { name: 'Tester' } },
|
|
824
|
-
{ Authorization: `Bearer ${token}` }
|
|
825
|
-
);
|
|
826
|
-
assert.equal(r2.statusCode, 200);
|
|
827
|
-
|
|
828
|
-
// Call 3 rejected
|
|
829
|
-
const r3 = await httpPost(
|
|
830
|
-
`http://${harness.agentA.hostname}/api/a2a/invoke`,
|
|
831
|
-
{ message: 'Call 3', caller: { name: 'Tester' } },
|
|
832
|
-
{ Authorization: `Bearer ${token}` }
|
|
833
|
-
);
|
|
834
|
-
assert.equal(r3.statusCode, 401);
|
|
835
|
-
|
|
836
|
-
await teardown();
|
|
837
|
-
});
|
|
838
|
-
|
|
839
|
-
// ── HTTP helper ──
|
|
840
|
-
function httpPost(url, body, headers = {}) {
|
|
841
|
-
const urlObj = new URL(url);
|
|
842
|
-
return new Promise((resolve, reject) => {
|
|
843
|
-
const data = JSON.stringify(body);
|
|
844
|
-
const req = http.request({
|
|
845
|
-
hostname: urlObj.hostname,
|
|
846
|
-
port: urlObj.port,
|
|
847
|
-
path: urlObj.pathname,
|
|
848
|
-
method: 'POST',
|
|
849
|
-
headers: {
|
|
850
|
-
'Content-Type': 'application/json',
|
|
851
|
-
'Content-Length': Buffer.byteLength(data),
|
|
852
|
-
...headers
|
|
853
|
-
}
|
|
854
|
-
}, (res) => {
|
|
855
|
-
let responseData = '';
|
|
856
|
-
res.on('data', chunk => responseData += chunk);
|
|
857
|
-
res.on('end', () => {
|
|
858
|
-
let parsed;
|
|
859
|
-
try { parsed = JSON.parse(responseData); } catch { parsed = responseData; }
|
|
860
|
-
resolve({ statusCode: res.statusCode, headers: res.headers, body: parsed });
|
|
861
|
-
});
|
|
862
|
-
});
|
|
863
|
-
req.on('error', reject);
|
|
864
|
-
req.write(data);
|
|
865
|
-
req.end();
|
|
866
|
-
});
|
|
867
|
-
}
|
|
868
|
-
};
|
|
869
|
-
```
|
|
870
|
-
|
|
871
|
-
**Step 2: Run test to verify it passes**
|
|
872
|
-
|
|
873
|
-
Run: `node test/run.js --filter "full E2E"`
|
|
874
|
-
Expected: PASS (all 5 tests)
|
|
875
|
-
|
|
876
|
-
**Step 3: Commit**
|
|
877
|
-
|
|
878
|
-
```bash
|
|
879
|
-
git add test/e2e/full-flow.test.js
|
|
880
|
-
git commit -m "feat(e2e): add full flow tests — onboard, invite, cross-agent call"
|
|
881
|
-
```
|
|
882
|
-
|
|
883
|
-
---
|
|
884
|
-
|
|
885
|
-
## Phase 3: Agent Prompt Sequence & Report
|
|
886
|
-
|
|
887
|
-
### Task 5: Create the E2E test agent prompt sequence
|
|
888
|
-
|
|
889
|
-
**Files:**
|
|
890
|
-
- Create: `docs/prompts/e2e-test-agent.md`
|
|
891
|
-
|
|
892
|
-
This is the prompt document that an AI orchestrator gives to a subagent. The subagent follows these steps to test the A2A system.
|
|
893
|
-
|
|
894
|
-
**Step 1: Write the prompt document**
|
|
895
|
-
|
|
896
|
-
```markdown
|
|
897
|
-
# A2A E2E Test Agent — Prompt Sequence
|
|
898
|
-
|
|
899
|
-
You are an E2E test agent for the `a2acalling` npm package. Your job is to verify
|
|
900
|
-
that a fresh install, onboarding, and invite flow all work correctly.
|
|
901
|
-
|
|
902
|
-
## Your Environment
|
|
903
|
-
|
|
904
|
-
You have been given a clean working directory. You will:
|
|
905
|
-
1. Install `a2acalling` from npm (or use a local tarball if provided)
|
|
906
|
-
2. Run through the full onboarding flow
|
|
907
|
-
3. Create tokens and test the invite flow
|
|
908
|
-
4. Verify the server responds correctly
|
|
909
|
-
5. Report all findings
|
|
910
|
-
|
|
911
|
-
## Pre-Flight
|
|
912
|
-
|
|
913
|
-
Before starting, verify:
|
|
914
|
-
- [ ] Node.js >= 18 is available (`node --version`)
|
|
915
|
-
- [ ] npm is available (`npm --version`)
|
|
916
|
-
- [ ] Working directory is clean and writable
|
|
917
|
-
- [ ] No existing A2A config (the directory `~/.config/openclaw/` should not exist, or `ls ~/.config/openclaw/` should show that it is empty)
|
|
918
|
-
|
|
919
|
-
If any pre-flight check fails, report the failure and stop.
|
|
920
|
-
|
|
921
|
-
## Step 1: Install a2acalling
|
|
922
|
-
|
|
923
|
-
```bash
|
|
924
|
-
npm install -g a2acalling
|
|
925
|
-
```
|
|
926
|
-
|
|
927
|
-
**Expected:**
|
|
928
|
-
- Exit code 0
|
|
929
|
-
- `a2a` command is now available
|
|
930
|
-
- `a2a --version` prints a version number
|
|
931
|
-
|
|
932
|
-
**Report if:** Install fails, postinstall errors, command not found after install.
|
|
933
|
-
|
|
934
|
-
## Step 2: Run Quickstart (Onboarding)
|
|
935
|
-
|
|
936
|
-
```bash
|
|
937
|
-
a2a quickstart
|
|
938
|
-
```
|
|
939
|
-
|
|
940
|
-
**Expected:**
|
|
941
|
-
- Step 1: Port detection — finds an available port (3001-3020)
|
|
942
|
-
- Step 2: Server starts on the detected port
|
|
943
|
-
- Step 3: Disclosure prompt appears — asking for topics, objectives, personality
|
|
944
|
-
- The agent should be in `awaiting_disclosure` state
|
|
945
|
-
|
|
946
|
-
**Then submit disclosure:**
|
|
947
|
-
|
|
948
|
-
```bash
|
|
949
|
-
a2a quickstart --submit '{
|
|
950
|
-
"tiers": {
|
|
951
|
-
"public": {
|
|
952
|
-
"topics": [{"topic": "Testing", "description": "Automated system testing"}],
|
|
953
|
-
"objectives": [{"objective": "Verify install", "description": "Confirm the package works"}],
|
|
954
|
-
"do_not_discuss": []
|
|
955
|
-
},
|
|
956
|
-
"friends": {"topics": [], "objectives": [], "do_not_discuss": []},
|
|
957
|
-
"family": {"topics": [], "objectives": [], "do_not_discuss": []}
|
|
958
|
-
},
|
|
959
|
-
"never_disclose": ["Test secrets"],
|
|
960
|
-
"personality_notes": "Direct and methodical test agent"
|
|
961
|
-
}'
|
|
962
|
-
```
|
|
963
|
-
|
|
964
|
-
**Expected:**
|
|
965
|
-
- "Disclosure manifest saved"
|
|
966
|
-
- "Onboarding complete"
|
|
967
|
-
- Step numbers are sequential (no duplicates)
|
|
968
|
-
- Config file exists at `~/.config/openclaw/a2a-config.json` with `onboarding.step === 'complete'`
|
|
969
|
-
- First invite URL is generated (`a2a://hostname/fed_...`)
|
|
970
|
-
|
|
971
|
-
**Report if:** Onboarding hangs, step numbers are wrong, manifest not saved, invite not generated.
|
|
972
|
-
|
|
973
|
-
## Step 3: Verify Server Health
|
|
974
|
-
|
|
975
|
-
```bash
|
|
976
|
-
a2a ping a2a://localhost:<port>/test
|
|
977
|
-
```
|
|
978
|
-
|
|
979
|
-
Or directly:
|
|
980
|
-
|
|
981
|
-
```bash
|
|
982
|
-
curl http://localhost:<port>/api/a2a/ping
|
|
983
|
-
```
|
|
984
|
-
|
|
985
|
-
**Expected:** `{"pong": true, "timestamp": "..."}`
|
|
986
|
-
|
|
987
|
-
```bash
|
|
988
|
-
curl http://localhost:<port>/api/a2a/status
|
|
989
|
-
```
|
|
990
|
-
|
|
991
|
-
**Expected:** `{"a2a": true, "version": "...", "capabilities": ["invoke", "multi-turn", ...]}`
|
|
992
|
-
|
|
993
|
-
**Report if:** Server not running, ping fails, status missing expected fields.
|
|
994
|
-
|
|
995
|
-
## Step 4: Create Invite Token
|
|
996
|
-
|
|
997
|
-
```bash
|
|
998
|
-
a2a create --name "E2E-Tester" --tier public --expires 1h --max-calls 20
|
|
999
|
-
```
|
|
1000
|
-
|
|
1001
|
-
**Expected:**
|
|
1002
|
-
- Token created successfully
|
|
1003
|
-
- Invite URL printed: `a2a://hostname/fed_...`
|
|
1004
|
-
- Token appears in `a2a list`
|
|
1005
|
-
|
|
1006
|
-
**Report if:** Token creation fails, URL format wrong, not in list.
|
|
1007
|
-
|
|
1008
|
-
## Step 5: Test Inbound Call
|
|
1009
|
-
|
|
1010
|
-
Using the invite URL from Step 4, make a direct HTTP call:
|
|
1011
|
-
|
|
1012
|
-
```bash
|
|
1013
|
-
TOKEN="<token from step 4>"
|
|
1014
|
-
PORT="<port from step 2>"
|
|
1015
|
-
curl -X POST http://localhost:$PORT/api/a2a/invoke \
|
|
1016
|
-
-H "Content-Type: application/json" \
|
|
1017
|
-
-H "Authorization: Bearer $TOKEN" \
|
|
1018
|
-
-d '{"message": "Hello from E2E test agent", "caller": {"name": "E2E-Tester", "owner": "Automated Test"}}'
|
|
1019
|
-
```
|
|
1020
|
-
|
|
1021
|
-
**Expected Response:**
|
|
1022
|
-
```json
|
|
1023
|
-
{
|
|
1024
|
-
"success": true,
|
|
1025
|
-
"conversation_id": "conv_...",
|
|
1026
|
-
"response": "...",
|
|
1027
|
-
"can_continue": true,
|
|
1028
|
-
"tokens_remaining": 19
|
|
1029
|
-
}
|
|
1030
|
-
```
|
|
1031
|
-
|
|
1032
|
-
**Report if:** 401 unauthorized, 500 error, missing conversation_id, unexpected response shape.
|
|
1033
|
-
|
|
1034
|
-
## Step 6: Test Multi-Turn Conversation
|
|
1035
|
-
|
|
1036
|
-
Using the `conversation_id` from Step 5:
|
|
1037
|
-
|
|
1038
|
-
```bash
|
|
1039
|
-
curl -X POST http://localhost:$PORT/api/a2a/invoke \
|
|
1040
|
-
-H "Content-Type: application/json" \
|
|
1041
|
-
-H "Authorization: Bearer $TOKEN" \
|
|
1042
|
-
-d '{"message": "Follow-up message", "conversation_id": "<conv_id>", "caller": {"name": "E2E-Tester"}}'
|
|
1043
|
-
```
|
|
1044
|
-
|
|
1045
|
-
**Expected:**
|
|
1046
|
-
- Same `conversation_id` returned
|
|
1047
|
-
- `tokens_remaining` is exactly 1 lower than the value returned in Step 5 (for example, 18 if Step 5 returned 19)
|
|
1048
|
-
- `can_continue` is true
|
|
1049
|
-
|
|
1050
|
-
**Report if:** Conversation ID changes, token count wrong, can_continue unexpected.
|
|
1051
|
-
|
|
1052
|
-
## Step 7: Test Error Cases
|
|
1053
|
-
|
|
1054
|
-
### 7a. No Authorization
|
|
1055
|
-
```bash
|
|
1056
|
-
curl -X POST http://localhost:$PORT/api/a2a/invoke \
|
|
1057
|
-
-H "Content-Type: application/json" \
|
|
1058
|
-
-d '{"message": "No auth"}'
|
|
1059
|
-
```
|
|
1060
|
-
**Expected:** 401, `{"error": "missing_token"}`
|
|
1061
|
-
|
|
1062
|
-
### 7b. Invalid Token
|
|
1063
|
-
```bash
|
|
1064
|
-
curl -X POST http://localhost:$PORT/api/a2a/invoke \
|
|
1065
|
-
-H "Content-Type: application/json" \
|
|
1066
|
-
-H "Authorization: Bearer fed_invalid_garbage" \
|
|
1067
|
-
-d '{"message": "Bad token"}'
|
|
1068
|
-
```
|
|
1069
|
-
**Expected:** 401, `{"error": "unauthorized"}`
|
|
1070
|
-
|
|
1071
|
-
### 7c. Missing Message
|
|
1072
|
-
```bash
|
|
1073
|
-
curl -X POST http://localhost:$PORT/api/a2a/invoke \
|
|
1074
|
-
-H "Content-Type: application/json" \
|
|
1075
|
-
-H "Authorization: Bearer $TOKEN" \
|
|
1076
|
-
-d '{}'
|
|
1077
|
-
```
|
|
1078
|
-
**Expected:** 400, `{"error": "missing_message"}`
|
|
1079
|
-
|
|
1080
|
-
**Report if:** Any error case returns unexpected status code or error format.
|
|
1081
|
-
|
|
1082
|
-
## Step 8: Token Revocation
|
|
1083
|
-
|
|
1084
|
-
```bash
|
|
1085
|
-
# Get token ID from list
|
|
1086
|
-
a2a list
|
|
1087
|
-
|
|
1088
|
-
# Revoke it
|
|
1089
|
-
a2a revoke <token_id>
|
|
1090
|
-
|
|
1091
|
-
# Verify call fails
|
|
1092
|
-
curl -X POST http://localhost:$PORT/api/a2a/invoke \
|
|
1093
|
-
-H "Content-Type: application/json" \
|
|
1094
|
-
-H "Authorization: Bearer $TOKEN" \
|
|
1095
|
-
-d '{"message": "Should fail"}'
|
|
1096
|
-
```
|
|
1097
|
-
|
|
1098
|
-
**Expected:** 401 with `{"error": "unauthorized"}` after revocation.
|
|
1099
|
-
|
|
1100
|
-
**Report if:** Revoked token still works.
|
|
1101
|
-
|
|
1102
|
-
## Step 9: Cleanup
|
|
1103
|
-
|
|
1104
|
-
```bash
|
|
1105
|
-
a2a uninstall
|
|
1106
|
-
```
|
|
1107
|
-
|
|
1108
|
-
**Expected:** Server stopped, config files removed.
|
|
1109
|
-
|
|
1110
|
-
## Reporting Format
|
|
1111
|
-
|
|
1112
|
-
After completing all steps, produce a report in this format:
|
|
1113
|
-
|
|
1114
|
-
```markdown
|
|
1115
|
-
# A2A E2E Test Report
|
|
1116
|
-
|
|
1117
|
-
**Date:** YYYY-MM-DD HH:MM:SS
|
|
1118
|
-
**Package Version:** x.y.z
|
|
1119
|
-
**Node Version:** vXX.X.X
|
|
1120
|
-
**Platform:** linux/darwin/win32
|
|
1121
|
-
|
|
1122
|
-
## Results
|
|
1123
|
-
|
|
1124
|
-
| Step | Name | Status | Notes |
|
|
1125
|
-
|------|------|--------|-------|
|
|
1126
|
-
| 1 | Install | PASS/FAIL | ... |
|
|
1127
|
-
| 2 | Onboarding | PASS/FAIL | ... |
|
|
1128
|
-
| 3 | Server Health | PASS/FAIL | ... |
|
|
1129
|
-
| 4 | Create Token | PASS/FAIL | ... |
|
|
1130
|
-
| 5 | Inbound Call | PASS/FAIL | ... |
|
|
1131
|
-
| 6 | Multi-Turn | PASS/FAIL | ... |
|
|
1132
|
-
| 7 | Error Cases | PASS/FAIL | ... |
|
|
1133
|
-
| 8 | Revocation | PASS/FAIL | ... |
|
|
1134
|
-
| 9 | Cleanup | PASS/FAIL | ... |
|
|
1135
|
-
|
|
1136
|
-
## Issues Found
|
|
1137
|
-
|
|
1138
|
-
### Issue 1: [Title]
|
|
1139
|
-
**Step:** N
|
|
1140
|
-
**Severity:** critical/high/medium/low
|
|
1141
|
-
**Description:** What happened
|
|
1142
|
-
**Expected:** What should have happened
|
|
1143
|
-
**Actual:** What actually happened
|
|
1144
|
-
**Reproduction:** Exact commands to reproduce
|
|
1145
|
-
```
|
|
1146
|
-
|
|
1147
|
-
For each issue found, the orchestrator should create a Linear ticket
|
|
1148
|
-
with the "Todo" status on the "a2a calling" team, labeled "Bug" and "E2E".
|
|
1149
|
-
```
|
|
1150
|
-
|
|
1151
|
-
**Step 2: Commit**
|
|
1152
|
-
|
|
1153
|
-
```bash
|
|
1154
|
-
git add docs/prompts/e2e-test-agent.md
|
|
1155
|
-
git commit -m "docs: add E2E test agent prompt sequence"
|
|
1156
|
-
```
|
|
1157
|
-
|
|
1158
|
-
---
|
|
1159
|
-
|
|
1160
|
-
### Task 6: Create report generator with Linear integration
|
|
1161
|
-
|
|
1162
|
-
**Files:**
|
|
1163
|
-
- Create: `test/e2e/report.js`
|
|
1164
|
-
- Test: `test/e2e/report.test.js`
|
|
1165
|
-
|
|
1166
|
-
**Step 1: Write the failing test**
|
|
1167
|
-
|
|
1168
|
-
```javascript
|
|
1169
|
-
// test/e2e/report.test.js
|
|
1170
|
-
module.exports = function (test, assert, helpers) {
|
|
1171
|
-
const { E2EReport } = require('./report');
|
|
1172
|
-
|
|
1173
|
-
test('E2EReport tracks step results', () => {
|
|
1174
|
-
const report = new E2EReport({ version: '0.6.44', nodeVersion: 'v20.0.0' });
|
|
1175
|
-
|
|
1176
|
-
report.pass(1, 'Install', 'Installed successfully');
|
|
1177
|
-
report.pass(2, 'Onboarding', 'Completed in 3s');
|
|
1178
|
-
report.fail(3, 'Server Health', 'Ping returned 500', {
|
|
1179
|
-
expected: '200 with pong',
|
|
1180
|
-
actual: '500 internal error',
|
|
1181
|
-
severity: 'critical'
|
|
1182
|
-
});
|
|
1183
|
-
|
|
1184
|
-
assert.equal(report.results.length, 3);
|
|
1185
|
-
assert.equal(report.passed, 2);
|
|
1186
|
-
assert.equal(report.failed, 1);
|
|
1187
|
-
assert.equal(report.issues.length, 1);
|
|
1188
|
-
assert.equal(report.issues[0].step, 3);
|
|
1189
|
-
assert.equal(report.issues[0].severity, 'critical');
|
|
1190
|
-
});
|
|
1191
|
-
|
|
1192
|
-
test('E2EReport generates markdown', () => {
|
|
1193
|
-
const report = new E2EReport({ version: '0.6.44', nodeVersion: 'v20.0.0' });
|
|
1194
|
-
|
|
1195
|
-
report.pass(1, 'Install', 'OK');
|
|
1196
|
-
report.fail(2, 'Onboarding', 'Manifest not saved', {
|
|
1197
|
-
expected: 'Manifest file created',
|
|
1198
|
-
actual: 'File missing',
|
|
1199
|
-
severity: 'high'
|
|
1200
|
-
});
|
|
1201
|
-
|
|
1202
|
-
const md = report.toMarkdown();
|
|
1203
|
-
assert.includes(md, '# A2A E2E Test Report');
|
|
1204
|
-
assert.includes(md, '0.6.44');
|
|
1205
|
-
assert.includes(md, 'PASS');
|
|
1206
|
-
assert.includes(md, 'FAIL');
|
|
1207
|
-
assert.includes(md, 'Manifest not saved');
|
|
1208
|
-
assert.includes(md, 'high');
|
|
1209
|
-
});
|
|
1210
|
-
|
|
1211
|
-
test('E2EReport generates Linear issue descriptions', () => {
|
|
1212
|
-
const report = new E2EReport({ version: '0.6.44', nodeVersion: 'v20.0.0' });
|
|
1213
|
-
|
|
1214
|
-
report.fail(5, 'Inbound Call', 'Got 500 instead of 200', {
|
|
1215
|
-
expected: '200 success',
|
|
1216
|
-
actual: '500 internal_error',
|
|
1217
|
-
severity: 'critical',
|
|
1218
|
-
reproduction: 'curl -X POST http://localhost:3001/api/a2a/invoke ...'
|
|
1219
|
-
});
|
|
1220
|
-
|
|
1221
|
-
const issues = report.toLinearIssues();
|
|
1222
|
-
assert.equal(issues.length, 1);
|
|
1223
|
-
assert.includes(issues[0].title, 'Inbound Call');
|
|
1224
|
-
assert.includes(issues[0].description, '500');
|
|
1225
|
-
assert.includes(issues[0].description, 'Reproduction');
|
|
1226
|
-
assert.equal(issues[0].priority, 1); // critical = urgent
|
|
1227
|
-
});
|
|
1228
|
-
};
|
|
1229
|
-
```
|
|
1230
|
-
|
|
1231
|
-
**Step 2: Run test to verify it fails**
|
|
1232
|
-
|
|
1233
|
-
Run: `node test/run.js --filter "E2EReport"`
|
|
1234
|
-
Expected: FAIL — module not found
|
|
1235
|
-
|
|
1236
|
-
**Step 3: Write minimal implementation**
|
|
1237
|
-
|
|
1238
|
-
```javascript
|
|
1239
|
-
// test/e2e/report.js
|
|
1240
|
-
/**
 * E2E Test Report Generator
 *
 * Tracks pass/fail results for each step and can output:
 * - Markdown summary for human review
 * - Linear issue descriptions for automated bug filing
 */
class E2EReport {
  /**
   * @param {Object} [meta] - Run metadata.
   * @param {string} [meta.version] - Package version under test; 'unknown' when omitted.
   * @param {string} [meta.nodeVersion] - Defaults to process.version.
   * @param {string} [meta.platform] - Defaults to process.platform.
   */
  constructor(meta = {}) {
    this.meta = {
      version: meta.version || 'unknown',
      nodeVersion: meta.nodeVersion || process.version,
      platform: meta.platform || process.platform,
      date: new Date().toISOString()
    };
    this.results = [];
    this.issues = [];
    this.passed = 0;
    this.failed = 0;
  }

  /**
   * Makes a value safe to embed in a single markdown table cell: line breaks
   * become spaces and pipes are escaped so they cannot end the cell early.
   * @param {*} value
   * @returns {string}
   */
  static #cell(value) {
    return String(value ?? '')
      .replace(/\r?\n/g, ' ')
      .replace(/\|/g, '\\|');
  }

  /**
   * Records a passing step.
   * @param {number} step - Step number shown in the report.
   * @param {string} name - Short step name.
   * @param {string} [notes=''] - Free-form detail for the results table.
   */
  pass(step, name, notes = '') {
    this.results.push({ step, name, status: 'PASS', notes });
    this.passed++;
  }

  /**
   * Records a failing step and files a matching entry in `issues`.
   * @param {number} step - Step number shown in the report.
   * @param {string} name - Short step name.
   * @param {string} [notes=''] - What went wrong; also used in the issue title.
   * @param {Object} [details] - Optional `expected`, `actual`, `severity`
   *   (critical/high/medium/low, default 'medium') and `reproduction`.
   */
  fail(step, name, notes = '', details = {}) {
    // Normalize so later string operations (slice, interpolation) never see
    // null/undefined when a caller omits the notes or passes null details.
    const text = notes == null ? '' : String(notes);
    const info = details ?? {};

    this.results.push({ step, name, status: 'FAIL', notes: text });
    this.failed++;
    this.issues.push({
      step,
      name,
      notes: text,
      expected: info.expected || '',
      actual: info.actual || '',
      severity: info.severity || 'medium',
      reproduction: info.reproduction || ''
    });
  }

  /**
   * Renders the full report (metadata, summary, results table, issues).
   * @returns {string} Markdown text.
   */
  toMarkdown() {
    const lines = [
      '# A2A E2E Test Report',
      '',
      `**Date:** ${this.meta.date}`,
      `**Package Version:** ${this.meta.version}`,
      `**Node Version:** ${this.meta.nodeVersion}`,
      `**Platform:** ${this.meta.platform}`,
      '',
      `## Summary: ${this.passed} passed, ${this.failed} failed`,
      '',
      '## Results',
      '',
      '| Step | Name | Status | Notes |',
      '|------|------|--------|-------|'
    ];

    for (const r of this.results) {
      // Notes often carry raw error text; escape it so one row stays one row.
      lines.push(
        `| ${E2EReport.#cell(r.step)} | ${E2EReport.#cell(r.name)} | ${r.status} | ${E2EReport.#cell(r.notes)} |`
      );
    }

    if (this.issues.length > 0) {
      lines.push('', '## Issues Found', '');
      this.issues.forEach((issue, i) => {
        lines.push(
          `### Issue ${i + 1}: ${issue.name}`,
          `**Step:** ${issue.step}`,
          `**Severity:** ${issue.severity}`,
          `**Description:** ${issue.notes}`,
          `**Expected:** ${issue.expected}`,
          `**Actual:** ${issue.actual}`,
          ''
        );
        if (issue.reproduction) {
          lines.push(`**Reproduction:**`, '```', issue.reproduction, '```', '');
        }
      });
    }

    return lines.join('\n');
  }

  /**
   * Convert issues to Linear issue format.
   * @returns {Array<{title, description, priority, labels}>}
   */
  toLinearIssues() {
    const severityToPriority = {
      critical: 1, // Urgent
      high: 2,     // High
      medium: 3,   // Normal
      low: 4       // Low
    };

    return this.issues.map(issue => {
      // Omit the dash separator when there is no note to follow it.
      const summary = issue.notes ? ` — ${issue.notes.slice(0, 60)}` : '';
      // Own-property check so an odd severity such as 'constructor' cannot
      // resolve to something inherited from Object.prototype.
      const priority = Object.hasOwn(severityToPriority, issue.severity)
        ? severityToPriority[issue.severity]
        : 3;

      return {
        title: `[E2E] Step ${issue.step}: ${issue.name}${summary}`,
        description: [
          `## E2E Test Failure`,
          '',
          `**Step:** ${issue.step} — ${issue.name}`,
          `**Severity:** ${issue.severity}`,
          `**Package Version:** ${this.meta.version}`,
          `**Node:** ${this.meta.nodeVersion}`,
          `**Platform:** ${this.meta.platform}`,
          '',
          `### Expected`,
          issue.expected,
          '',
          `### Actual`,
          issue.actual,
          '',
          issue.reproduction ? `### Reproduction\n\`\`\`\n${issue.reproduction}\n\`\`\`` : ''
        ].join('\n'),
        priority,
        labels: ['Bug', 'E2E']
      };
    });
  }
}
|
|
1359
|
-
|
|
1360
|
-
module.exports = { E2EReport };
|
|
1361
|
-
```
|
|
1362
|
-
|
|
1363
|
-
**Step 4: Run test to verify it passes**
|
|
1364
|
-
|
|
1365
|
-
Run: `node test/run.js --filter "E2EReport"`
|
|
1366
|
-
Expected: PASS (all 3 tests)
|
|
1367
|
-
|
|
1368
|
-
**Step 5: Commit**
|
|
1369
|
-
|
|
1370
|
-
```bash
|
|
1371
|
-
git add test/e2e/report.js test/e2e/report.test.js
|
|
1372
|
-
git commit -m "feat(e2e): add report generator with Linear issue formatting"
|
|
1373
|
-
```
|
|
1374
|
-
|
|
1375
|
-
---
|
|
1376
|
-
|
|
1377
|
-
### Task 7: Create orchestrator entry point
|
|
1378
|
-
|
|
1379
|
-
**Files:**
|
|
1380
|
-
- Create: `test/e2e/orchestrate.js`
|
|
1381
|
-
|
|
1382
|
-
This is the script that ties everything together. It can be run standalone (`node test/e2e/orchestrate.js`) or invoked by a Claude agent.
|
|
1383
|
-
|
|
1384
|
-
**Step 1: Write the orchestrator**
|
|
1385
|
-
|
|
1386
|
-
```javascript
|
|
1387
|
-
#!/usr/bin/env node
|
|
1388
|
-
/**
|
|
1389
|
-
* E2E Test Orchestrator
|
|
1390
|
-
*
|
|
1391
|
-
* Runs the full A2A E2E test suite:
|
|
1392
|
-
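* 1. Sets up two isolated agent environments
* 2. Checks server health (ping) on both agents
* 3. Creates an invite token on Agent A
* 4. Exchanges the invite (Agent B adds Agent A as a contact)
* 5. Tests an inbound call (B → A)
* 6. Tests a multi-turn conversation
* 7. Tests error cases
* 8. Tests token revocation
* Afterwards it generates the report (markdown by default, JSON with --json).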
|
|
1398
|
-
*
|
|
1399
|
-
* Usage:
|
|
1400
|
-
* node test/e2e/orchestrate.js [--json] [--verbose]
|
|
1401
|
-
*
|
|
1402
|
-
* Exit codes:
|
|
1403
|
-
* 0 = all tests passed
|
|
1404
|
-
* 1 = one or more failures
|
|
1405
|
-
*/
|
|
1406
|
-
|
|
1407
|
-
const { TwoServerHarness } = require('./two-server');
|
|
1408
|
-
const { E2EReport } = require('./report');
|
|
1409
|
-
const http = require('http');
|
|
1410
|
-
|
|
1411
|
-
const verbose = process.argv.includes('--verbose');
|
|
1412
|
-
const jsonOutput = process.argv.includes('--json');
|
|
1413
|
-
|
|
1414
|
-
/**
 * Prints an indented progress line; silent unless --verbose was passed.
 * @param {string} msg - Text to print.
 */
function log(msg) {
  if (!verbose) {
    return;
  }
  console.log(` ${msg}`);
}
|
|
1417
|
-
|
|
1418
|
-
/**
 * POSTs `body` as JSON to `url` over plain HTTP and resolves with the response.
 * Non-2xx statuses still resolve; callers inspect `statusCode` themselves.
 *
 * @param {string} url - Absolute http:// URL; both path and query string are sent.
 * @param {*} body - Value serialized with JSON.stringify as the request payload.
 * @param {Object} [headers={}] - Extra request headers; these override the defaults.
 * @returns {Promise<{statusCode: number, headers: Object, body: *}>} `body` is the
 *   parsed JSON response, or the raw response text when it is not valid JSON.
 *   Rejects on request or response stream errors.
 */
function httpPost(url, body, headers = {}) {
  const urlObj = new URL(url);
  return new Promise((resolve, reject) => {
    const data = JSON.stringify(body);
    const req = http.request({
      hostname: urlObj.hostname,
      port: urlObj.port,
      // pathname alone would silently drop any ?query part of the URL.
      path: `${urlObj.pathname}${urlObj.search}`,
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(data),
        ...headers
      }
    }, (res) => {
      // Decode at the stream level so a multi-byte UTF-8 character split
      // across two chunks is not corrupted by per-chunk string conversion.
      res.setEncoding('utf8');
      let responseData = '';
      res.on('data', (chunk) => { responseData += chunk; });
      res.on('error', reject);
      res.on('end', () => {
        let parsed;
        // Error responses may be plain text; fall back to the raw string.
        try { parsed = JSON.parse(responseData); } catch { parsed = responseData; }
        resolve({ statusCode: res.statusCode, headers: res.headers, body: parsed });
      });
    });
    req.on('error', reject);
    req.write(data);
    req.end();
  });
}
|
|
1446
|
-
|
|
1447
|
-
/**
 * GETs `url` over plain HTTP and resolves with the status and body.
 * Non-2xx statuses still resolve; callers inspect `statusCode` themselves.
 *
 * @param {string} url - Absolute http:// URL.
 * @returns {Promise<{statusCode: number, body: *}>} `body` is the parsed JSON
 *   response, or the raw response text when it is not valid JSON. Rejects on
 *   request or response stream errors.
 */
function httpGet(url) {
  return new Promise((resolve, reject) => {
    http.get(url, (res) => {
      // Decode at the stream level so a multi-byte UTF-8 character split
      // across two chunks is not corrupted by per-chunk string conversion.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => { data += chunk; });
      res.on('error', reject);
      res.on('end', () => {
        try { resolve({ statusCode: res.statusCode, body: JSON.parse(data) }); }
        catch { resolve({ statusCode: res.statusCode, body: data }); }
      });
    }).on('error', reject);
  });
}
|
|
1459
|
-
|
|
1460
|
-
/**
 * Runs the eight orchestrated E2E steps against two isolated agents, prints
 * the report (markdown by default, JSON with --json) and exits with code 0
 * when every step passed, 1 otherwise.
 *
 * An exception thrown by any step is recorded as a critical
 * "Orchestrator Error" (step 0); a failure while tearing the harness down is
 * recorded as "Teardown" (step 9). Either way the report is still printed.
 *
 * @returns {Promise<void>} Does not normally settle: the process exits first.
 */
async function main() {
  let version;
  try { version = require('../../package.json').version; } catch { version = 'unknown'; }

  const report = new E2EReport({ version });
  let harness = null;

  try {
    // ── Step 1: Environment Setup ──
    console.log('Step 1: Setting up two isolated agents...');
    harness = new TwoServerHarness();
    await harness.setup();
    log(`Agent A on port ${harness.agentA.port}`);
    log(`Agent B on port ${harness.agentB.port}`);
    report.pass(1, 'Environment Setup', `Ports: ${harness.agentA.port}, ${harness.agentB.port}`);

    // ── Step 2: Server Health ──
    console.log('Step 2: Checking server health...');
    // The two pings are independent, so issue them concurrently.
    const [pingA, pingB] = await Promise.all([
      httpGet(`http://127.0.0.1:${harness.agentA.port}/api/a2a/ping`),
      httpGet(`http://127.0.0.1:${harness.agentB.port}/api/a2a/ping`)
    ]);

    if (pingA.body.pong && pingB.body.pong) {
      report.pass(2, 'Server Health', 'Both agents respond to ping');
    } else {
      report.fail(2, 'Server Health', 'Ping failed', {
        expected: 'pong: true from both agents',
        actual: `A: ${JSON.stringify(pingA.body)}, B: ${JSON.stringify(pingB.body)}`,
        severity: 'critical'
      });
    }

    // ── Step 3: Token Creation ──
    console.log('Step 3: Creating tokens...');
    const tokenA = harness.agentA.tokenStore.create({
      name: 'E2E-ForAgentB',
      permissions: 'public',
      expires: '1h',
      maxCalls: 10,
      allowedTopics: ['testing']
    });

    if (tokenA.token && tokenA.token.startsWith('fed_')) {
      // Only a prefix is reported so the full secret never lands in the report.
      report.pass(3, 'Token Creation', `Token: ${tokenA.token.slice(0, 12)}...`);
    } else {
      report.fail(3, 'Token Creation', 'Token format invalid', {
        expected: 'fed_... format',
        actual: String(tokenA.token),
        severity: 'critical'
      });
    }

    // ── Step 4: Contact Exchange ──
    console.log('Step 4: Exchanging invites...');
    const inviteUrl = `${harness.agentA.inviteBase}/${tokenA.token}`;
    const addResult = harness.agentB.tokenStore.addContact(inviteUrl, { name: 'AgentA' });
    const contacts = harness.agentB.tokenStore.listContacts();

    if (addResult.success && contacts.length === 1) {
      report.pass(4, 'Contact Exchange', `Agent B added Agent A as contact`);
    } else {
      report.fail(4, 'Contact Exchange', 'Failed to add contact', {
        expected: 'Contact added successfully',
        actual: `success: ${addResult.success}, contacts: ${contacts.length}`,
        severity: 'high'
      });
    }

    // ── Step 5: Inbound Call ──
    console.log('Step 5: Testing inbound call (B → A)...');
    const invokeUrl = `http://${harness.agentA.hostname}/api/a2a/invoke`;
    const authHeader = { Authorization: `Bearer ${tokenA.token}` };
    const callRes = await httpPost(
      invokeUrl,
      {
        message: 'Hello from E2E Agent B',
        caller: { name: 'AgentB', owner: 'E2E Orchestrator' }
      },
      authHeader
    );

    if (callRes.statusCode === 200 && callRes.body.success && callRes.body.conversation_id) {
      report.pass(5, 'Inbound Call', `Conv: ${callRes.body.conversation_id}`);
    } else {
      report.fail(5, 'Inbound Call', `Status ${callRes.statusCode}`, {
        expected: '200 with success: true and conversation_id',
        actual: JSON.stringify(callRes.body).slice(0, 200),
        severity: 'critical'
      });
    }

    // ── Step 6: Multi-Turn ──
    console.log('Step 6: Testing multi-turn conversation...');
    if (callRes.body.conversation_id) {
      const followUp = await httpPost(
        invokeUrl,
        {
          message: 'Follow-up from Agent B',
          conversation_id: callRes.body.conversation_id,
          caller: { name: 'AgentB' }
        },
        authHeader
      );

      if (followUp.statusCode === 200 && followUp.body.conversation_id === callRes.body.conversation_id) {
        report.pass(6, 'Multi-Turn', `Same conv ID, tokens remaining: ${followUp.body.tokens_remaining}`);
      } else {
        report.fail(6, 'Multi-Turn', 'Conversation ID mismatch or failure', {
          expected: `conv_id: ${callRes.body.conversation_id}`,
          actual: `conv_id: ${followUp.body.conversation_id}, status: ${followUp.statusCode}`,
          severity: 'high'
        });
      }
    } else {
      // Without a conversation to continue there is nothing to exercise.
      report.fail(6, 'Multi-Turn', 'Skipped — no conversation_id from step 5', { severity: 'high' });
    }

    // ── Step 7: Error Cases ──
    console.log('Step 7: Testing error cases...');
    let errorsPassed = 0;
    const errorTotal = 3;

    // 7a: No auth
    const noAuth = await httpPost(invokeUrl, { message: 'No auth' });
    if (noAuth.statusCode === 401 && noAuth.body.error === 'missing_token') errorsPassed++;

    // 7b: Bad token
    const badToken = await httpPost(
      invokeUrl,
      { message: 'Bad token' },
      { Authorization: 'Bearer fed_totally_invalid' }
    );
    if (badToken.statusCode === 401 && badToken.body.error === 'unauthorized') errorsPassed++;

    // 7c: Missing message
    const noMsg = await httpPost(invokeUrl, {}, authHeader);
    if (noMsg.statusCode === 400 && noMsg.body.error === 'missing_message') errorsPassed++;

    if (errorsPassed === errorTotal) {
      report.pass(7, 'Error Cases', `All ${errorTotal} error cases correct`);
    } else {
      report.fail(7, 'Error Cases', `${errorsPassed}/${errorTotal} passed`, {
        expected: `All ${errorTotal} error cases return correct status/error`,
        actual: `noAuth: ${noAuth.statusCode}/${noAuth.body.error}, badToken: ${badToken.statusCode}/${badToken.body.error}, noMsg: ${noMsg.statusCode}/${noMsg.body.error}`,
        severity: 'high'
      });
    }

    // ── Step 8: Token Revocation ──
    // Runs last on purpose: every earlier step relies on this token being valid.
    console.log('Step 8: Testing token revocation...');
    harness.agentA.tokenStore.revoke(tokenA.record.id);
    const revokedCall = await httpPost(
      invokeUrl,
      { message: 'After revoke', caller: { name: 'AgentB' } },
      authHeader
    );

    if (revokedCall.statusCode === 401) {
      report.pass(8, 'Token Revocation', 'Revoked token correctly rejected');
    } else {
      report.fail(8, 'Token Revocation', `Got ${revokedCall.statusCode} instead of 401`, {
        expected: '401 unauthorized',
        actual: `${revokedCall.statusCode}: ${JSON.stringify(revokedCall.body)}`,
        severity: 'critical',
        reproduction: `Revoke token then POST /invoke with same token`
      });
    }

  } catch (err) {
    report.fail(0, 'Orchestrator Error', err.message, {
      expected: 'No uncaught errors',
      actual: err.stack,
      severity: 'critical'
    });
  } finally {
    if (harness) {
      // A teardown failure must not discard the results gathered so far:
      // record it as its own failure and still fall through to the output.
      try {
        await harness.teardown();
      } catch (err) {
        report.fail(9, 'Teardown', err.message, {
          expected: 'Both agents shut down cleanly',
          actual: err.stack,
          severity: 'medium'
        });
      }
    }
  }

  // ── Output ──
  if (jsonOutput) {
    console.log(JSON.stringify({
      meta: report.meta,
      passed: report.passed,
      failed: report.failed,
      results: report.results,
      issues: report.issues,
      linearIssues: report.toLinearIssues()
    }, null, 2));
  } else {
    console.log('');
    console.log(report.toMarkdown());
  }

  process.exit(report.failed > 0 ? 1 : 0);
}

// Last-resort guard: main() records step and teardown errors itself, so
// reaching this handler means the reporting/output code itself failed.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
|
|
1661
|
-
```
|
|
1662
|
-
|
|
1663
|
-
**Step 2: Run the orchestrator**
|
|
1664
|
-
|
|
1665
|
-
Run: `node test/e2e/orchestrate.js --verbose`
|
|
1666
|
-
Expected: All 8 steps PASS, exit code 0
|
|
1667
|
-
|
|
1668
|
-
**Step 3: Commit**
|
|
1669
|
-
|
|
1670
|
-
```bash
|
|
1671
|
-
git add test/e2e/orchestrate.js
|
|
1672
|
-
git commit -m "feat(e2e): add orchestrator — runs full E2E suite and generates report"
|
|
1673
|
-
```
|
|
1674
|
-
|
|
1675
|
-
---
|
|
1676
|
-
|
|
1677
|
-
## Phase 4: Integration with Test Runner
|
|
1678
|
-
|
|
1679
|
-
### Task 8: Register E2E tests with the existing test runner
|
|
1680
|
-
|
|
1681
|
-
**Files:**
|
|
1682
|
-
- Modify: `test/run.js` — add `--e2e` flag support
|
|
1683
|
-
- Create: `test/e2e/index.test.js` — wrapper that runs E2E tests via the standard runner
|
|
1684
|
-
|
|
1685
|
-
The existing test runner at `test/run.js` supports `--unit` and `--integration` flags. We add `--e2e` for the new tests.
|
|
1686
|
-
|
|
1687
|
-
**Step 1: Check current test runner structure**
|
|
1688
|
-
|
|
1689
|
-
Read `test/run.js` to understand how it discovers and runs test files. The runner globs `test/unit/*.test.js` and `test/integration/*.test.js`. We need it to also glob `test/e2e/*.test.js` when `--e2e` or `--all` is passed; with no category flag it should keep running only the unit and integration tests.
|
|
1690
|
-
|
|
1691
|
-
**Step 2: Create E2E index wrapper**
|
|
1692
|
-
|
|
1693
|
-
```javascript
|
|
1694
|
-
// test/e2e/index.test.js
|
|
1695
|
-
/**
|
|
1696
|
-
* E2E Test Suite
|
|
1697
|
-
*
|
|
1698
|
-
* These tests require ephemeral ports and take longer than unit/integration tests.
|
|
1699
|
-
* Run with: node test/run.js --e2e
|
|
1700
|
-
* Or: node test/run.js --filter "E2E"
|
|
1701
|
-
*/
|
|
1702
|
-
module.exports = function (test, assert, helpers) {
|
|
1703
|
-
// Re-export individual E2E test files
|
|
1704
|
-
require('./env.test.js')(test, assert, helpers);
|
|
1705
|
-
require('./cli-runner.test.js')(test, assert, helpers);
|
|
1706
|
-
require('./two-server.test.js')(test, assert, helpers);
|
|
1707
|
-
require('./full-flow.test.js')(test, assert, helpers);
|
|
1708
|
-
require('./report.test.js')(test, assert, helpers);
|
|
1709
|
-
};
|
|
1710
|
-
```
|
|
1711
|
-
|
|
1712
|
-
**Step 3: Modify test runner to support `--e2e` flag**
|
|
1713
|
-
|
|
1714
|
-
In `test/run.js`, locate where test files are discovered and add:
|
|
1715
|
-
- When `--e2e` is passed: only run `test/e2e/*.test.js`
|
|
1716
|
-
- When `--all` is passed: run unit, integration, and E2E tests
|
|
1717
|
-
- Default behavior (no flags): run unit + integration (NOT e2e, since they're slower)
|
|
1718
|
-
|
|
1719
|
-
**Step 4: Run all E2E tests**
|
|
1720
|
-
|
|
1721
|
-
Run: `node test/run.js --e2e --verbose`
|
|
1722
|
-
Expected: All E2E tests pass
|
|
1723
|
-
|
|
1724
|
-
**Step 5: Run full suite to verify no regressions**
|
|
1725
|
-
|
|
1726
|
-
Run: `npm test`
|
|
1727
|
-
Expected: All unit + integration tests still pass (E2E excluded by default)
|
|
1728
|
-
|
|
1729
|
-
**Step 6: Commit**
|
|
1730
|
-
|
|
1731
|
-
```bash
|
|
1732
|
-
git add test/e2e/index.test.js test/run.js
|
|
1733
|
-
git commit -m "feat(e2e): register E2E tests with test runner under --e2e flag"
|
|
1734
|
-
```
|
|
1735
|
-
|
|
1736
|
-
---
|
|
1737
|
-
|
|
1738
|
-
## Phase 5: Documentation
|
|
1739
|
-
|
|
1740
|
-
### Task 9: Add E2E section to protocol docs
|
|
1741
|
-
|
|
1742
|
-
**Files:**
|
|
1743
|
-
- Modify: `docs/protocol.md` — add "E2E Testing" section
|
|
1744
|
-
|
|
1745
|
-
**Step 1: Add E2E testing documentation section**
|
|
1746
|
-
|
|
1747
|
-
At the end of `docs/protocol.md`, add:
|
|
1748
|
-
|
|
1749
|
-
```markdown
|
|
1750
|
-
## E2E Testing
|
|
1751
|
-
|
|
1752
|
-
### Running the E2E Suite
|
|
1753
|
-
|
|
1754
|
-
~~~bash
|
|
1755
|
-
# Run E2E tests via test runner
|
|
1756
|
-
node test/run.js --e2e
|
|
1757
|
-
|
|
1758
|
-
# Run the orchestrator directly (verbose output)
|
|
1759
|
-
node test/e2e/orchestrate.js --verbose
|
|
1760
|
-
|
|
1761
|
-
# Get JSON report (for automated processing)
|
|
1762
|
-
node test/e2e/orchestrate.js --json
|
|
1763
|
-
~~~
|
|
1764
|
-
|
|
1765
|
-
### AI Agent Testing
|
|
1766
|
-
|
|
1767
|
-
The E2E prompt sequence at `docs/prompts/e2e-test-agent.md` provides step-by-step
|
|
1768
|
-
instructions for a Claude subagent to test a fresh a2acalling installation.
|
|
1769
|
-
|
|
1770
|
-
**Orchestrator workflow:**
|
|
1771
|
-
1. Spawn subagent with the prompt from `docs/prompts/e2e-test-agent.md`
|
|
1772
|
-
2. Subagent follows the 9-step sequence
|
|
1773
|
-
3. Subagent produces a markdown report
|
|
1774
|
-
4. Orchestrator reviews failures and creates Linear issues
|
|
1775
|
-
|
|
1776
|
-
### Architecture
|
|
1777
|
-
|
|
1778
|
-
The E2E system uses:
|
|
1779
|
-
- `test/e2e/env.js` — Isolated temp directories and port allocation
|
|
1780
|
-
- `test/e2e/cli-runner.js` — Structured CLI command execution
|
|
1781
|
-
- `test/e2e/two-server.js` — Two independent Express servers on ephemeral ports
|
|
1782
|
-
- `test/e2e/full-flow.test.js` — Cross-agent call tests
|
|
1783
|
-
- `test/e2e/report.js` — Markdown and Linear issue generation
|
|
1784
|
-
- `test/e2e/orchestrate.js` — Standalone orchestrator script
|
|
1785
|
-
```
|
|
1786
|
-
|
|
1787
|
-
**Step 2: Commit**
|
|
1788
|
-
|
|
1789
|
-
```bash
|
|
1790
|
-
git add docs/protocol.md docs/prompts/e2e-test-agent.md
|
|
1791
|
-
git commit -m "docs: add E2E testing section and agent prompt sequence"
|
|
1792
|
-
```
|
|
1793
|
-
|
|
1794
|
-
---
|
|
1795
|
-
|
|
1796
|
-
## Summary
|
|
1797
|
-
|
|
1798
|
-
| Task | Phase | What It Builds |
|
|
1799
|
-
|------|-------|----------------|
|
|
1800
|
-
| 1 | Environment | Isolated temp dirs + port allocation |
|
|
1801
|
-
| 2 | CLI Runner | Structured CLI command wrapper |
|
|
1802
|
-
| 3 | Two-Server | Dual Express servers for cross-agent testing |
|
|
1803
|
-
| 4 | Full Flow Tests | 5 E2E tests: invite, bidirectional, revoke, expire, max-calls |
|
|
1804
|
-
| 5 | Agent Prompt | 9-step prompt sequence for AI subagent testing |
|
|
1805
|
-
| 6 | Report Generator | Markdown output + Linear issue formatting |
|
|
1806
|
-
| 7 | Orchestrator | Standalone script: `node test/e2e/orchestrate.js` |
|
|
1807
|
-
| 8 | Test Runner Integration | `--e2e` flag in existing runner |
|
|
1808
|
-
| 9 | Documentation | Protocol docs + testing guide |
|
|
1809
|
-
|
|
1810
|
-
**Total new files:** 9
|
|
1811
|
-
**Total modified files:** 2 (`test/run.js`, `docs/protocol.md`)
|
|
1812
|
-
**Estimated commits:** 9
|