@plaited/acp-harness 0.2.5 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/LICENSE +1 -1
  2. package/README.md +120 -16
  3. package/bin/cli.ts +105 -636
  4. package/bin/tests/cli.spec.ts +218 -51
  5. package/package.json +20 -4
  6. package/src/acp-client.ts +5 -4
  7. package/src/acp-transport.ts +14 -7
  8. package/src/adapter-check.ts +542 -0
  9. package/src/adapter-scaffold.ts +934 -0
  10. package/src/balance.ts +232 -0
  11. package/src/calibrate.ts +300 -0
  12. package/src/capture.ts +457 -0
  13. package/src/constants.ts +94 -0
  14. package/src/grader-loader.ts +174 -0
  15. package/src/harness.ts +35 -0
  16. package/src/schemas-cli.ts +239 -0
  17. package/src/schemas.ts +567 -0
  18. package/src/summarize.ts +245 -0
  19. package/src/tests/adapter-check.spec.ts +70 -0
  20. package/src/tests/adapter-scaffold.spec.ts +112 -0
  21. package/src/tests/fixtures/grader-bad-module.ts +5 -0
  22. package/src/tests/fixtures/grader-exec-fail.py +9 -0
  23. package/src/tests/fixtures/grader-exec-invalid.py +6 -0
  24. package/src/tests/fixtures/grader-exec.py +29 -0
  25. package/src/tests/fixtures/grader-module.ts +14 -0
  26. package/src/tests/grader-loader.spec.ts +153 -0
  27. package/src/trials.ts +395 -0
  28. package/src/validate-refs.ts +188 -0
  29. package/.claude/rules/accuracy.md +0 -43
  30. package/.claude/rules/bun-apis.md +0 -80
  31. package/.claude/rules/code-review.md +0 -254
  32. package/.claude/rules/git-workflow.md +0 -37
  33. package/.claude/rules/github.md +0 -154
  34. package/.claude/rules/testing.md +0 -172
  35. package/.claude/skills/acp-harness/SKILL.md +0 -310
  36. package/.claude/skills/acp-harness/assets/Dockerfile.acp +0 -25
  37. package/.claude/skills/acp-harness/assets/docker-compose.acp.yml +0 -19
  38. package/.claude/skills/acp-harness/references/downstream.md +0 -288
  39. package/.claude/skills/acp-harness/references/output-formats.md +0 -221
  40. package/.claude-plugin/marketplace.json +0 -15
  41. package/.claude-plugin/plugin.json +0 -16
  42. package/.github/CODEOWNERS +0 -6
  43. package/.github/workflows/ci.yml +0 -63
  44. package/.github/workflows/publish.yml +0 -146
  45. package/.mcp.json +0 -20
  46. package/CLAUDE.md +0 -92
  47. package/Dockerfile.test +0 -23
  48. package/biome.json +0 -96
  49. package/bun.lock +0 -513
  50. package/docker-compose.test.yml +0 -21
  51. package/scripts/bun-test-wrapper.sh +0 -46
  52. package/src/acp.constants.ts +0 -56
  53. package/src/acp.schemas.ts +0 -161
  54. package/src/acp.types.ts +0 -28
  55. package/src/tests/fixtures/.claude/settings.local.json +0 -8
  56. package/src/tests/fixtures/.claude/skills/greeting/SKILL.md +0 -17
  57. package/tsconfig.json +0 -32
@@ -0,0 +1,542 @@
1
+ /**
2
+ * ACP adapter compliance checker.
3
+ *
4
+ * @remarks
5
+ * Validates that an adapter correctly implements the Agent Client Protocol
6
+ * by running a series of checks:
7
+ *
8
+ * 1. spawn - Adapter can be launched as subprocess
9
+ * 2. initialize - Responds with valid agentCapabilities
10
+ * 3. session/new - Creates session and returns sessionId
11
+ * 4. session/prompt - Accepts prompt and emits session/update notifications
12
+ * 5. session/cancel - Accepts cancel notification gracefully
13
+ * 6. framing - All messages are newline-delimited JSON-RPC 2.0
14
+ *
15
+ * @packageDocumentation
16
+ */
17
+
18
+ import { parseArgs } from 'node:util'
19
+ import { createACPTransport } from './acp-transport.ts'
20
+ import { ACP_METHODS, ACP_PROTOCOL_VERSION, DEFAULT_ACP_CLIENT_NAME } from './constants.ts'
21
+
22
+ // ============================================================================
23
+ // Types
24
+ // ============================================================================
25
+
/**
 * Settings that drive one compliance run.
 */
export type CheckConfig = {
  /** Argv used to spawn the adapter, e.g. `['bun', './src/index.ts']`. */
  command: string[]
  /** Per-check timeout, in milliseconds. */
  timeout: number
  /** When true, check results carry detailed protocol output in `details`. */
  verbose: boolean
}
35
+
/**
 * Outcome of one individual compliance check.
 */
export type CheckResult = {
  /** Identifier of the check, e.g. `spawn` or `session/new`. */
  name: string
  /** True when the check succeeded. */
  passed: boolean
  /** One-line, human-readable description of the outcome. */
  message: string
  /** Extra information that is only populated for verbose runs. */
  details?: string
}
47
+
/**
 * Aggregate outcome of a full compliance run.
 */
export type ComplianceResult = {
  /** True only when no check failed. */
  passed: boolean
  /** Results of the checks that were executed, in execution order. */
  checks: CheckResult[]
  /** Counters describing the run. */
  summary: {
    /** Number of checks in the run. */
    total: number
    /** Number of checks that succeeded. */
    passed: number
    /** Number of checks that did not succeed. */
    failed: number
  }
}
61
+
62
+ // ============================================================================
63
+ // Check Implementations
64
+ // ============================================================================
65
+
/**
 * Check: spawn
 *
 * Starts the adapter through a transport wired with no-op handlers and closes
 * it again straight away. Any error raised along the way is converted into a
 * failed check rather than propagated to the caller.
 *
 * @param config - Check configuration (command, timeout, verbose)
 * @returns Result named `spawn`
 */
const checkSpawn = async (config: CheckConfig): Promise<CheckResult> => {
  const { command, timeout, verbose } = config
  const ignore = () => {}

  try {
    const adapter = createACPTransport({
      command,
      timeout,
      onNotification: ignore,
      onRequest: async () => ({}),
      onError: ignore,
      onClose: ignore,
    })

    await adapter.start()
    // Plain close: `false` means no shutdown message is sent first
    await adapter.close(false)
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error)
    return {
      name: 'spawn',
      passed: false,
      message: `Failed to spawn adapter: ${reason}`,
    }
  }

  return {
    name: 'spawn',
    passed: true,
    message: 'Adapter launched successfully',
    details: verbose ? `Command: ${command.join(' ')}` : undefined,
  }
}
100
+
/**
 * Check: initialize
 *
 * Starts a transport, sends `initialize` and verifies that the protocol
 * version in the response equals `ACP_PROTOCOL_VERSION`.
 *
 * On success the still-open transport is returned so the caller can reuse it;
 * the caller is then responsible for closing it. On every failure path
 * (version mismatch, thrown error, timeout) the transport is closed here so
 * the adapter is not left running.
 *
 * @param config - Check configuration (command, timeout, verbose)
 * @returns `result` always; `transport` and `capabilities` only when the check passed
 */
const checkInitialize = async (
  config: CheckConfig,
): Promise<{ result: CheckResult; transport?: ReturnType<typeof createACPTransport>; capabilities?: unknown }> => {
  const { command, timeout, verbose } = config
  // Declared outside the try block so the catch handler can clean it up
  let transport: ReturnType<typeof createACPTransport> | undefined

  try {
    transport = createACPTransport({
      command,
      timeout,
      onNotification: () => {},
      onRequest: async () => ({}),
      onError: () => {},
      onClose: () => {},
    })

    await transport.start()

    const initResponse = await transport.request<{
      protocolVersion: number
      agentInfo?: { name: string; version: string }
      agentCapabilities?: Record<string, unknown>
    }>(ACP_METHODS.INITIALIZE, {
      protocolVersion: ACP_PROTOCOL_VERSION,
      clientInfo: { name: DEFAULT_ACP_CLIENT_NAME, version: '1.0.0' },
      clientCapabilities: {},
    })

    if (!initResponse || initResponse.protocolVersion !== ACP_PROTOCOL_VERSION) {
      await transport.close(false)
      return {
        result: {
          name: 'initialize',
          passed: false,
          message: `Invalid protocol version: expected ${ACP_PROTOCOL_VERSION}, got ${initResponse?.protocolVersion}`,
        },
      }
    }

    // Summarize truthy capabilities; nested objects are rendered as `parent.child, child`
    const capabilities = initResponse.agentCapabilities ?? {}
    const capList = Object.entries(capabilities)
      .filter(([, v]) => v)
      .map(([k, v]) => {
        if (typeof v === 'object' && v !== null) {
          const nested = Object.entries(v as Record<string, unknown>)
            .filter(([, nv]) => nv)
            .map(([nk]) => nk)
          return nested.length > 0 ? `${k}.${nested.join(', ')}` : k
        }
        return k
      })

    return {
      result: {
        name: 'initialize',
        passed: true,
        message: `Protocol version ${initResponse.protocolVersion}${capList.length > 0 ? `, capabilities: ${capList.join(', ')}` : ''}`,
        details: verbose ? JSON.stringify(initResponse, null, 2) : undefined,
      },
      transport,
      capabilities,
    }
  } catch (error) {
    // Best-effort cleanup: without this a failed or timed-out initialize
    // would leave the transport open. A close error must not mask the
    // original failure, so it is deliberately ignored.
    await transport?.close(false).catch(() => {})

    return {
      result: {
        name: 'initialize',
        passed: false,
        message: `Initialize failed: ${error instanceof Error ? error.message : String(error)}`,
      },
    }
  }
}
176
+
/**
 * Check: session/new
 *
 * Requests a new session on an already initialized transport, using the
 * current working directory and no MCP servers, and expects a non-empty
 * `sessionId` in the response.
 *
 * @param transport - Open transport on which `initialize` has completed
 * @param verbose - When true, the raw response is attached as `details`
 * @returns The check result, plus the `sessionId` when one was returned
 */
const checkSessionNew = async (
  transport: ReturnType<typeof createACPTransport>,
  verbose: boolean,
): Promise<{ result: CheckResult; sessionId?: string }> => {
  const name = 'session/new'
  const failure = (message: string) => ({ result: { name, passed: false, message } })

  try {
    const created = await transport.request<{ sessionId: string }>(ACP_METHODS.CREATE_SESSION, {
      cwd: process.cwd(),
      mcpServers: [],
    })

    const id = created?.sessionId
    if (!id) {
      return failure('No sessionId in response')
    }

    return {
      result: {
        name,
        passed: true,
        message: `Session ${id} created`,
        details: verbose ? JSON.stringify(created, null, 2) : undefined,
      },
      sessionId: id,
    }
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error)
    return failure(`session/new failed: ${reason}`)
  }
}
220
+
/**
 * Check: session/prompt
 *
 * Opens a fresh transport, initializes it, creates a session on that
 * connection, sends a short text prompt and collects every `session/update`
 * notification received while the prompt is in flight.
 *
 * The check passes when the prompt response contains `content`; the number
 * and kinds of updates are reported in the message but are not required.
 *
 * @param config - Check configuration (command, timeout, verbose)
 * @param sessionId - Session id from an earlier check; used only as a
 *   fallback when the fresh connection does not return its own id
 * @returns Result named `session/prompt`
 */
const checkSessionPrompt = async (config: CheckConfig, sessionId: string): Promise<CheckResult> => {
  const { command, timeout, verbose } = config
  const updates: unknown[] = []

  // Create a new transport with update collection
  const transport = createACPTransport({
    command,
    timeout,
    onNotification: (method: string, params: unknown) => {
      if (method === ACP_METHODS.UPDATE) {
        updates.push(params)
      }
    },
    onRequest: async () => ({}),
    onError: () => {},
    onClose: () => {},
  })

  try {
    await transport.start()

    // Re-initialize for new connection
    await transport.request(ACP_METHODS.INITIALIZE, {
      protocolVersion: ACP_PROTOCOL_VERSION,
      clientInfo: { name: DEFAULT_ACP_CLIENT_NAME, version: '1.0.0' },
      clientCapabilities: {},
    })

    // The sessionId handed in was issued on a previous connection that has
    // since been closed, so this connection has no reason to know it.
    // Create a session here and prompt against that one instead.
    const session = await transport.request<{ sessionId?: string }>(ACP_METHODS.CREATE_SESSION, {
      cwd: process.cwd(),
      mcpServers: [],
    })
    // `||` rather than `??` so an empty-string id also falls back
    const activeSessionId = session?.sessionId || sessionId

    const response = await transport.request<{ content: unknown[] }>(ACP_METHODS.PROMPT, {
      sessionId: activeSessionId,
      prompt: [{ type: 'text', text: 'Hello, this is a test prompt.' }],
    })

    await transport.close(false)

    if (!response || !response.content) {
      return {
        name: 'session/prompt',
        passed: false,
        message: 'No content in response',
      }
    }

    // Categorize updates by their `sessionUpdate` discriminator
    const updateTypes = updates.map((u) => {
      const update = u as { update?: { sessionUpdate?: string } }
      return update?.update?.sessionUpdate ?? 'unknown'
    })

    const uniqueTypes = [...new Set(updateTypes)]
    const typeDisplay = uniqueTypes.length > 0 ? uniqueTypes.join(', ') : 'none'

    return {
      name: 'session/prompt',
      passed: true,
      message: `Received ${updates.length} update${updates.length !== 1 ? 's' : ''} (${typeDisplay})`,
      details: verbose ? JSON.stringify({ updates, response }, null, 2) : undefined,
    }
  } catch (error) {
    // Best-effort cleanup; a close failure must not hide the original error
    await transport.close(false).catch(() => {})

    return {
      name: 'session/prompt',
      passed: false,
      message: `session/prompt failed: ${error instanceof Error ? error.message : String(error)}`,
    }
  }
}
293
+
/**
 * Check: session/cancel
 *
 * Opens a fresh transport, initializes it, sends a cancel notification for
 * the given session and waits briefly before closing. The check passes when
 * none of those steps throws.
 *
 * @param config - Check configuration (command, timeout, verbose)
 * @param sessionId - Session id placed in the cancel notification
 * @returns Result named `session/cancel`
 */
const checkSessionCancel = async (config: CheckConfig, sessionId: string): Promise<CheckResult> => {
  const { command, timeout, verbose } = config
  const ignore = () => {}
  const pause = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms))

  const connection = createACPTransport({
    command,
    timeout,
    onNotification: ignore,
    onRequest: async () => ({}),
    onError: ignore,
    onClose: ignore,
  })

  try {
    await connection.start()

    // A new connection has to be initialized before anything else is sent
    await connection.request(ACP_METHODS.INITIALIZE, {
      protocolVersion: ACP_PROTOCOL_VERSION,
      clientInfo: { name: DEFAULT_ACP_CLIENT_NAME, version: '1.0.0' },
      clientCapabilities: {},
    })

    await connection.notify(ACP_METHODS.CANCEL, { sessionId })

    // Notifications get no reply, so allow the adapter a short moment to react
    await pause(100)

    await connection.close(false)
  } catch (error) {
    await connection.close(false).catch(() => {})

    const reason = error instanceof Error ? error.message : String(error)
    return {
      name: 'session/cancel',
      passed: false,
      message: `session/cancel failed: ${reason}`,
    }
  }

  return {
    name: 'session/cancel',
    passed: true,
    message: 'Acknowledged without error',
    details: verbose ? `Sent cancel for session ${sessionId}` : undefined,
  }
}
343
+
/**
 * Check: framing
 *
 * Derived check: no messages are inspected here. Framing is reported as
 * valid exactly when none of the earlier checks failed (an empty list
 * therefore counts as valid).
 *
 * @param previousChecks - Results of the checks that ran before this one
 * @returns Result named `framing`
 */
const checkFraming = (previousChecks: CheckResult[]): CheckResult => {
  const anyFailed = previousChecks.some((check) => !check.passed)

  return anyFailed
    ? {
        name: 'framing',
        passed: false,
        message: 'Some messages failed validation (see above)',
      }
    : {
        name: 'framing',
        passed: true,
        message: 'All messages valid JSON-RPC 2.0',
      }
}
367
+
368
+ // ============================================================================
369
+ // Main Check Runner
370
+ // ============================================================================
371
+
/**
 * Run the full compliance suite against an adapter.
 *
 * Checks run in order: spawn, initialize, session/new, session/prompt,
 * session/cancel, framing. A failure in any of the first three stops the run
 * early, because the later checks depend on them.
 *
 * @param config - Check configuration
 * @returns Compliance result with all check details
 */
export const runCheck = async (config: CheckConfig): Promise<ComplianceResult> => {
  const checks: CheckResult[] = []

  // Result for an early stop: the total stays at the full suite size (6)
  // and exactly one check, the one that stopped the run, counts as failed.
  const stopAfter = (passedSoFar: number): ComplianceResult => ({
    passed: false,
    checks,
    summary: { total: 6, passed: passedSoFar, failed: 1 },
  })

  // 1. spawn: nothing else can run if the adapter cannot be launched
  const spawned = await checkSpawn(config)
  checks.push(spawned)
  if (!spawned.passed) {
    return stopAfter(0)
  }

  // 2. initialize: on success this hands back an open transport
  const init = await checkInitialize(config)
  checks.push(init.result)
  const transport = init.transport
  if (!init.result.passed || !transport) {
    return stopAfter(1)
  }

  // 3. session/new: reuses the transport from initialize
  const session = await checkSessionNew(transport, config.verbose)
  checks.push(session.result)
  const sessionId = session.sessionId
  if (!session.result.passed || !sessionId) {
    await transport.close(false)
    return stopAfter(2)
  }

  // The remaining checks open their own transports, so release this one
  await transport.close(true)

  // 4. session/prompt and 5. session/cancel, each on a fresh transport
  checks.push(await checkSessionPrompt(config, sessionId))
  checks.push(await checkSessionCancel(config, sessionId))

  // 6. framing: derived from the five results collected so far
  checks.push(checkFraming(checks))

  const passedCount = checks.filter((check) => check.passed).length
  const failedCount = checks.length - passedCount

  return {
    passed: failedCount === 0,
    checks,
    summary: {
      total: checks.length,
      passed: passedCount,
      failed: failedCount,
    },
  }
}
447
+
448
+ // ============================================================================
449
+ // CLI Entry Point
450
+ // ============================================================================
451
+
/**
 * Adapter check command CLI handler.
 *
 * Parses the command line, runs the compliance suite against the given
 * adapter command and prints one line per check followed by a summary.
 *
 * The process exits with code 1 when no adapter command is given, when
 * `--timeout` is not a positive integer, or when any check fails. `--help`
 * prints usage and returns without running anything.
 *
 * @param args - Command line arguments (after 'adapter:check')
 */
export const adapterCheck = async (args: string[]): Promise<void> => {
  const { values, positionals } = parseArgs({
    args,
    options: {
      timeout: { type: 'string', default: '5000' },
      verbose: { type: 'boolean', default: false },
      help: { type: 'boolean', short: 'h' },
    },
    allowPositionals: true,
  })

  if (values.help) {
    // biome-ignore lint/suspicious/noConsole: CLI help output
    console.log(`
Usage: acp-harness adapter:check <command> [args...]

Arguments:
  command [args]    Command to spawn the adapter

Options:
  --timeout         Timeout for each check in ms (default: 5000)
  --verbose         Show detailed protocol messages
  -h, --help        Show this help message

Checks Performed:
  spawn             Adapter can be launched as subprocess
  initialize        Responds with valid agentCapabilities
  session/new       Creates session and returns sessionId
  session/prompt    Accepts prompt and emits updates
  session/cancel    Accepts cancel notification gracefully
  framing           All messages are valid JSON-RPC 2.0

Examples:
  # Check local TypeScript adapter
  acp-harness adapter:check bun ./my-adapter/src/index.ts

  # Check with verbose output
  acp-harness adapter:check bunx my-adapter --verbose

  # Check Python adapter
  acp-harness adapter:check python ./adapter.py
`)
    return
  }

  if (positionals.length === 0) {
    // biome-ignore lint/suspicious/noConsole: CLI error output
    console.error('Error: adapter command is required')
    // biome-ignore lint/suspicious/noConsole: CLI error output
    console.error('Example: acp-harness adapter:check bun ./src/index.ts')
    process.exit(1)
  }

  // Reject unusable timeouts up front: a non-numeric value would otherwise
  // reach every transport as NaN.
  const timeout = Number.parseInt(values.timeout ?? '5000', 10)
  if (!Number.isFinite(timeout) || timeout <= 0) {
    // biome-ignore lint/suspicious/noConsole: CLI error output
    console.error(`Error: --timeout must be a positive integer (milliseconds), got "${values.timeout}"`)
    process.exit(1)
  }

  const command = positionals

  // biome-ignore lint/suspicious/noConsole: CLI output
  console.log(`Checking ACP compliance for: ${command.join(' ')}\n`)

  const result = await runCheck({
    command,
    timeout,
    verbose: values.verbose ?? false,
  })

  // Print results: green check mark for pass, red cross for fail
  for (const check of result.checks) {
    const icon = check.passed ? '\u2713' : '\u2717'
    const color = check.passed ? '\x1b[32m' : '\x1b[31m'
    const reset = '\x1b[0m'

    // biome-ignore lint/suspicious/noConsole: CLI output
    console.log(`${color}${icon}${reset} ${check.name}: ${check.message}`)

    if (check.details && values.verbose) {
      // Indent every line of the details under its check
      // biome-ignore lint/suspicious/noConsole: CLI verbose output
      console.log(` ${check.details.split('\n').join('\n ')}`)
    }
  }

  // biome-ignore lint/suspicious/noConsole: CLI output
  console.log(
    `\n${result.summary.passed}/${result.summary.total} checks passed.${result.passed ? ' Adapter is ACP-compliant.' : ''}`,
  )

  // Non-zero exit code lets scripts and CI detect a non-compliant adapter
  if (!result.passed) {
    process.exit(1)
  }
}