@plaited/acp-harness 0.2.6 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/LICENSE +1 -1
  2. package/README.md +120 -16
  3. package/bin/cli.ts +105 -636
  4. package/bin/tests/cli.spec.ts +218 -51
  5. package/package.json +20 -4
  6. package/src/acp-client.ts +5 -4
  7. package/src/acp-transport.ts +14 -7
  8. package/src/adapter-check.ts +542 -0
  9. package/src/adapter-scaffold.ts +934 -0
  10. package/src/balance.ts +232 -0
  11. package/src/calibrate.ts +300 -0
  12. package/src/capture.ts +457 -0
  13. package/src/constants.ts +94 -0
  14. package/src/grader-loader.ts +174 -0
  15. package/src/harness.ts +35 -0
  16. package/src/schemas-cli.ts +239 -0
  17. package/src/schemas.ts +567 -0
  18. package/src/summarize.ts +245 -0
  19. package/src/tests/adapter-check.spec.ts +70 -0
  20. package/src/tests/adapter-scaffold.spec.ts +112 -0
  21. package/src/tests/fixtures/grader-bad-module.ts +5 -0
  22. package/src/tests/fixtures/grader-exec-fail.py +9 -0
  23. package/src/tests/fixtures/grader-exec-invalid.py +6 -0
  24. package/src/tests/fixtures/grader-exec.py +29 -0
  25. package/src/tests/fixtures/grader-module.ts +14 -0
  26. package/src/tests/grader-loader.spec.ts +153 -0
  27. package/src/trials.ts +395 -0
  28. package/src/validate-refs.ts +188 -0
  29. package/.claude/rules/accuracy.md +0 -43
  30. package/.claude/rules/bun-apis.md +0 -80
  31. package/.claude/rules/code-review.md +0 -254
  32. package/.claude/rules/git-workflow.md +0 -37
  33. package/.claude/rules/github.md +0 -154
  34. package/.claude/rules/testing.md +0 -172
  35. package/.claude/skills/acp-harness/SKILL.md +0 -310
  36. package/.claude/skills/acp-harness/assets/Dockerfile.acp +0 -25
  37. package/.claude/skills/acp-harness/assets/docker-compose.acp.yml +0 -19
  38. package/.claude/skills/acp-harness/references/downstream.md +0 -288
  39. package/.claude/skills/acp-harness/references/output-formats.md +0 -221
  40. package/.claude-plugin/marketplace.json +0 -15
  41. package/.claude-plugin/plugin.json +0 -16
  42. package/.github/CODEOWNERS +0 -6
  43. package/.github/workflows/ci.yml +0 -63
  44. package/.github/workflows/publish.yml +0 -146
  45. package/.mcp.json +0 -20
  46. package/CLAUDE.md +0 -92
  47. package/Dockerfile.test +0 -23
  48. package/biome.json +0 -96
  49. package/bun.lock +0 -513
  50. package/docker-compose.test.yml +0 -21
  51. package/scripts/bun-test-wrapper.sh +0 -46
  52. package/src/acp.constants.ts +0 -56
  53. package/src/acp.schemas.ts +0 -161
  54. package/src/acp.types.ts +0 -28
  55. package/src/tests/fixtures/.claude/settings.local.json +0 -8
  56. package/src/tests/fixtures/.claude/skills/greeting/SKILL.md +0 -17
  57. package/tsconfig.json +0 -32
package/bin/tests/cli.spec.ts CHANGED
@@ -26,10 +26,11 @@ describe('CLI invocation', () => {
26
26
  const exitCode = await proc.exited
27
27
 
28
28
  expect(exitCode).toBe(0)
29
- expect(stdout).toContain('Usage: acp-harness')
30
- expect(stdout).toContain('--cmd, --command')
31
- expect(stdout).toContain('--output')
32
- expect(stdout).toContain('--format')
29
+ expect(stdout).toContain('acp-harness')
30
+ expect(stdout).toContain('Commands:')
31
+ expect(stdout).toContain('capture')
32
+ expect(stdout).toContain('trials')
33
+ expect(stdout).toContain('summarize')
33
34
  })
34
35
 
35
36
  test('shows help with -h flag', async () => {
@@ -41,7 +42,7 @@ describe('CLI invocation', () => {
41
42
  const exitCode = await proc.exited
42
43
 
43
44
  expect(exitCode).toBe(0)
44
- expect(stdout).toContain('Usage: acp-harness')
45
+ expect(stdout).toContain('acp-harness')
45
46
  })
46
47
 
47
48
  test('shows help when no arguments provided', async () => {
@@ -52,8 +53,8 @@ describe('CLI invocation', () => {
52
53
  const stdout = await new Response(proc.stdout).text()
53
54
  const exitCode = await proc.exited
54
55
 
55
- expect(exitCode).toBe(1) // Exits with error when no args
56
- expect(stdout).toContain('Usage: acp-harness')
56
+ expect(exitCode).toBe(0) // Exits cleanly when showing help
57
+ expect(stdout).toContain('acp-harness')
57
58
  })
58
59
 
59
60
  test('help shows example commands', async () => {
@@ -64,23 +65,27 @@ describe('CLI invocation', () => {
64
65
  const stdout = await new Response(proc.stdout).text()
65
66
 
66
67
  expect(stdout).toContain('bunx claude-code-acp')
67
- expect(stdout).toContain('bun ./my-adapter.ts')
68
- expect(stdout).toContain('--format judge')
68
+ expect(stdout).toContain('prompts.jsonl')
69
+ expect(stdout).toContain('results.jsonl')
69
70
  })
70
71
 
71
- test('help shows both --cmd and --command flags', async () => {
72
+ test('help shows available commands', async () => {
72
73
  const proc = Bun.spawn(['bun', CLI_PATH, '--help'], {
73
74
  stdout: 'pipe',
74
75
  stderr: 'pipe',
75
76
  })
76
77
  const stdout = await new Response(proc.stdout).text()
77
78
 
78
- expect(stdout).toContain('--cmd')
79
- expect(stdout).toContain('--command')
79
+ expect(stdout).toContain('capture')
80
+ expect(stdout).toContain('trials')
81
+ expect(stdout).toContain('summarize')
82
+ expect(stdout).toContain('calibrate')
83
+ expect(stdout).toContain('balance')
84
+ expect(stdout).toContain('schemas')
80
85
  })
81
86
 
82
87
  test('fails with non-existent prompts file', async () => {
83
- const proc = Bun.spawn(['bun', CLI_PATH, 'nonexistent.jsonl'], {
88
+ const proc = Bun.spawn(['bun', CLI_PATH, 'capture', 'nonexistent.jsonl', 'bunx', 'claude-code-acp'], {
84
89
  stdout: 'pipe',
85
90
  stderr: 'pipe',
86
91
  })
@@ -88,7 +93,62 @@ describe('CLI invocation', () => {
88
93
  const exitCode = await proc.exited
89
94
 
90
95
  expect(exitCode).not.toBe(0)
91
- expect(stderr).toContain('Error')
96
+ expect(stderr).toContain('no such file or directory')
97
+ })
98
+
99
+ test('fails when no agent command provided', async () => {
100
+ const tmpFile = `/tmp/test-prompts-${Date.now()}.jsonl`
101
+ await Bun.write(tmpFile, '{"id":"test-001","input":"test"}\n')
102
+
103
+ const proc = Bun.spawn(['bun', CLI_PATH, 'capture', tmpFile], {
104
+ stdout: 'pipe',
105
+ stderr: 'pipe',
106
+ })
107
+ const stderr = await new Response(proc.stderr).text()
108
+ const exitCode = await proc.exited
109
+
110
+ expect(exitCode).toBe(1)
111
+ expect(stderr).toContain('ACP agent command is required')
112
+ })
113
+
114
+ test('fails with unknown command', async () => {
115
+ const proc = Bun.spawn(['bun', CLI_PATH, 'unknown-command'], {
116
+ stdout: 'pipe',
117
+ stderr: 'pipe',
118
+ })
119
+ const stderr = await new Response(proc.stderr).text()
120
+ const exitCode = await proc.exited
121
+
122
+ expect(exitCode).toBe(1)
123
+ expect(stderr).toContain('Unknown command')
124
+ })
125
+
126
+ test('capture command shows help with --help', async () => {
127
+ const proc = Bun.spawn(['bun', CLI_PATH, 'capture', '--help'], {
128
+ stdout: 'pipe',
129
+ stderr: 'pipe',
130
+ })
131
+ const stdout = await new Response(proc.stdout).text()
132
+ const exitCode = await proc.exited
133
+
134
+ expect(exitCode).toBe(0)
135
+ expect(stdout).toContain('capture')
136
+ expect(stdout).toContain('prompts.jsonl')
137
+ expect(stdout).toContain('--output')
138
+ })
139
+
140
+ test('trials command shows help with --help', async () => {
141
+ const proc = Bun.spawn(['bun', CLI_PATH, 'trials', '--help'], {
142
+ stdout: 'pipe',
143
+ stderr: 'pipe',
144
+ })
145
+ const stdout = await new Response(proc.stdout).text()
146
+ const exitCode = await proc.exited
147
+
148
+ expect(exitCode).toBe(0)
149
+ expect(stdout).toContain('trials')
150
+ expect(stdout).toContain('-k')
151
+ expect(stdout).toContain('pass@k')
92
152
  })
93
153
  })
94
154
 
@@ -101,7 +161,6 @@ const SummaryResultSchema = z.object({
101
161
  input: z.string(),
102
162
  output: z.string(),
103
163
  toolCalls: z.array(z.string()),
104
- status: z.enum(['passed', 'failed', 'error', 'timeout']),
105
164
  duration: z.number(),
106
165
  })
107
166
 
@@ -110,13 +169,13 @@ const TrajectoryStepSchema = z.discriminatedUnion('type', [
110
169
  type: z.literal('thought'),
111
170
  content: z.string(),
112
171
  timestamp: z.number(),
113
- stepId: z.string(),
172
+ stepId: z.string().optional(),
114
173
  }),
115
174
  z.object({
116
175
  type: z.literal('message'),
117
176
  content: z.string(),
118
177
  timestamp: z.number(),
119
- stepId: z.string(),
178
+ stepId: z.string().optional(),
120
179
  }),
121
180
  z.object({
122
181
  type: z.literal('tool_call'),
@@ -126,22 +185,17 @@ const TrajectoryStepSchema = z.discriminatedUnion('type', [
126
185
  output: z.unknown().optional(),
127
186
  duration: z.number().optional(),
128
187
  timestamp: z.number(),
129
- stepId: z.string(),
188
+ stepId: z.string().optional(),
130
189
  }),
131
190
  z.object({
132
191
  type: z.literal('plan'),
133
- entries: z.array(
134
- z.object({
135
- content: z.string(),
136
- status: z.string(),
137
- }),
138
- ),
192
+ entries: z.array(z.unknown()),
139
193
  timestamp: z.number(),
140
- stepId: z.string(),
194
+ stepId: z.string().optional(),
141
195
  }),
142
196
  ])
143
197
 
144
- const FullResultSchema = z.object({
198
+ const CaptureResultSchema = z.object({
145
199
  id: z.string(),
146
200
  input: z.string(),
147
201
  output: z.string(),
@@ -153,7 +207,7 @@ const FullResultSchema = z.object({
153
207
  end: z.number(),
154
208
  firstResponse: z.number().optional(),
155
209
  }),
156
- status: z.enum(['passed', 'failed', 'error', 'timeout']),
210
+ toolErrors: z.boolean(),
157
211
  errors: z.array(z.string()).optional(),
158
212
  })
159
213
 
@@ -161,12 +215,12 @@ const FullResultSchema = z.object({
161
215
  // Sample Output Data (matches harness output format)
162
216
  // ============================================================================
163
217
 
164
- const SAMPLE_SUMMARY_JSONL = `{"id":"test-001","input":"Create a button","output":"I created the button","toolCalls":["Write"],"status":"passed","duration":1234}
165
- {"id":"test-002","input":"Fix the bug","output":"I fixed the bug","toolCalls":["Read","Edit"],"status":"passed","duration":2567}
166
- {"id":"test-003","input":"Broken test","output":"","toolCalls":[],"status":"failed","duration":500}`
218
+ const SAMPLE_SUMMARY_JSONL = `{"id":"test-001","input":"Create a button","output":"I created the button","toolCalls":["Write"],"duration":1234}
219
+ {"id":"test-002","input":"Fix the bug","output":"I fixed the bug","toolCalls":["Read","Edit"],"duration":2567}
220
+ {"id":"test-003","input":"Broken test","output":"","toolCalls":[],"duration":500}`
167
221
 
168
- const SAMPLE_FULL_JSONL = `{"id":"test-001","input":"Create a button","output":"I created the button","trajectory":[{"type":"thought","content":"I'll create a button template","timestamp":100,"stepId":"test-001-step-1"},{"type":"tool_call","name":"Write","status":"completed","input":{"file_path":"src/button.tsx","content":"export const Button = () => <button>Click</button>"},"output":"File written","duration":234,"timestamp":150,"stepId":"test-001-step-2"},{"type":"message","content":"I created the button","timestamp":500,"stepId":"test-001-step-3"}],"metadata":{"category":"ui","agent":"claude-code-acp"},"timing":{"start":1704067200000,"end":1704067201234,"firstResponse":100},"status":"passed"}
169
- {"id":"test-002","input":"Fix the bug","output":"I fixed the bug","trajectory":[{"type":"tool_call","name":"Read","status":"completed","input":{"file_path":"src/app.ts"},"output":"file contents...","duration":100,"timestamp":50,"stepId":"test-002-step-1"},{"type":"tool_call","name":"Edit","status":"completed","input":{"file_path":"src/app.ts","old_string":"bug","new_string":"fix"},"duration":150,"timestamp":200,"stepId":"test-002-step-2"},{"type":"message","content":"I fixed the bug","timestamp":400,"stepId":"test-002-step-3"}],"metadata":{"category":"bugfix","agent":"claude-code-acp"},"timing":{"start":1704067300000,"end":1704067302567},"status":"passed"}`
222
+ const SAMPLE_CAPTURE_JSONL = `{"id":"test-001","input":"Create a button","output":"I created the button","trajectory":[{"type":"thought","content":"I'll create a button template","timestamp":100,"stepId":"test-001-step-1"},{"type":"tool_call","name":"Write","status":"completed","input":{"file_path":"src/button.tsx","content":"export const Button = () => <button>Click</button>"},"output":"File written","duration":234,"timestamp":150,"stepId":"test-001-step-2"},{"type":"message","content":"I created the button","timestamp":500,"stepId":"test-001-step-3"}],"metadata":{"category":"ui","agent":"claude-code-acp"},"timing":{"start":1704067200000,"end":1704067201234,"firstResponse":100},"toolErrors":false}
223
+ {"id":"test-002","input":"Fix the bug","output":"I fixed the bug","trajectory":[{"type":"tool_call","name":"Read","status":"completed","input":{"file_path":"src/app.ts"},"output":"file contents...","duration":100,"timestamp":50,"stepId":"test-002-step-1"},{"type":"tool_call","name":"Edit","status":"completed","input":{"file_path":"src/app.ts","old_string":"bug","new_string":"fix"},"duration":150,"timestamp":200,"stepId":"test-002-step-2"},{"type":"message","content":"I fixed the bug","timestamp":400,"stepId":"test-002-step-3"}],"metadata":{"category":"bugfix","agent":"claude-code-acp"},"timing":{"start":1704067300000,"end":1704067302567},"toolErrors":false}`
170
224
 
171
225
  // ============================================================================
172
226
  // Downstream Pattern Tests
@@ -188,12 +242,11 @@ describe('downstream patterns: summary JSONL', () => {
188
242
  }
189
243
  })
190
244
 
191
- test('filters by status (jq pattern)', () => {
245
+ test('filters by output presence (jq pattern)', () => {
192
246
  const results = parseResults(SAMPLE_SUMMARY_JSONL)
193
- const failed = results.filter((r) => r.status === 'failed')
247
+ const withOutput = results.filter((r) => r.output.length > 0)
194
248
 
195
- expect(failed).toHaveLength(1)
196
- expect(failed[0]?.id).toBe('test-003')
249
+ expect(withOutput).toHaveLength(2)
197
250
  })
198
251
 
199
252
  test('calculates average duration (jq pattern)', () => {
@@ -214,35 +267,35 @@ describe('downstream patterns: summary JSONL', () => {
214
267
  expect(toolCounts).toEqual({ Write: 1, Read: 1, Edit: 1 })
215
268
  })
216
269
 
217
- test('calculates pass rate (jq pattern)', () => {
270
+ test('calculates success rate by output presence', () => {
218
271
  const results = parseResults(SAMPLE_SUMMARY_JSONL)
219
- const passed = results.filter((r) => r.status === 'passed').length
272
+ const withOutput = results.filter((r) => r.output.length > 0).length
220
273
  const total = results.length
221
274
 
222
- expect(passed).toBe(2)
275
+ expect(withOutput).toBe(2)
223
276
  expect(total).toBe(3)
224
- expect(passed / total).toBeCloseTo(0.667, 2)
277
+ expect(withOutput / total).toBeCloseTo(0.667, 2)
225
278
  })
226
279
  })
227
280
 
228
- describe('downstream patterns: full JSONL', () => {
281
+ describe('downstream patterns: capture JSONL', () => {
229
282
  const parseResults = (jsonl: string) =>
230
283
  jsonl
231
284
  .trim()
232
285
  .split('\n')
233
286
  .map((line) => JSON.parse(line))
234
287
 
235
- test('parses full JSONL with trajectories', () => {
236
- const results = parseResults(SAMPLE_FULL_JSONL)
288
+ test('parses capture JSONL with trajectories', () => {
289
+ const results = parseResults(SAMPLE_CAPTURE_JSONL)
237
290
 
238
291
  expect(results).toHaveLength(2)
239
292
  for (const result of results) {
240
- expect(() => FullResultSchema.parse(result)).not.toThrow()
293
+ expect(() => CaptureResultSchema.parse(result)).not.toThrow()
241
294
  }
242
295
  })
243
296
 
244
297
  test('step IDs follow expected format', () => {
245
- const results = parseResults(SAMPLE_FULL_JSONL)
298
+ const results = parseResults(SAMPLE_CAPTURE_JSONL)
246
299
 
247
300
  for (const result of results) {
248
301
  for (const step of result.trajectory) {
@@ -252,7 +305,7 @@ describe('downstream patterns: full JSONL', () => {
252
305
  })
253
306
 
254
307
  test('step-level retrieval pattern works', () => {
255
- const results = parseResults(SAMPLE_FULL_JSONL)
308
+ const results = parseResults(SAMPLE_CAPTURE_JSONL)
256
309
 
257
310
  // Build step index (pattern from downstream.md)
258
311
  const stepIndex = new Map<string, unknown>()
@@ -270,7 +323,7 @@ describe('downstream patterns: full JSONL', () => {
270
323
  })
271
324
 
272
325
  test('extracts tool calls from trajectory', () => {
273
- const results = parseResults(SAMPLE_FULL_JSONL)
326
+ const results = parseResults(SAMPLE_CAPTURE_JSONL)
274
327
  const result = results[1] // test-002
275
328
 
276
329
  const toolCalls = result.trajectory.filter((s: { type: string }) => s.type === 'tool_call')
@@ -279,12 +332,19 @@ describe('downstream patterns: full JSONL', () => {
279
332
  })
280
333
 
281
334
  test('filters by metadata category', () => {
282
- const results = parseResults(SAMPLE_FULL_JSONL)
335
+ const results = parseResults(SAMPLE_CAPTURE_JSONL)
283
336
  const uiResults = results.filter((r) => r.metadata.category === 'ui')
284
337
 
285
338
  expect(uiResults).toHaveLength(1)
286
339
  expect(uiResults[0]?.id).toBe('test-001')
287
340
  })
341
+
342
+ test('identifies results with tool errors', () => {
343
+ const results = parseResults(SAMPLE_CAPTURE_JSONL)
344
+ const withErrors = results.filter((r) => r.toolErrors)
345
+
346
+ expect(withErrors).toHaveLength(0) // Sample data has no errors
347
+ })
288
348
  })
289
349
 
290
350
  describe('downstream patterns: advanced filtering', () => {
@@ -321,7 +381,7 @@ describe('downstream patterns: advanced filtering', () => {
321
381
 
322
382
  test('deduplicates by ID keeping latest (merge pattern)', () => {
323
383
  const combinedJsonl = `${SAMPLE_SUMMARY_JSONL}
324
- {"id":"test-001","input":"Create a button v2","output":"I created the button v2","toolCalls":["Write","Edit"],"status":"passed","duration":1500}`
384
+ {"id":"test-001","input":"Create a button v2","output":"I created the button v2","toolCalls":["Write","Edit"],"duration":1500}`
325
385
 
326
386
  const results = parseResults(combinedJsonl)
327
387
 
@@ -338,7 +398,7 @@ describe('downstream patterns: advanced filtering', () => {
338
398
  })
339
399
 
340
400
  test('groups by category and counts', () => {
341
- const results = parseResults(SAMPLE_FULL_JSONL)
401
+ const results = parseResults(SAMPLE_CAPTURE_JSONL)
342
402
 
343
403
  // Group by category (simulates jq group_by pattern)
344
404
  const grouped = results.reduce<Record<string, number>>((acc, r) => {
@@ -351,7 +411,7 @@ describe('downstream patterns: advanced filtering', () => {
351
411
  })
352
412
 
353
413
  test('extracts timing information', () => {
354
- const results = parseResults(SAMPLE_FULL_JSONL)
414
+ const results = parseResults(SAMPLE_CAPTURE_JSONL)
355
415
  const result = results[0]
356
416
 
357
417
  expect(result.timing.start).toBe(1704067200000)
@@ -360,3 +420,110 @@ describe('downstream patterns: advanced filtering', () => {
360
420
  expect(result.timing.end - result.timing.start).toBe(1234) // matches duration
361
421
  })
362
422
  })
423
+
424
+ // ============================================================================
425
+ // MCP Server Config Parsing Tests
426
+ // ============================================================================
427
+
428
+ describe('MCP server config parsing', () => {
429
+ test('parses stdio MCP server config', () => {
430
+ const json = '{"type":"stdio","name":"fs","command":"mcp-filesystem","args":["/data"],"env":[]}'
431
+ const proc = Bun.spawn(
432
+ ['bun', CLI_PATH, 'capture', '/tmp/test.jsonl', 'bunx', 'claude-code-acp', '--mcp-server', json, '--help'],
433
+ {
434
+ stdout: 'pipe',
435
+ stderr: 'pipe',
436
+ },
437
+ )
438
+
439
+ // If it doesn't crash, the parsing worked
440
+ expect(proc.exited).resolves.toBeDefined()
441
+ })
442
+
443
+ test('parses http MCP server config', () => {
444
+ const json =
445
+ '{"type":"http","name":"api","url":"https://example.com/mcp","headers":[{"name":"Authorization","value":"Bearer token"}]}'
446
+ const proc = Bun.spawn(
447
+ ['bun', CLI_PATH, 'capture', '/tmp/test.jsonl', 'bunx', 'claude-code-acp', '--mcp-server', json, '--help'],
448
+ {
449
+ stdout: 'pipe',
450
+ stderr: 'pipe',
451
+ },
452
+ )
453
+
454
+ // If it doesn't crash, the parsing worked
455
+ expect(proc.exited).resolves.toBeDefined()
456
+ })
457
+
458
+ test('accepts multiple MCP servers', () => {
459
+ const json1 = '{"type":"stdio","name":"fs","command":"mcp-filesystem","args":[],"env":[]}'
460
+ const json2 = '{"type":"http","name":"api","url":"https://example.com","headers":[]}'
461
+ const proc = Bun.spawn(
462
+ [
463
+ 'bun',
464
+ CLI_PATH,
465
+ 'capture',
466
+ '/tmp/test.jsonl',
467
+ 'bunx',
468
+ 'claude-code-acp',
469
+ '--mcp-server',
470
+ json1,
471
+ '--mcp-server',
472
+ json2,
473
+ '--help',
474
+ ],
475
+ {
476
+ stdout: 'pipe',
477
+ stderr: 'pipe',
478
+ },
479
+ )
480
+
481
+ // If it doesn't crash, the parsing worked
482
+ expect(proc.exited).resolves.toBeDefined()
483
+ })
484
+ })
485
+
486
+ // ============================================================================
487
+ // Error Handling Tests
488
+ // ============================================================================
489
+
490
+ describe('error handling', () => {
491
+ test('fails with invalid JSONL format', async () => {
492
+ const tmpFile = `/tmp/invalid-${Date.now()}.jsonl`
493
+ await Bun.write(tmpFile, '{invalid json}\n')
494
+
495
+ const proc = Bun.spawn(['bun', CLI_PATH, 'capture', tmpFile, 'bunx', 'claude-code-acp'], {
496
+ stdout: 'pipe',
497
+ stderr: 'pipe',
498
+ })
499
+ const stderr = await new Response(proc.stderr).text()
500
+ const exitCode = await proc.exited
501
+
502
+ expect(exitCode).not.toBe(0)
503
+ expect(stderr).toContain('Invalid prompt at line 1')
504
+ })
505
+
506
+ test('capture command requires prompts path', async () => {
507
+ const proc = Bun.spawn(['bun', CLI_PATH, 'capture'], {
508
+ stdout: 'pipe',
509
+ stderr: 'pipe',
510
+ })
511
+ const stderr = await new Response(proc.stderr).text()
512
+ const exitCode = await proc.exited
513
+
514
+ expect(exitCode).toBe(1)
515
+ expect(stderr).toContain('prompts.jsonl path is required')
516
+ })
517
+
518
+ test('summarize command requires input path', async () => {
519
+ const proc = Bun.spawn(['bun', CLI_PATH, 'summarize'], {
520
+ stdout: 'pipe',
521
+ stderr: 'pipe',
522
+ })
523
+ const stderr = await new Response(proc.stderr).text()
524
+ const exitCode = await proc.exited
525
+
526
+ expect(exitCode).toBe(1)
527
+ expect(stderr).toContain('results.jsonl path is required')
528
+ })
529
+ })
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@plaited/acp-harness",
3
- "version": "0.2.6",
3
+ "version": "0.3.1",
4
4
  "description": "CLI tool for capturing agent trajectories from ACP-compatible agents",
5
5
  "license": "ISC",
6
6
  "engines": {
@@ -18,6 +18,21 @@
18
18
  "acp-harness": "./bin/cli.ts"
19
19
  },
20
20
  "type": "module",
21
+ "exports": {
22
+ ".": "./src/acp.ts",
23
+ "./schemas": "./src/schemas.ts",
24
+ "./harness": "./src/harness.ts"
25
+ },
26
+ "files": [
27
+ "./src/**",
28
+ "./bin/**",
29
+ "!./src/**/tests/*",
30
+ "!./src/**/*.spec.ts",
31
+ "!./src/**/*.docker.ts",
32
+ "!./bin/**/tests/*",
33
+ "!./bin/**/*.spec.ts",
34
+ "!./bin/**/*.docker.ts"
35
+ ],
21
36
  "publishConfig": {
22
37
  "access": "public"
23
38
  },
@@ -40,11 +55,12 @@
40
55
  ]
41
56
  },
42
57
  "dependencies": {
43
- "@agentclientprotocol/sdk": "^0.13.0",
44
- "zod": "^4.3.5"
58
+ "zod": "^4.3.5",
59
+ "@plaited/development-skills": "0.4.1"
45
60
  },
46
61
  "peerDependencies": {
47
- "bun": ">=1.2.9"
62
+ "typescript-language-server": "^5.1.3",
63
+ "@agentclientprotocol/sdk": "^0.13.0"
48
64
  },
49
65
  "devDependencies": {
50
66
  "@biomejs/biome": "2.3.11",
package/src/acp-client.ts CHANGED
@@ -30,10 +30,11 @@ import type {
30
30
  SessionNotification,
31
31
  } from '@agentclientprotocol/sdk'
32
32
  import { version } from '../package.json' with { type: 'json' }
33
- import { ACP_METHODS, ACP_PROTOCOL_VERSION, DEFAULT_ACP_CLIENT_NAME } from './acp.constants.ts'
34
- import { RequestPermissionRequestSchema, SessionNotificationSchema } from './acp.schemas.ts'
35
- import type { Session } from './acp.types.ts'
36
33
  import { createACPTransport } from './acp-transport.ts'
34
+ import { ACP_METHODS, ACP_PROTOCOL_VERSION, DEFAULT_ACP_CLIENT_NAME, DEFAULT_POLLING_INTERVAL } from './constants.ts'
35
+ import type { Session } from './schemas.ts'
36
+ import { RequestPermissionRequestSchema, SessionNotificationSchema } from './schemas.ts'
37
+
37
38
  // ============================================================================
38
39
  // Types
39
40
  // ============================================================================
@@ -120,7 +121,7 @@ export const createACPClient = (config: ACPClientConfig) => {
120
121
  clientInfo = { name: DEFAULT_ACP_CLIENT_NAME, version },
121
122
  capabilities = {},
122
123
  timeout = 30000,
123
- pollingInterval = 50,
124
+ pollingInterval = DEFAULT_POLLING_INTERVAL,
124
125
  onPermissionRequest,
125
126
  } = config
126
127
 
package/src/acp-transport.ts CHANGED
@@ -10,7 +10,7 @@
10
10
  * newline-delimited JSON messages with Zod runtime validation.
11
11
  */
12
12
 
13
- import { JSON_RPC_ERRORS } from './acp.constants.ts'
13
+ import { JSON_RPC_ERRORS } from './constants.ts'
14
14
  import type {
15
15
  JsonRpcError,
16
16
  JsonRpcErrorResponse,
@@ -19,8 +19,8 @@ import type {
19
19
  JsonRpcRequest,
20
20
  JsonRpcResponse,
21
21
  JsonRpcSuccessResponse,
22
- } from './acp.schemas.ts'
23
- import { JsonRpcMessageSchema } from './acp.schemas.ts'
22
+ } from './schemas.ts'
23
+ import { JsonRpcMessageSchema } from './schemas.ts'
24
24
 
25
25
  // ============================================================================
26
26
  // Types
@@ -71,7 +71,7 @@ type PipedSubprocess = {
71
71
  }
72
72
 
73
73
  /** Custom error for ACP transport failures */
74
- class ACPTransportError extends Error {
74
+ export class ACPTransportError extends Error {
75
75
  constructor(
76
76
  message: string,
77
77
  public readonly code?: number,
@@ -275,9 +275,12 @@ export const createACPTransport = (config: ACPTransportConfig) => {
275
275
 
276
276
  // Read stdout for JSON-RPC messages
277
277
  const readStdout = async () => {
278
+ if (!subprocess) {
279
+ throw new ACPTransportError('Subprocess not started')
280
+ }
278
281
  // Type assertion needed: Bun's ReadableStreamDefaultReader includes readMany
279
282
  // but node:stream/web reader returned by getReader() doesn't have it
280
- const reader = subprocess!.stdout.getReader() as globalThis.ReadableStreamDefaultReader<Uint8Array>
283
+ const reader = subprocess.stdout.getReader() as globalThis.ReadableStreamDefaultReader<Uint8Array>
281
284
  stdoutReader = reader
282
285
  const decoder = new TextDecoder()
283
286
 
@@ -303,9 +306,12 @@ export const createACPTransport = (config: ACPTransportConfig) => {
303
306
 
304
307
  // Read stderr for debugging
305
308
  const readStderr = async () => {
309
+ if (!subprocess) {
310
+ throw new ACPTransportError('Subprocess not started')
311
+ }
306
312
  // Type assertion needed: Bun's ReadableStreamDefaultReader includes readMany
307
313
  // but node:stream/web reader returned by getReader() doesn't have it
308
- const reader = subprocess!.stderr.getReader() as globalThis.ReadableStreamDefaultReader<Uint8Array>
314
+ const reader = subprocess.stderr.getReader() as globalThis.ReadableStreamDefaultReader<Uint8Array>
309
315
  stderrReader = reader
310
316
  const decoder = new TextDecoder()
311
317
 
@@ -326,7 +332,8 @@ export const createACPTransport = (config: ACPTransportConfig) => {
326
332
  }
327
333
  }
328
334
 
329
- // Start reading streams
335
+ // Start reading streams (fire-and-forget pattern)
336
+ // These run concurrently and clean up via optional chaining in close()
330
337
  readStdout()
331
338
  readStderr()
332
339