@plaited/acp-harness 0.2.5 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +120 -16
- package/bin/cli.ts +105 -636
- package/bin/tests/cli.spec.ts +218 -51
- package/package.json +20 -4
- package/src/acp-client.ts +5 -4
- package/src/acp-transport.ts +14 -7
- package/src/adapter-check.ts +542 -0
- package/src/adapter-scaffold.ts +934 -0
- package/src/balance.ts +232 -0
- package/src/calibrate.ts +300 -0
- package/src/capture.ts +457 -0
- package/src/constants.ts +94 -0
- package/src/grader-loader.ts +174 -0
- package/src/harness.ts +35 -0
- package/src/schemas-cli.ts +239 -0
- package/src/schemas.ts +567 -0
- package/src/summarize.ts +245 -0
- package/src/tests/adapter-check.spec.ts +70 -0
- package/src/tests/adapter-scaffold.spec.ts +112 -0
- package/src/tests/fixtures/grader-bad-module.ts +5 -0
- package/src/tests/fixtures/grader-exec-fail.py +9 -0
- package/src/tests/fixtures/grader-exec-invalid.py +6 -0
- package/src/tests/fixtures/grader-exec.py +29 -0
- package/src/tests/fixtures/grader-module.ts +14 -0
- package/src/tests/grader-loader.spec.ts +153 -0
- package/src/trials.ts +395 -0
- package/src/validate-refs.ts +188 -0
- package/.claude/rules/accuracy.md +0 -43
- package/.claude/rules/bun-apis.md +0 -80
- package/.claude/rules/code-review.md +0 -254
- package/.claude/rules/git-workflow.md +0 -37
- package/.claude/rules/github.md +0 -154
- package/.claude/rules/testing.md +0 -172
- package/.claude/skills/acp-harness/SKILL.md +0 -310
- package/.claude/skills/acp-harness/assets/Dockerfile.acp +0 -25
- package/.claude/skills/acp-harness/assets/docker-compose.acp.yml +0 -19
- package/.claude/skills/acp-harness/references/downstream.md +0 -288
- package/.claude/skills/acp-harness/references/output-formats.md +0 -221
- package/.claude-plugin/marketplace.json +0 -15
- package/.claude-plugin/plugin.json +0 -16
- package/.github/CODEOWNERS +0 -6
- package/.github/workflows/ci.yml +0 -63
- package/.github/workflows/publish.yml +0 -146
- package/.mcp.json +0 -20
- package/CLAUDE.md +0 -92
- package/Dockerfile.test +0 -23
- package/biome.json +0 -96
- package/bun.lock +0 -513
- package/docker-compose.test.yml +0 -21
- package/scripts/bun-test-wrapper.sh +0 -46
- package/src/acp.constants.ts +0 -56
- package/src/acp.schemas.ts +0 -161
- package/src/acp.types.ts +0 -28
- package/src/tests/fixtures/.claude/settings.local.json +0 -8
- package/src/tests/fixtures/.claude/skills/greeting/SKILL.md +0 -17
- package/tsconfig.json +0 -32
package/bin/tests/cli.spec.ts
CHANGED
|
@@ -26,10 +26,11 @@ describe('CLI invocation', () => {
|
|
|
26
26
|
const exitCode = await proc.exited
|
|
27
27
|
|
|
28
28
|
expect(exitCode).toBe(0)
|
|
29
|
-
expect(stdout).toContain('
|
|
30
|
-
expect(stdout).toContain('
|
|
31
|
-
expect(stdout).toContain('
|
|
32
|
-
expect(stdout).toContain('
|
|
29
|
+
expect(stdout).toContain('acp-harness')
|
|
30
|
+
expect(stdout).toContain('Commands:')
|
|
31
|
+
expect(stdout).toContain('capture')
|
|
32
|
+
expect(stdout).toContain('trials')
|
|
33
|
+
expect(stdout).toContain('summarize')
|
|
33
34
|
})
|
|
34
35
|
|
|
35
36
|
test('shows help with -h flag', async () => {
|
|
@@ -41,7 +42,7 @@ describe('CLI invocation', () => {
|
|
|
41
42
|
const exitCode = await proc.exited
|
|
42
43
|
|
|
43
44
|
expect(exitCode).toBe(0)
|
|
44
|
-
expect(stdout).toContain('
|
|
45
|
+
expect(stdout).toContain('acp-harness')
|
|
45
46
|
})
|
|
46
47
|
|
|
47
48
|
test('shows help when no arguments provided', async () => {
|
|
@@ -52,8 +53,8 @@ describe('CLI invocation', () => {
|
|
|
52
53
|
const stdout = await new Response(proc.stdout).text()
|
|
53
54
|
const exitCode = await proc.exited
|
|
54
55
|
|
|
55
|
-
expect(exitCode).toBe(
|
|
56
|
-
expect(stdout).toContain('
|
|
56
|
+
expect(exitCode).toBe(0) // Exits cleanly when showing help
|
|
57
|
+
expect(stdout).toContain('acp-harness')
|
|
57
58
|
})
|
|
58
59
|
|
|
59
60
|
test('help shows example commands', async () => {
|
|
@@ -64,23 +65,27 @@ describe('CLI invocation', () => {
|
|
|
64
65
|
const stdout = await new Response(proc.stdout).text()
|
|
65
66
|
|
|
66
67
|
expect(stdout).toContain('bunx claude-code-acp')
|
|
67
|
-
expect(stdout).toContain('
|
|
68
|
-
expect(stdout).toContain('
|
|
68
|
+
expect(stdout).toContain('prompts.jsonl')
|
|
69
|
+
expect(stdout).toContain('results.jsonl')
|
|
69
70
|
})
|
|
70
71
|
|
|
71
|
-
test('help shows
|
|
72
|
+
test('help shows available commands', async () => {
|
|
72
73
|
const proc = Bun.spawn(['bun', CLI_PATH, '--help'], {
|
|
73
74
|
stdout: 'pipe',
|
|
74
75
|
stderr: 'pipe',
|
|
75
76
|
})
|
|
76
77
|
const stdout = await new Response(proc.stdout).text()
|
|
77
78
|
|
|
78
|
-
expect(stdout).toContain('
|
|
79
|
-
expect(stdout).toContain('
|
|
79
|
+
expect(stdout).toContain('capture')
|
|
80
|
+
expect(stdout).toContain('trials')
|
|
81
|
+
expect(stdout).toContain('summarize')
|
|
82
|
+
expect(stdout).toContain('calibrate')
|
|
83
|
+
expect(stdout).toContain('balance')
|
|
84
|
+
expect(stdout).toContain('schemas')
|
|
80
85
|
})
|
|
81
86
|
|
|
82
87
|
test('fails with non-existent prompts file', async () => {
|
|
83
|
-
const proc = Bun.spawn(['bun', CLI_PATH, 'nonexistent.jsonl'], {
|
|
88
|
+
const proc = Bun.spawn(['bun', CLI_PATH, 'capture', 'nonexistent.jsonl', 'bunx', 'claude-code-acp'], {
|
|
84
89
|
stdout: 'pipe',
|
|
85
90
|
stderr: 'pipe',
|
|
86
91
|
})
|
|
@@ -88,7 +93,62 @@ describe('CLI invocation', () => {
|
|
|
88
93
|
const exitCode = await proc.exited
|
|
89
94
|
|
|
90
95
|
expect(exitCode).not.toBe(0)
|
|
91
|
-
expect(stderr).toContain('
|
|
96
|
+
expect(stderr).toContain('no such file or directory')
|
|
97
|
+
})
|
|
98
|
+
|
|
99
|
+
test('fails when no agent command provided', async () => {
|
|
100
|
+
const tmpFile = `/tmp/test-prompts-${Date.now()}.jsonl`
|
|
101
|
+
await Bun.write(tmpFile, '{"id":"test-001","input":"test"}\n')
|
|
102
|
+
|
|
103
|
+
const proc = Bun.spawn(['bun', CLI_PATH, 'capture', tmpFile], {
|
|
104
|
+
stdout: 'pipe',
|
|
105
|
+
stderr: 'pipe',
|
|
106
|
+
})
|
|
107
|
+
const stderr = await new Response(proc.stderr).text()
|
|
108
|
+
const exitCode = await proc.exited
|
|
109
|
+
|
|
110
|
+
expect(exitCode).toBe(1)
|
|
111
|
+
expect(stderr).toContain('ACP agent command is required')
|
|
112
|
+
})
|
|
113
|
+
|
|
114
|
+
test('fails with unknown command', async () => {
|
|
115
|
+
const proc = Bun.spawn(['bun', CLI_PATH, 'unknown-command'], {
|
|
116
|
+
stdout: 'pipe',
|
|
117
|
+
stderr: 'pipe',
|
|
118
|
+
})
|
|
119
|
+
const stderr = await new Response(proc.stderr).text()
|
|
120
|
+
const exitCode = await proc.exited
|
|
121
|
+
|
|
122
|
+
expect(exitCode).toBe(1)
|
|
123
|
+
expect(stderr).toContain('Unknown command')
|
|
124
|
+
})
|
|
125
|
+
|
|
126
|
+
test('capture command shows help with --help', async () => {
|
|
127
|
+
const proc = Bun.spawn(['bun', CLI_PATH, 'capture', '--help'], {
|
|
128
|
+
stdout: 'pipe',
|
|
129
|
+
stderr: 'pipe',
|
|
130
|
+
})
|
|
131
|
+
const stdout = await new Response(proc.stdout).text()
|
|
132
|
+
const exitCode = await proc.exited
|
|
133
|
+
|
|
134
|
+
expect(exitCode).toBe(0)
|
|
135
|
+
expect(stdout).toContain('capture')
|
|
136
|
+
expect(stdout).toContain('prompts.jsonl')
|
|
137
|
+
expect(stdout).toContain('--output')
|
|
138
|
+
})
|
|
139
|
+
|
|
140
|
+
test('trials command shows help with --help', async () => {
|
|
141
|
+
const proc = Bun.spawn(['bun', CLI_PATH, 'trials', '--help'], {
|
|
142
|
+
stdout: 'pipe',
|
|
143
|
+
stderr: 'pipe',
|
|
144
|
+
})
|
|
145
|
+
const stdout = await new Response(proc.stdout).text()
|
|
146
|
+
const exitCode = await proc.exited
|
|
147
|
+
|
|
148
|
+
expect(exitCode).toBe(0)
|
|
149
|
+
expect(stdout).toContain('trials')
|
|
150
|
+
expect(stdout).toContain('-k')
|
|
151
|
+
expect(stdout).toContain('pass@k')
|
|
92
152
|
})
|
|
93
153
|
})
|
|
94
154
|
|
|
@@ -101,7 +161,6 @@ const SummaryResultSchema = z.object({
|
|
|
101
161
|
input: z.string(),
|
|
102
162
|
output: z.string(),
|
|
103
163
|
toolCalls: z.array(z.string()),
|
|
104
|
-
status: z.enum(['passed', 'failed', 'error', 'timeout']),
|
|
105
164
|
duration: z.number(),
|
|
106
165
|
})
|
|
107
166
|
|
|
@@ -110,13 +169,13 @@ const TrajectoryStepSchema = z.discriminatedUnion('type', [
|
|
|
110
169
|
type: z.literal('thought'),
|
|
111
170
|
content: z.string(),
|
|
112
171
|
timestamp: z.number(),
|
|
113
|
-
stepId: z.string(),
|
|
172
|
+
stepId: z.string().optional(),
|
|
114
173
|
}),
|
|
115
174
|
z.object({
|
|
116
175
|
type: z.literal('message'),
|
|
117
176
|
content: z.string(),
|
|
118
177
|
timestamp: z.number(),
|
|
119
|
-
stepId: z.string(),
|
|
178
|
+
stepId: z.string().optional(),
|
|
120
179
|
}),
|
|
121
180
|
z.object({
|
|
122
181
|
type: z.literal('tool_call'),
|
|
@@ -126,22 +185,17 @@ const TrajectoryStepSchema = z.discriminatedUnion('type', [
|
|
|
126
185
|
output: z.unknown().optional(),
|
|
127
186
|
duration: z.number().optional(),
|
|
128
187
|
timestamp: z.number(),
|
|
129
|
-
stepId: z.string(),
|
|
188
|
+
stepId: z.string().optional(),
|
|
130
189
|
}),
|
|
131
190
|
z.object({
|
|
132
191
|
type: z.literal('plan'),
|
|
133
|
-
entries: z.array(
|
|
134
|
-
z.object({
|
|
135
|
-
content: z.string(),
|
|
136
|
-
status: z.string(),
|
|
137
|
-
}),
|
|
138
|
-
),
|
|
192
|
+
entries: z.array(z.unknown()),
|
|
139
193
|
timestamp: z.number(),
|
|
140
|
-
stepId: z.string(),
|
|
194
|
+
stepId: z.string().optional(),
|
|
141
195
|
}),
|
|
142
196
|
])
|
|
143
197
|
|
|
144
|
-
const
|
|
198
|
+
const CaptureResultSchema = z.object({
|
|
145
199
|
id: z.string(),
|
|
146
200
|
input: z.string(),
|
|
147
201
|
output: z.string(),
|
|
@@ -153,7 +207,7 @@ const FullResultSchema = z.object({
|
|
|
153
207
|
end: z.number(),
|
|
154
208
|
firstResponse: z.number().optional(),
|
|
155
209
|
}),
|
|
156
|
-
|
|
210
|
+
toolErrors: z.boolean(),
|
|
157
211
|
errors: z.array(z.string()).optional(),
|
|
158
212
|
})
|
|
159
213
|
|
|
@@ -161,12 +215,12 @@ const FullResultSchema = z.object({
|
|
|
161
215
|
// Sample Output Data (matches harness output format)
|
|
162
216
|
// ============================================================================
|
|
163
217
|
|
|
164
|
-
const SAMPLE_SUMMARY_JSONL = `{"id":"test-001","input":"Create a button","output":"I created the button","toolCalls":["Write"],"
|
|
165
|
-
{"id":"test-002","input":"Fix the bug","output":"I fixed the bug","toolCalls":["Read","Edit"],"
|
|
166
|
-
{"id":"test-003","input":"Broken test","output":"","toolCalls":[],"
|
|
218
|
+
const SAMPLE_SUMMARY_JSONL = `{"id":"test-001","input":"Create a button","output":"I created the button","toolCalls":["Write"],"duration":1234}
|
|
219
|
+
{"id":"test-002","input":"Fix the bug","output":"I fixed the bug","toolCalls":["Read","Edit"],"duration":2567}
|
|
220
|
+
{"id":"test-003","input":"Broken test","output":"","toolCalls":[],"duration":500}`
|
|
167
221
|
|
|
168
|
-
const
|
|
169
|
-
{"id":"test-002","input":"Fix the bug","output":"I fixed the bug","trajectory":[{"type":"tool_call","name":"Read","status":"completed","input":{"file_path":"src/app.ts"},"output":"file contents...","duration":100,"timestamp":50,"stepId":"test-002-step-1"},{"type":"tool_call","name":"Edit","status":"completed","input":{"file_path":"src/app.ts","old_string":"bug","new_string":"fix"},"duration":150,"timestamp":200,"stepId":"test-002-step-2"},{"type":"message","content":"I fixed the bug","timestamp":400,"stepId":"test-002-step-3"}],"metadata":{"category":"bugfix","agent":"claude-code-acp"},"timing":{"start":1704067300000,"end":1704067302567},"
|
|
222
|
+
const SAMPLE_CAPTURE_JSONL = `{"id":"test-001","input":"Create a button","output":"I created the button","trajectory":[{"type":"thought","content":"I'll create a button template","timestamp":100,"stepId":"test-001-step-1"},{"type":"tool_call","name":"Write","status":"completed","input":{"file_path":"src/button.tsx","content":"export const Button = () => <button>Click</button>"},"output":"File written","duration":234,"timestamp":150,"stepId":"test-001-step-2"},{"type":"message","content":"I created the button","timestamp":500,"stepId":"test-001-step-3"}],"metadata":{"category":"ui","agent":"claude-code-acp"},"timing":{"start":1704067200000,"end":1704067201234,"firstResponse":100},"toolErrors":false}
|
|
223
|
+
{"id":"test-002","input":"Fix the bug","output":"I fixed the bug","trajectory":[{"type":"tool_call","name":"Read","status":"completed","input":{"file_path":"src/app.ts"},"output":"file contents...","duration":100,"timestamp":50,"stepId":"test-002-step-1"},{"type":"tool_call","name":"Edit","status":"completed","input":{"file_path":"src/app.ts","old_string":"bug","new_string":"fix"},"duration":150,"timestamp":200,"stepId":"test-002-step-2"},{"type":"message","content":"I fixed the bug","timestamp":400,"stepId":"test-002-step-3"}],"metadata":{"category":"bugfix","agent":"claude-code-acp"},"timing":{"start":1704067300000,"end":1704067302567},"toolErrors":false}`
|
|
170
224
|
|
|
171
225
|
// ============================================================================
|
|
172
226
|
// Downstream Pattern Tests
|
|
@@ -188,12 +242,11 @@ describe('downstream patterns: summary JSONL', () => {
|
|
|
188
242
|
}
|
|
189
243
|
})
|
|
190
244
|
|
|
191
|
-
test('filters by
|
|
245
|
+
test('filters by output presence (jq pattern)', () => {
|
|
192
246
|
const results = parseResults(SAMPLE_SUMMARY_JSONL)
|
|
193
|
-
const
|
|
247
|
+
const withOutput = results.filter((r) => r.output.length > 0)
|
|
194
248
|
|
|
195
|
-
expect(
|
|
196
|
-
expect(failed[0]?.id).toBe('test-003')
|
|
249
|
+
expect(withOutput).toHaveLength(2)
|
|
197
250
|
})
|
|
198
251
|
|
|
199
252
|
test('calculates average duration (jq pattern)', () => {
|
|
@@ -214,35 +267,35 @@ describe('downstream patterns: summary JSONL', () => {
|
|
|
214
267
|
expect(toolCounts).toEqual({ Write: 1, Read: 1, Edit: 1 })
|
|
215
268
|
})
|
|
216
269
|
|
|
217
|
-
test('calculates
|
|
270
|
+
test('calculates success rate by output presence', () => {
|
|
218
271
|
const results = parseResults(SAMPLE_SUMMARY_JSONL)
|
|
219
|
-
const
|
|
272
|
+
const withOutput = results.filter((r) => r.output.length > 0).length
|
|
220
273
|
const total = results.length
|
|
221
274
|
|
|
222
|
-
expect(
|
|
275
|
+
expect(withOutput).toBe(2)
|
|
223
276
|
expect(total).toBe(3)
|
|
224
|
-
expect(
|
|
277
|
+
expect(withOutput / total).toBeCloseTo(0.667, 2)
|
|
225
278
|
})
|
|
226
279
|
})
|
|
227
280
|
|
|
228
|
-
describe('downstream patterns:
|
|
281
|
+
describe('downstream patterns: capture JSONL', () => {
|
|
229
282
|
const parseResults = (jsonl: string) =>
|
|
230
283
|
jsonl
|
|
231
284
|
.trim()
|
|
232
285
|
.split('\n')
|
|
233
286
|
.map((line) => JSON.parse(line))
|
|
234
287
|
|
|
235
|
-
test('parses
|
|
236
|
-
const results = parseResults(
|
|
288
|
+
test('parses capture JSONL with trajectories', () => {
|
|
289
|
+
const results = parseResults(SAMPLE_CAPTURE_JSONL)
|
|
237
290
|
|
|
238
291
|
expect(results).toHaveLength(2)
|
|
239
292
|
for (const result of results) {
|
|
240
|
-
expect(() =>
|
|
293
|
+
expect(() => CaptureResultSchema.parse(result)).not.toThrow()
|
|
241
294
|
}
|
|
242
295
|
})
|
|
243
296
|
|
|
244
297
|
test('step IDs follow expected format', () => {
|
|
245
|
-
const results = parseResults(
|
|
298
|
+
const results = parseResults(SAMPLE_CAPTURE_JSONL)
|
|
246
299
|
|
|
247
300
|
for (const result of results) {
|
|
248
301
|
for (const step of result.trajectory) {
|
|
@@ -252,7 +305,7 @@ describe('downstream patterns: full JSONL', () => {
|
|
|
252
305
|
})
|
|
253
306
|
|
|
254
307
|
test('step-level retrieval pattern works', () => {
|
|
255
|
-
const results = parseResults(
|
|
308
|
+
const results = parseResults(SAMPLE_CAPTURE_JSONL)
|
|
256
309
|
|
|
257
310
|
// Build step index (pattern from downstream.md)
|
|
258
311
|
const stepIndex = new Map<string, unknown>()
|
|
@@ -270,7 +323,7 @@ describe('downstream patterns: full JSONL', () => {
|
|
|
270
323
|
})
|
|
271
324
|
|
|
272
325
|
test('extracts tool calls from trajectory', () => {
|
|
273
|
-
const results = parseResults(
|
|
326
|
+
const results = parseResults(SAMPLE_CAPTURE_JSONL)
|
|
274
327
|
const result = results[1] // test-002
|
|
275
328
|
|
|
276
329
|
const toolCalls = result.trajectory.filter((s: { type: string }) => s.type === 'tool_call')
|
|
@@ -279,12 +332,19 @@ describe('downstream patterns: full JSONL', () => {
|
|
|
279
332
|
})
|
|
280
333
|
|
|
281
334
|
test('filters by metadata category', () => {
|
|
282
|
-
const results = parseResults(
|
|
335
|
+
const results = parseResults(SAMPLE_CAPTURE_JSONL)
|
|
283
336
|
const uiResults = results.filter((r) => r.metadata.category === 'ui')
|
|
284
337
|
|
|
285
338
|
expect(uiResults).toHaveLength(1)
|
|
286
339
|
expect(uiResults[0]?.id).toBe('test-001')
|
|
287
340
|
})
|
|
341
|
+
|
|
342
|
+
test('identifies results with tool errors', () => {
|
|
343
|
+
const results = parseResults(SAMPLE_CAPTURE_JSONL)
|
|
344
|
+
const withErrors = results.filter((r) => r.toolErrors)
|
|
345
|
+
|
|
346
|
+
expect(withErrors).toHaveLength(0) // Sample data has no errors
|
|
347
|
+
})
|
|
288
348
|
})
|
|
289
349
|
|
|
290
350
|
describe('downstream patterns: advanced filtering', () => {
|
|
@@ -321,7 +381,7 @@ describe('downstream patterns: advanced filtering', () => {
|
|
|
321
381
|
|
|
322
382
|
test('deduplicates by ID keeping latest (merge pattern)', () => {
|
|
323
383
|
const combinedJsonl = `${SAMPLE_SUMMARY_JSONL}
|
|
324
|
-
{"id":"test-001","input":"Create a button v2","output":"I created the button v2","toolCalls":["Write","Edit"],"
|
|
384
|
+
{"id":"test-001","input":"Create a button v2","output":"I created the button v2","toolCalls":["Write","Edit"],"duration":1500}`
|
|
325
385
|
|
|
326
386
|
const results = parseResults(combinedJsonl)
|
|
327
387
|
|
|
@@ -338,7 +398,7 @@ describe('downstream patterns: advanced filtering', () => {
|
|
|
338
398
|
})
|
|
339
399
|
|
|
340
400
|
test('groups by category and counts', () => {
|
|
341
|
-
const results = parseResults(
|
|
401
|
+
const results = parseResults(SAMPLE_CAPTURE_JSONL)
|
|
342
402
|
|
|
343
403
|
// Group by category (simulates jq group_by pattern)
|
|
344
404
|
const grouped = results.reduce<Record<string, number>>((acc, r) => {
|
|
@@ -351,7 +411,7 @@ describe('downstream patterns: advanced filtering', () => {
|
|
|
351
411
|
})
|
|
352
412
|
|
|
353
413
|
test('extracts timing information', () => {
|
|
354
|
-
const results = parseResults(
|
|
414
|
+
const results = parseResults(SAMPLE_CAPTURE_JSONL)
|
|
355
415
|
const result = results[0]
|
|
356
416
|
|
|
357
417
|
expect(result.timing.start).toBe(1704067200000)
|
|
@@ -360,3 +420,110 @@ describe('downstream patterns: advanced filtering', () => {
|
|
|
360
420
|
expect(result.timing.end - result.timing.start).toBe(1234) // matches duration
|
|
361
421
|
})
|
|
362
422
|
})
|
|
423
|
+
|
|
424
|
+
// ============================================================================
|
|
425
|
+
// MCP Server Config Parsing Tests
|
|
426
|
+
// ============================================================================
|
|
427
|
+
|
|
428
|
+
describe('MCP server config parsing', () => {
|
|
429
|
+
test('parses stdio MCP server config', () => {
|
|
430
|
+
const json = '{"type":"stdio","name":"fs","command":"mcp-filesystem","args":["/data"],"env":[]}'
|
|
431
|
+
const proc = Bun.spawn(
|
|
432
|
+
['bun', CLI_PATH, 'capture', '/tmp/test.jsonl', 'bunx', 'claude-code-acp', '--mcp-server', json, '--help'],
|
|
433
|
+
{
|
|
434
|
+
stdout: 'pipe',
|
|
435
|
+
stderr: 'pipe',
|
|
436
|
+
},
|
|
437
|
+
)
|
|
438
|
+
|
|
439
|
+
// If it doesn't crash, the parsing worked
|
|
440
|
+
expect(proc.exited).resolves.toBeDefined()
|
|
441
|
+
})
|
|
442
|
+
|
|
443
|
+
test('parses http MCP server config', () => {
|
|
444
|
+
const json =
|
|
445
|
+
'{"type":"http","name":"api","url":"https://example.com/mcp","headers":[{"name":"Authorization","value":"Bearer token"}]}'
|
|
446
|
+
const proc = Bun.spawn(
|
|
447
|
+
['bun', CLI_PATH, 'capture', '/tmp/test.jsonl', 'bunx', 'claude-code-acp', '--mcp-server', json, '--help'],
|
|
448
|
+
{
|
|
449
|
+
stdout: 'pipe',
|
|
450
|
+
stderr: 'pipe',
|
|
451
|
+
},
|
|
452
|
+
)
|
|
453
|
+
|
|
454
|
+
// If it doesn't crash, the parsing worked
|
|
455
|
+
expect(proc.exited).resolves.toBeDefined()
|
|
456
|
+
})
|
|
457
|
+
|
|
458
|
+
test('accepts multiple MCP servers', () => {
|
|
459
|
+
const json1 = '{"type":"stdio","name":"fs","command":"mcp-filesystem","args":[],"env":[]}'
|
|
460
|
+
const json2 = '{"type":"http","name":"api","url":"https://example.com","headers":[]}'
|
|
461
|
+
const proc = Bun.spawn(
|
|
462
|
+
[
|
|
463
|
+
'bun',
|
|
464
|
+
CLI_PATH,
|
|
465
|
+
'capture',
|
|
466
|
+
'/tmp/test.jsonl',
|
|
467
|
+
'bunx',
|
|
468
|
+
'claude-code-acp',
|
|
469
|
+
'--mcp-server',
|
|
470
|
+
json1,
|
|
471
|
+
'--mcp-server',
|
|
472
|
+
json2,
|
|
473
|
+
'--help',
|
|
474
|
+
],
|
|
475
|
+
{
|
|
476
|
+
stdout: 'pipe',
|
|
477
|
+
stderr: 'pipe',
|
|
478
|
+
},
|
|
479
|
+
)
|
|
480
|
+
|
|
481
|
+
// If it doesn't crash, the parsing worked
|
|
482
|
+
expect(proc.exited).resolves.toBeDefined()
|
|
483
|
+
})
|
|
484
|
+
})
|
|
485
|
+
|
|
486
|
+
// ============================================================================
|
|
487
|
+
// Error Handling Tests
|
|
488
|
+
// ============================================================================
|
|
489
|
+
|
|
490
|
+
describe('error handling', () => {
|
|
491
|
+
test('fails with invalid JSONL format', async () => {
|
|
492
|
+
const tmpFile = `/tmp/invalid-${Date.now()}.jsonl`
|
|
493
|
+
await Bun.write(tmpFile, '{invalid json}\n')
|
|
494
|
+
|
|
495
|
+
const proc = Bun.spawn(['bun', CLI_PATH, 'capture', tmpFile, 'bunx', 'claude-code-acp'], {
|
|
496
|
+
stdout: 'pipe',
|
|
497
|
+
stderr: 'pipe',
|
|
498
|
+
})
|
|
499
|
+
const stderr = await new Response(proc.stderr).text()
|
|
500
|
+
const exitCode = await proc.exited
|
|
501
|
+
|
|
502
|
+
expect(exitCode).not.toBe(0)
|
|
503
|
+
expect(stderr).toContain('Invalid prompt at line 1')
|
|
504
|
+
})
|
|
505
|
+
|
|
506
|
+
test('capture command requires prompts path', async () => {
|
|
507
|
+
const proc = Bun.spawn(['bun', CLI_PATH, 'capture'], {
|
|
508
|
+
stdout: 'pipe',
|
|
509
|
+
stderr: 'pipe',
|
|
510
|
+
})
|
|
511
|
+
const stderr = await new Response(proc.stderr).text()
|
|
512
|
+
const exitCode = await proc.exited
|
|
513
|
+
|
|
514
|
+
expect(exitCode).toBe(1)
|
|
515
|
+
expect(stderr).toContain('prompts.jsonl path is required')
|
|
516
|
+
})
|
|
517
|
+
|
|
518
|
+
test('summarize command requires input path', async () => {
|
|
519
|
+
const proc = Bun.spawn(['bun', CLI_PATH, 'summarize'], {
|
|
520
|
+
stdout: 'pipe',
|
|
521
|
+
stderr: 'pipe',
|
|
522
|
+
})
|
|
523
|
+
const stderr = await new Response(proc.stderr).text()
|
|
524
|
+
const exitCode = await proc.exited
|
|
525
|
+
|
|
526
|
+
expect(exitCode).toBe(1)
|
|
527
|
+
expect(stderr).toContain('results.jsonl path is required')
|
|
528
|
+
})
|
|
529
|
+
})
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@plaited/acp-harness",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.3.1",
|
|
4
4
|
"description": "CLI tool for capturing agent trajectories from ACP-compatible agents",
|
|
5
5
|
"license": "ISC",
|
|
6
6
|
"engines": {
|
|
@@ -18,6 +18,21 @@
|
|
|
18
18
|
"acp-harness": "./bin/cli.ts"
|
|
19
19
|
},
|
|
20
20
|
"type": "module",
|
|
21
|
+
"exports": {
|
|
22
|
+
".": "./src/acp.ts",
|
|
23
|
+
"./schemas": "./src/schemas.ts",
|
|
24
|
+
"./harness": "./src/harness.ts"
|
|
25
|
+
},
|
|
26
|
+
"files": [
|
|
27
|
+
"./src/**",
|
|
28
|
+
"./bin/**",
|
|
29
|
+
"!./src/**/tests/*",
|
|
30
|
+
"!./src/**/*.spec.ts",
|
|
31
|
+
"!./src/**/*.docker.ts",
|
|
32
|
+
"!./bin/**/tests/*",
|
|
33
|
+
"!./bin/**/*.spec.ts",
|
|
34
|
+
"!./bin/**/*.docker.ts"
|
|
35
|
+
],
|
|
21
36
|
"publishConfig": {
|
|
22
37
|
"access": "public"
|
|
23
38
|
},
|
|
@@ -40,11 +55,12 @@
|
|
|
40
55
|
]
|
|
41
56
|
},
|
|
42
57
|
"dependencies": {
|
|
43
|
-
"
|
|
44
|
-
"
|
|
58
|
+
"zod": "^4.3.5",
|
|
59
|
+
"@plaited/development-skills": "0.4.1"
|
|
45
60
|
},
|
|
46
61
|
"peerDependencies": {
|
|
47
|
-
"
|
|
62
|
+
"typescript-language-server": "^5.1.3",
|
|
63
|
+
"@agentclientprotocol/sdk": "^0.13.0"
|
|
48
64
|
},
|
|
49
65
|
"devDependencies": {
|
|
50
66
|
"@biomejs/biome": "2.3.11",
|
package/src/acp-client.ts
CHANGED
|
@@ -30,10 +30,11 @@ import type {
|
|
|
30
30
|
SessionNotification,
|
|
31
31
|
} from '@agentclientprotocol/sdk'
|
|
32
32
|
import { version } from '../package.json' with { type: 'json' }
|
|
33
|
-
import { ACP_METHODS, ACP_PROTOCOL_VERSION, DEFAULT_ACP_CLIENT_NAME } from './acp.constants.ts'
|
|
34
|
-
import { RequestPermissionRequestSchema, SessionNotificationSchema } from './acp.schemas.ts'
|
|
35
|
-
import type { Session } from './acp.types.ts'
|
|
36
33
|
import { createACPTransport } from './acp-transport.ts'
|
|
34
|
+
import { ACP_METHODS, ACP_PROTOCOL_VERSION, DEFAULT_ACP_CLIENT_NAME, DEFAULT_POLLING_INTERVAL } from './constants.ts'
|
|
35
|
+
import type { Session } from './schemas.ts'
|
|
36
|
+
import { RequestPermissionRequestSchema, SessionNotificationSchema } from './schemas.ts'
|
|
37
|
+
|
|
37
38
|
// ============================================================================
|
|
38
39
|
// Types
|
|
39
40
|
// ============================================================================
|
|
@@ -120,7 +121,7 @@ export const createACPClient = (config: ACPClientConfig) => {
|
|
|
120
121
|
clientInfo = { name: DEFAULT_ACP_CLIENT_NAME, version },
|
|
121
122
|
capabilities = {},
|
|
122
123
|
timeout = 30000,
|
|
123
|
-
pollingInterval =
|
|
124
|
+
pollingInterval = DEFAULT_POLLING_INTERVAL,
|
|
124
125
|
onPermissionRequest,
|
|
125
126
|
} = config
|
|
126
127
|
|
package/src/acp-transport.ts
CHANGED
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* newline-delimited JSON messages with Zod runtime validation.
|
|
11
11
|
*/
|
|
12
12
|
|
|
13
|
-
import { JSON_RPC_ERRORS } from './
|
|
13
|
+
import { JSON_RPC_ERRORS } from './constants.ts'
|
|
14
14
|
import type {
|
|
15
15
|
JsonRpcError,
|
|
16
16
|
JsonRpcErrorResponse,
|
|
@@ -19,8 +19,8 @@ import type {
|
|
|
19
19
|
JsonRpcRequest,
|
|
20
20
|
JsonRpcResponse,
|
|
21
21
|
JsonRpcSuccessResponse,
|
|
22
|
-
} from './
|
|
23
|
-
import { JsonRpcMessageSchema } from './
|
|
22
|
+
} from './schemas.ts'
|
|
23
|
+
import { JsonRpcMessageSchema } from './schemas.ts'
|
|
24
24
|
|
|
25
25
|
// ============================================================================
|
|
26
26
|
// Types
|
|
@@ -71,7 +71,7 @@ type PipedSubprocess = {
|
|
|
71
71
|
}
|
|
72
72
|
|
|
73
73
|
/** Custom error for ACP transport failures */
|
|
74
|
-
class ACPTransportError extends Error {
|
|
74
|
+
export class ACPTransportError extends Error {
|
|
75
75
|
constructor(
|
|
76
76
|
message: string,
|
|
77
77
|
public readonly code?: number,
|
|
@@ -275,9 +275,12 @@ export const createACPTransport = (config: ACPTransportConfig) => {
|
|
|
275
275
|
|
|
276
276
|
// Read stdout for JSON-RPC messages
|
|
277
277
|
const readStdout = async () => {
|
|
278
|
+
if (!subprocess) {
|
|
279
|
+
throw new ACPTransportError('Subprocess not started')
|
|
280
|
+
}
|
|
278
281
|
// Type assertion needed: Bun's ReadableStreamDefaultReader includes readMany
|
|
279
282
|
// but node:stream/web reader returned by getReader() doesn't have it
|
|
280
|
-
const reader = subprocess
|
|
283
|
+
const reader = subprocess.stdout.getReader() as globalThis.ReadableStreamDefaultReader<Uint8Array>
|
|
281
284
|
stdoutReader = reader
|
|
282
285
|
const decoder = new TextDecoder()
|
|
283
286
|
|
|
@@ -303,9 +306,12 @@ export const createACPTransport = (config: ACPTransportConfig) => {
|
|
|
303
306
|
|
|
304
307
|
// Read stderr for debugging
|
|
305
308
|
const readStderr = async () => {
|
|
309
|
+
if (!subprocess) {
|
|
310
|
+
throw new ACPTransportError('Subprocess not started')
|
|
311
|
+
}
|
|
306
312
|
// Type assertion needed: Bun's ReadableStreamDefaultReader includes readMany
|
|
307
313
|
// but node:stream/web reader returned by getReader() doesn't have it
|
|
308
|
-
const reader = subprocess
|
|
314
|
+
const reader = subprocess.stderr.getReader() as globalThis.ReadableStreamDefaultReader<Uint8Array>
|
|
309
315
|
stderrReader = reader
|
|
310
316
|
const decoder = new TextDecoder()
|
|
311
317
|
|
|
@@ -326,7 +332,8 @@ export const createACPTransport = (config: ACPTransportConfig) => {
|
|
|
326
332
|
}
|
|
327
333
|
}
|
|
328
334
|
|
|
329
|
-
// Start reading streams
|
|
335
|
+
// Start reading streams (fire-and-forget pattern)
|
|
336
|
+
// These run concurrently and clean up via optional chaining in close()
|
|
330
337
|
readStdout()
|
|
331
338
|
readStderr()
|
|
332
339
|
|