@plaited/acp-harness 0.2.5

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/.claude/rules/accuracy.md +43 -0
  2. package/.claude/rules/bun-apis.md +80 -0
  3. package/.claude/rules/code-review.md +254 -0
  4. package/.claude/rules/git-workflow.md +37 -0
  5. package/.claude/rules/github.md +154 -0
  6. package/.claude/rules/testing.md +172 -0
  7. package/.claude/skills/acp-harness/SKILL.md +310 -0
  8. package/.claude/skills/acp-harness/assets/Dockerfile.acp +25 -0
  9. package/.claude/skills/acp-harness/assets/docker-compose.acp.yml +19 -0
  10. package/.claude/skills/acp-harness/references/downstream.md +288 -0
  11. package/.claude/skills/acp-harness/references/output-formats.md +221 -0
  12. package/.claude-plugin/marketplace.json +15 -0
  13. package/.claude-plugin/plugin.json +16 -0
  14. package/.github/CODEOWNERS +6 -0
  15. package/.github/workflows/ci.yml +63 -0
  16. package/.github/workflows/publish.yml +146 -0
  17. package/.mcp.json +20 -0
  18. package/CLAUDE.md +92 -0
  19. package/Dockerfile.test +23 -0
  20. package/LICENSE +15 -0
  21. package/README.md +94 -0
  22. package/bin/cli.ts +670 -0
  23. package/bin/tests/cli.spec.ts +362 -0
  24. package/biome.json +96 -0
  25. package/bun.lock +513 -0
  26. package/docker-compose.test.yml +21 -0
  27. package/package.json +57 -0
  28. package/scripts/bun-test-wrapper.sh +46 -0
  29. package/src/acp-client.ts +503 -0
  30. package/src/acp-helpers.ts +121 -0
  31. package/src/acp-transport.ts +455 -0
  32. package/src/acp-utils.ts +341 -0
  33. package/src/acp.constants.ts +56 -0
  34. package/src/acp.schemas.ts +161 -0
  35. package/src/acp.ts +27 -0
  36. package/src/acp.types.ts +28 -0
  37. package/src/tests/acp-client.spec.ts +205 -0
  38. package/src/tests/acp-helpers.spec.ts +105 -0
  39. package/src/tests/acp-integration.docker.ts +214 -0
  40. package/src/tests/acp-transport.spec.ts +153 -0
  41. package/src/tests/acp-utils.spec.ts +394 -0
  42. package/src/tests/fixtures/.claude/settings.local.json +8 -0
  43. package/src/tests/fixtures/.claude/skills/greeting/SKILL.md +17 -0
  44. package/src/tests/fixtures/calculator-mcp.ts +215 -0
  45. package/tsconfig.json +32 -0
package/bin/tests/cli.spec.ts ADDED
@@ -0,0 +1,362 @@
1
+ import { describe, expect, test } from 'bun:test'
2
+ import { join } from 'node:path'
3
+ import { z } from 'zod'
4
+
5
+ /**
6
+ * Tests for the acp-harness CLI.
7
+ *
8
+ * @remarks
9
+ * Tests CLI argument parsing, help output, and output format schemas.
10
+ * Integration tests requiring an actual ACP agent are in *.docker.ts files.
11
+ */
12
+
13
+ const CLI_PATH = join(import.meta.dir, '..', 'cli.ts')
14
+
15
+ // ============================================================================
16
+ // CLI Invocation Tests
17
+ // ============================================================================
18
+
19
+ describe('CLI invocation', () => {
20
+ test('shows help with --help flag', async () => {
21
+ const proc = Bun.spawn(['bun', CLI_PATH, '--help'], {
22
+ stdout: 'pipe',
23
+ stderr: 'pipe',
24
+ })
25
+ const stdout = await new Response(proc.stdout).text()
26
+ const exitCode = await proc.exited
27
+
28
+ expect(exitCode).toBe(0)
29
+ expect(stdout).toContain('Usage: acp-harness')
30
+ expect(stdout).toContain('--cmd, --command')
31
+ expect(stdout).toContain('--output')
32
+ expect(stdout).toContain('--format')
33
+ })
34
+
35
+ test('shows help with -h flag', async () => {
36
+ const proc = Bun.spawn(['bun', CLI_PATH, '-h'], {
37
+ stdout: 'pipe',
38
+ stderr: 'pipe',
39
+ })
40
+ const stdout = await new Response(proc.stdout).text()
41
+ const exitCode = await proc.exited
42
+
43
+ expect(exitCode).toBe(0)
44
+ expect(stdout).toContain('Usage: acp-harness')
45
+ })
46
+
47
+ test('shows help when no arguments provided', async () => {
48
+ const proc = Bun.spawn(['bun', CLI_PATH], {
49
+ stdout: 'pipe',
50
+ stderr: 'pipe',
51
+ })
52
+ const stdout = await new Response(proc.stdout).text()
53
+ const exitCode = await proc.exited
54
+
55
+ expect(exitCode).toBe(1) // Exits with error when no args
56
+ expect(stdout).toContain('Usage: acp-harness')
57
+ })
58
+
59
+ test('help shows example commands', async () => {
60
+ const proc = Bun.spawn(['bun', CLI_PATH, '--help'], {
61
+ stdout: 'pipe',
62
+ stderr: 'pipe',
63
+ })
64
+ const stdout = await new Response(proc.stdout).text()
65
+
66
+ expect(stdout).toContain('bunx claude-code-acp')
67
+ expect(stdout).toContain('bun ./my-adapter.ts')
68
+ expect(stdout).toContain('--format judge')
69
+ })
70
+
71
+ test('help shows both --cmd and --command flags', async () => {
72
+ const proc = Bun.spawn(['bun', CLI_PATH, '--help'], {
73
+ stdout: 'pipe',
74
+ stderr: 'pipe',
75
+ })
76
+ const stdout = await new Response(proc.stdout).text()
77
+
78
+ expect(stdout).toContain('--cmd')
79
+ expect(stdout).toContain('--command')
80
+ })
81
+
82
+ test('fails with non-existent prompts file', async () => {
83
+ const proc = Bun.spawn(['bun', CLI_PATH, 'nonexistent.jsonl'], {
84
+ stdout: 'pipe',
85
+ stderr: 'pipe',
86
+ })
87
+ const stderr = await new Response(proc.stderr).text()
88
+ const exitCode = await proc.exited
89
+
90
+ expect(exitCode).not.toBe(0)
91
+ expect(stderr).toContain('Error')
92
+ })
93
+ })
94
+
95
+ // ============================================================================
96
+ // Output Format Schemas (for downstream validation)
97
+ // ============================================================================
98
+
99
+ const SummaryResultSchema = z.object({
100
+ id: z.string(),
101
+ input: z.string(),
102
+ output: z.string(),
103
+ toolCalls: z.array(z.string()),
104
+ status: z.enum(['passed', 'failed', 'error', 'timeout']),
105
+ duration: z.number(),
106
+ })
107
+
108
+ const TrajectoryStepSchema = z.discriminatedUnion('type', [
109
+ z.object({
110
+ type: z.literal('thought'),
111
+ content: z.string(),
112
+ timestamp: z.number(),
113
+ stepId: z.string(),
114
+ }),
115
+ z.object({
116
+ type: z.literal('message'),
117
+ content: z.string(),
118
+ timestamp: z.number(),
119
+ stepId: z.string(),
120
+ }),
121
+ z.object({
122
+ type: z.literal('tool_call'),
123
+ name: z.string(),
124
+ status: z.string(),
125
+ input: z.unknown().optional(),
126
+ output: z.unknown().optional(),
127
+ duration: z.number().optional(),
128
+ timestamp: z.number(),
129
+ stepId: z.string(),
130
+ }),
131
+ z.object({
132
+ type: z.literal('plan'),
133
+ entries: z.array(
134
+ z.object({
135
+ content: z.string(),
136
+ status: z.string(),
137
+ }),
138
+ ),
139
+ timestamp: z.number(),
140
+ stepId: z.string(),
141
+ }),
142
+ ])
143
+
144
+ const FullResultSchema = z.object({
145
+ id: z.string(),
146
+ input: z.string(),
147
+ output: z.string(),
148
+ expected: z.string().optional(),
149
+ trajectory: z.array(TrajectoryStepSchema),
150
+ metadata: z.record(z.string(), z.unknown()),
151
+ timing: z.object({
152
+ start: z.number(),
153
+ end: z.number(),
154
+ firstResponse: z.number().optional(),
155
+ }),
156
+ status: z.enum(['passed', 'failed', 'error', 'timeout']),
157
+ errors: z.array(z.string()).optional(),
158
+ })
159
+
160
+ // ============================================================================
161
+ // Sample Output Data (matches harness output format)
162
+ // ============================================================================
163
+
164
+ const SAMPLE_SUMMARY_JSONL = `{"id":"test-001","input":"Create a button","output":"I created the button","toolCalls":["Write"],"status":"passed","duration":1234}
165
+ {"id":"test-002","input":"Fix the bug","output":"I fixed the bug","toolCalls":["Read","Edit"],"status":"passed","duration":2567}
166
+ {"id":"test-003","input":"Broken test","output":"","toolCalls":[],"status":"failed","duration":500}`
167
+
168
+ const SAMPLE_FULL_JSONL = `{"id":"test-001","input":"Create a button","output":"I created the button","trajectory":[{"type":"thought","content":"I'll create a button template","timestamp":100,"stepId":"test-001-step-1"},{"type":"tool_call","name":"Write","status":"completed","input":{"file_path":"src/button.tsx","content":"export const Button = () => <button>Click</button>"},"output":"File written","duration":234,"timestamp":150,"stepId":"test-001-step-2"},{"type":"message","content":"I created the button","timestamp":500,"stepId":"test-001-step-3"}],"metadata":{"category":"ui","agent":"claude-code-acp"},"timing":{"start":1704067200000,"end":1704067201234,"firstResponse":100},"status":"passed"}
169
+ {"id":"test-002","input":"Fix the bug","output":"I fixed the bug","trajectory":[{"type":"tool_call","name":"Read","status":"completed","input":{"file_path":"src/app.ts"},"output":"file contents...","duration":100,"timestamp":50,"stepId":"test-002-step-1"},{"type":"tool_call","name":"Edit","status":"completed","input":{"file_path":"src/app.ts","old_string":"bug","new_string":"fix"},"duration":150,"timestamp":200,"stepId":"test-002-step-2"},{"type":"message","content":"I fixed the bug","timestamp":400,"stepId":"test-002-step-3"}],"metadata":{"category":"bugfix","agent":"claude-code-acp"},"timing":{"start":1704067300000,"end":1704067302567},"status":"passed"}`
170
+
171
+ // ============================================================================
172
+ // Downstream Pattern Tests
173
+ // ============================================================================
174
+
175
+ describe('downstream patterns: summary JSONL', () => {
176
+ const parseResults = (jsonl: string) =>
177
+ jsonl
178
+ .trim()
179
+ .split('\n')
180
+ .map((line) => JSON.parse(line))
181
+
182
+ test('parses summary JSONL correctly', () => {
183
+ const results = parseResults(SAMPLE_SUMMARY_JSONL)
184
+
185
+ expect(results).toHaveLength(3)
186
+ for (const result of results) {
187
+ expect(() => SummaryResultSchema.parse(result)).not.toThrow()
188
+ }
189
+ })
190
+
191
+ test('filters by status (jq pattern)', () => {
192
+ const results = parseResults(SAMPLE_SUMMARY_JSONL)
193
+ const failed = results.filter((r) => r.status === 'failed')
194
+
195
+ expect(failed).toHaveLength(1)
196
+ expect(failed[0]?.id).toBe('test-003')
197
+ })
198
+
199
+ test('calculates average duration (jq pattern)', () => {
200
+ const results = parseResults(SAMPLE_SUMMARY_JSONL)
201
+ const avg = results.reduce((sum, r) => sum + r.duration, 0) / results.length
202
+
203
+ expect(avg).toBeCloseTo(1433.67, 0)
204
+ })
205
+
206
+ test('counts tool usage (jq pattern)', () => {
207
+ const results = parseResults(SAMPLE_SUMMARY_JSONL)
208
+ const allTools = results.flatMap((r) => r.toolCalls)
209
+ const toolCounts = allTools.reduce<Record<string, number>>((acc, tool) => {
210
+ acc[tool] = (acc[tool] ?? 0) + 1
211
+ return acc
212
+ }, {})
213
+
214
+ expect(toolCounts).toEqual({ Write: 1, Read: 1, Edit: 1 })
215
+ })
216
+
217
+ test('calculates pass rate (jq pattern)', () => {
218
+ const results = parseResults(SAMPLE_SUMMARY_JSONL)
219
+ const passed = results.filter((r) => r.status === 'passed').length
220
+ const total = results.length
221
+
222
+ expect(passed).toBe(2)
223
+ expect(total).toBe(3)
224
+ expect(passed / total).toBeCloseTo(0.667, 2)
225
+ })
226
+ })
227
+
228
+ describe('downstream patterns: full JSONL', () => {
229
+ const parseResults = (jsonl: string) =>
230
+ jsonl
231
+ .trim()
232
+ .split('\n')
233
+ .map((line) => JSON.parse(line))
234
+
235
+ test('parses full JSONL with trajectories', () => {
236
+ const results = parseResults(SAMPLE_FULL_JSONL)
237
+
238
+ expect(results).toHaveLength(2)
239
+ for (const result of results) {
240
+ expect(() => FullResultSchema.parse(result)).not.toThrow()
241
+ }
242
+ })
243
+
244
+ test('step IDs follow expected format', () => {
245
+ const results = parseResults(SAMPLE_FULL_JSONL)
246
+
247
+ for (const result of results) {
248
+ for (const step of result.trajectory) {
249
+ expect(step.stepId).toMatch(new RegExp(`^${result.id}-step-\\d+$`))
250
+ }
251
+ }
252
+ })
253
+
254
+ test('step-level retrieval pattern works', () => {
255
+ const results = parseResults(SAMPLE_FULL_JSONL)
256
+
257
+ // Build step index (pattern from downstream.md)
258
+ const stepIndex = new Map<string, unknown>()
259
+ for (const result of results) {
260
+ for (const step of result.trajectory) {
261
+ stepIndex.set(step.stepId, step)
262
+ }
263
+ }
264
+
265
+ // Retrieve specific step by ID
266
+ const step = stepIndex.get('test-001-step-2') as { name: string; input: { file_path: string } }
267
+ expect(step).toBeDefined()
268
+ expect(step.name).toBe('Write')
269
+ expect(step.input.file_path).toBe('src/button.tsx')
270
+ })
271
+
272
+ test('extracts tool calls from trajectory', () => {
273
+ const results = parseResults(SAMPLE_FULL_JSONL)
274
+ const result = results[1] // test-002
275
+
276
+ const toolCalls = result.trajectory.filter((s: { type: string }) => s.type === 'tool_call')
277
+ expect(toolCalls).toHaveLength(2)
278
+ expect(toolCalls.map((t: { name: string }) => t.name)).toEqual(['Read', 'Edit'])
279
+ })
280
+
281
+ test('filters by metadata category', () => {
282
+ const results = parseResults(SAMPLE_FULL_JSONL)
283
+ const uiResults = results.filter((r) => r.metadata.category === 'ui')
284
+
285
+ expect(uiResults).toHaveLength(1)
286
+ expect(uiResults[0]?.id).toBe('test-001')
287
+ })
288
+ })
289
+
290
+ describe('downstream patterns: advanced filtering', () => {
291
+ const parseResults = (jsonl: string) =>
292
+ jsonl
293
+ .trim()
294
+ .split('\n')
295
+ .map((line) => JSON.parse(line))
296
+
297
+ test('filters by tool usage (jq contains pattern)', () => {
298
+ const results = parseResults(SAMPLE_SUMMARY_JSONL)
299
+ const withWrite = results.filter((r) => r.toolCalls.includes('Write'))
300
+
301
+ expect(withWrite).toHaveLength(1)
302
+ expect(withWrite[0]?.id).toBe('test-001')
303
+ })
304
+
305
+ test('filters by duration threshold (slow evaluations)', () => {
306
+ const results = parseResults(SAMPLE_SUMMARY_JSONL)
307
+ const slow = results.filter((r) => r.duration > 2000)
308
+
309
+ expect(slow).toHaveLength(1)
310
+ expect(slow[0]?.id).toBe('test-002')
311
+ })
312
+
313
+ test('finds slowest evaluations (sorted)', () => {
314
+ const results = parseResults(SAMPLE_SUMMARY_JSONL)
315
+ const sorted = [...results].sort((a, b) => b.duration - a.duration)
316
+ const top2 = sorted.slice(0, 2)
317
+
318
+ expect(top2[0]?.id).toBe('test-002')
319
+ expect(top2[1]?.id).toBe('test-001')
320
+ })
321
+
322
+ test('deduplicates by ID keeping latest (merge pattern)', () => {
323
+ const combinedJsonl = `${SAMPLE_SUMMARY_JSONL}
324
+ {"id":"test-001","input":"Create a button v2","output":"I created the button v2","toolCalls":["Write","Edit"],"status":"passed","duration":1500}`
325
+
326
+ const results = parseResults(combinedJsonl)
327
+
328
+ // Group by ID and keep last occurrence (simulates jq group_by + last)
329
+ const byId = new Map<string, unknown>()
330
+ for (const result of results) {
331
+ byId.set(result.id, result)
332
+ }
333
+ const deduped = Array.from(byId.values())
334
+
335
+ expect(deduped).toHaveLength(3) // test-001, test-002, test-003
336
+ const test001 = deduped.find((r) => (r as { id: string }).id === 'test-001') as { input: string }
337
+ expect(test001?.input).toBe('Create a button v2')
338
+ })
339
+
340
+ test('groups by category and counts', () => {
341
+ const results = parseResults(SAMPLE_FULL_JSONL)
342
+
343
+ // Group by category (simulates jq group_by pattern)
344
+ const grouped = results.reduce<Record<string, number>>((acc, r) => {
345
+ const cat = r.metadata.category as string
346
+ acc[cat] = (acc[cat] ?? 0) + 1
347
+ return acc
348
+ }, {})
349
+
350
+ expect(grouped).toEqual({ ui: 1, bugfix: 1 })
351
+ })
352
+
353
+ test('extracts timing information', () => {
354
+ const results = parseResults(SAMPLE_FULL_JSONL)
355
+ const result = results[0]
356
+
357
+ expect(result.timing.start).toBe(1704067200000)
358
+ expect(result.timing.end).toBe(1704067201234)
359
+ expect(result.timing.firstResponse).toBe(100)
360
+ expect(result.timing.end - result.timing.start).toBe(1234) // matches duration
361
+ })
362
+ })
package/biome.json ADDED
@@ -0,0 +1,96 @@
1
+ {
2
+ "$schema": "https://biomejs.dev/schemas/2.3.11/schema.json",
3
+ "vcs": { "enabled": true, "clientKind": "git", "useIgnoreFile": true },
4
+ "files": {
5
+ "ignoreUnknown": false,
6
+ "includes": ["**"]
7
+ },
8
+ "formatter": {
9
+ "enabled": true,
10
+ "formatWithErrors": false,
11
+ "indentStyle": "space",
12
+ "indentWidth": 2,
13
+ "lineEnding": "lf",
14
+ "lineWidth": 120,
15
+ "attributePosition": "multiline",
16
+ "bracketSameLine": false,
17
+ "bracketSpacing": true,
18
+ "expand": "auto",
19
+ "useEditorconfig": true,
20
+ "includes": ["**", "!!**/dist/**/*", "!!**/*.json", "!!**/*.md", "!!**/*.d.ts"]
21
+ },
22
+ "linter": {
23
+ "enabled": true,
24
+ "rules": {
25
+ "recommended": true,
26
+ "correctness": {
27
+ "noUnusedVariables": "error",
28
+ "noUnusedImports": "error",
29
+ "useImportExtensions": "error"
30
+ },
31
+ "performance": {
32
+ "noAccumulatingSpread": "off"
33
+ },
34
+ "style": {
35
+ "noNestedTernary": "off",
36
+ "noNonNullAssertion": "off",
37
+ "useBlockStatements": "off",
38
+ "useTemplate": "error",
39
+ "useImportType": "error",
40
+ "noNegationElse": "warn"
41
+ },
42
+ "suspicious": {
43
+ "noExplicitAny": "error",
44
+ "noConsole": {
45
+ "level": "warn",
46
+ "options": {
47
+ "allow": ["error", "warn", "table", "info", "group", "groupEnd"]
48
+ }
49
+ }
50
+ },
51
+ "complexity": {
52
+ "useLiteralKeys": "warn"
53
+ }
54
+ },
55
+ "includes": ["**", "!!**/*.d.ts"]
56
+ },
57
+ "javascript": {
58
+ "formatter": {
59
+ "jsxQuoteStyle": "single",
60
+ "quoteProperties": "asNeeded",
61
+ "trailingCommas": "all",
62
+ "semicolons": "asNeeded",
63
+ "arrowParentheses": "always",
64
+ "bracketSameLine": false,
65
+ "quoteStyle": "single",
66
+ "attributePosition": "multiline",
67
+ "bracketSpacing": true
68
+ }
69
+ },
70
+ "overrides": [
71
+ {
72
+ "includes": ["**/*.ts", "**/*.tsx"],
73
+ "linter": {
74
+ "rules": {
75
+ "complexity": { "noArguments": "error" },
76
+ "style": { "useConst": "error" }
77
+ }
78
+ }
79
+ },
80
+ {
81
+ "includes": ["**/tests/**/*", "**/skills/**/*"],
82
+ "linter": {
83
+ "rules": {
84
+ "suspicious": {
85
+ "noExplicitAny": "warn",
86
+ "noConsole": "off"
87
+ }
88
+ }
89
+ }
90
+ }
91
+ ],
92
+ "assist": {
93
+ "enabled": true,
94
+ "actions": { "source": { "organizeImports": "on" } }
95
+ }
96
+ }