@plaited/acp-harness 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +53 -31
- package/bin/cli.ts +15 -0
- package/package.json +5 -7
- package/src/acp-client.ts +7 -4
- package/src/adapter-check.ts +0 -1
- package/src/adapter-scaffold.ts +16 -15
- package/src/calibrate.ts +28 -8
- package/src/capture.ts +114 -33
- package/src/grader-loader.ts +3 -3
- package/src/harness.ts +4 -0
- package/src/headless-cli.ts +433 -0
- package/src/headless-history-builder.ts +141 -0
- package/src/headless-output-parser.ts +251 -0
- package/src/headless-session-manager.ts +389 -0
- package/src/headless.schemas.ts +241 -0
- package/src/headless.ts +71 -0
- package/src/headless.types.ts +19 -0
- package/src/integration_tests/acp-claude.spec.ts +170 -0
- package/src/integration_tests/acp-gemini.spec.ts +174 -0
- package/src/schemas.ts +88 -36
- package/src/summarize.ts +4 -8
- package/src/tests/acp-client.spec.ts +1 -1
- package/src/tests/capture-cli.spec.ts +188 -0
- package/src/tests/capture-helpers.spec.ts +229 -67
- package/src/tests/constants.spec.ts +121 -0
- package/src/tests/fixtures/grader-exec.py +3 -3
- package/src/tests/fixtures/grader-module.ts +2 -2
- package/src/tests/grader-loader.spec.ts +5 -5
- package/src/tests/headless.spec.ts +460 -0
- package/src/tests/schemas-cli.spec.ts +142 -0
- package/src/tests/schemas.spec.ts +657 -0
- package/src/tests/summarize-helpers.spec.ts +3 -3
- package/src/tests/trials-cli.spec.ts +145 -0
- package/src/trials.ts +6 -19
- package/src/validate-refs.ts +1 -1
- package/src/tests/acp-integration.docker.ts +0 -214
package/src/schemas.ts
CHANGED
|
@@ -222,14 +222,16 @@ export type McpServerConfig = z.infer<typeof McpServerSchema>
|
|
|
222
222
|
*
|
|
223
223
|
* @remarks
|
|
224
224
|
* Each line in a prompts.jsonl file should match this schema.
|
|
225
|
+
* - Single turn: `input: "Hello"` - one prompt, one session
|
|
226
|
+
* - Multi-turn: `input: ["Hello", "How are you?", "Goodbye"]` - sequential turns in one session
|
|
225
227
|
*/
|
|
226
228
|
export const PromptCaseSchema = z.object({
|
|
227
229
|
/** Unique identifier for the test case */
|
|
228
230
|
id: z.string(),
|
|
229
|
-
/**
|
|
230
|
-
input: z.string(),
|
|
231
|
-
/** Optional
|
|
232
|
-
|
|
231
|
+
/** Prompt text(s) - string for single turn, array for multi-turn conversation */
|
|
232
|
+
input: z.union([z.string(), z.array(z.string())]),
|
|
233
|
+
/** Optional grader context hint (not a strict expected match) */
|
|
234
|
+
hint: z.string().optional(),
|
|
233
235
|
/** Optional reference solution for validation */
|
|
234
236
|
reference: z.string().optional(),
|
|
235
237
|
/** Optional metadata for categorization and analysis */
|
|
@@ -268,25 +270,13 @@ export type GraderResult = z.infer<typeof GraderResultSchema>
|
|
|
268
270
|
*
|
|
269
271
|
* @remarks
|
|
270
272
|
* User-provided graders implement this interface to score agent outputs.
|
|
271
|
-
*
|
|
272
|
-
*
|
|
273
|
-
* ```typescript
|
|
274
|
-
* import type { Grader } from '@plaited/acp-harness/schemas'
|
|
275
|
-
*
|
|
276
|
-
* export const grade: Grader = async ({ input, output, expected, trajectory }) => {
|
|
277
|
-
* const pass = output.toLowerCase().includes(expected?.toLowerCase() ?? '')
|
|
278
|
-
* return {
|
|
279
|
-
* pass,
|
|
280
|
-
* score: pass ? 1 : 0,
|
|
281
|
-
* reasoning: pass ? 'Contains expected answer' : 'Missing expected answer'
|
|
282
|
-
* }
|
|
283
|
-
* }
|
|
284
|
-
* ```
|
|
273
|
+
* - `input` is the original prompt (string or array for multi-turn)
|
|
274
|
+
* - `hint` provides grader context (renamed from `expected`)
|
|
285
275
|
*/
|
|
286
276
|
export type Grader = (params: {
|
|
287
|
-
input: string
|
|
277
|
+
input: string | string[]
|
|
288
278
|
output: string
|
|
289
|
-
|
|
279
|
+
hint?: string
|
|
290
280
|
trajectory?: TrajectoryStep[]
|
|
291
281
|
}) => Promise<GraderResult>
|
|
292
282
|
|
|
@@ -307,6 +297,24 @@ export const ToolInputSchema = z
|
|
|
307
297
|
/** Tool input type */
|
|
308
298
|
export type ToolInput = z.infer<typeof ToolInputSchema>
|
|
309
299
|
|
|
300
|
+
/**
|
|
301
|
+
* Token usage schema for adapter-specific usage data.
|
|
302
|
+
*
|
|
303
|
+
* @remarks
|
|
304
|
+
* ACP SDK's SessionNotification doesn't declare a 'usage' field, but adapters
|
|
305
|
+
* like Claude Code extend responses with token counts at runtime. This schema
|
|
306
|
+
* provides runtime validation for that extension.
|
|
307
|
+
*/
|
|
308
|
+
export const TokenUsageSchema = z
|
|
309
|
+
.object({
|
|
310
|
+
inputTokens: z.number().optional(),
|
|
311
|
+
outputTokens: z.number().optional(),
|
|
312
|
+
})
|
|
313
|
+
.passthrough()
|
|
314
|
+
|
|
315
|
+
/** Token usage type */
|
|
316
|
+
export type TokenUsage = z.infer<typeof TokenUsageSchema>
|
|
317
|
+
|
|
310
318
|
/** Thought trajectory step */
|
|
311
319
|
export const ThoughtStepSchema = z.object({
|
|
312
320
|
type: z.literal('thought'),
|
|
@@ -366,36 +374,80 @@ export type IndexedStep = TrajectoryStep & { stepId: string }
|
|
|
366
374
|
// Capture Result Schemas
|
|
367
375
|
// ============================================================================
|
|
368
376
|
|
|
369
|
-
/**
|
|
377
|
+
/**
|
|
378
|
+
* Timing information for a capture result.
|
|
379
|
+
*
|
|
380
|
+
* @remarks
|
|
381
|
+
* Captures both absolute timestamps and derived durations for analysis:
|
|
382
|
+
* - `sessionCreation`: Time to initialize session (agent startup overhead)
|
|
383
|
+
* - `total`: End-to-end duration including all turns
|
|
384
|
+
* - `firstResponse`: Latency to first agent output (optional)
|
|
385
|
+
*
|
|
386
|
+
* Token counts are adapter-dependent and only present if the adapter
|
|
387
|
+
* exposes usage information (e.g., Claude Code includes them, others may not).
|
|
388
|
+
*
|
|
389
|
+
* @public
|
|
390
|
+
*/
|
|
370
391
|
export const TimingSchema = z.object({
|
|
392
|
+
/** Epoch timestamp when capture started */
|
|
371
393
|
start: z.number(),
|
|
394
|
+
/** Epoch timestamp when capture ended */
|
|
372
395
|
end: z.number(),
|
|
396
|
+
/** Time to first response (ms from start) */
|
|
373
397
|
firstResponse: z.number().optional(),
|
|
398
|
+
/** Time to create session (ms) - measures agent initialization overhead */
|
|
399
|
+
sessionCreation: z.number(),
|
|
400
|
+
/** Total duration (end - start) in milliseconds */
|
|
401
|
+
total: z.number(),
|
|
402
|
+
/** Input tokens consumed (if available from ACP adapter) */
|
|
403
|
+
inputTokens: z.number().optional(),
|
|
404
|
+
/** Output tokens generated (if available from ACP adapter) */
|
|
405
|
+
outputTokens: z.number().optional(),
|
|
374
406
|
})
|
|
375
407
|
|
|
376
|
-
/**
|
|
408
|
+
/**
|
|
409
|
+
* Timing information type inferred from TimingSchema.
|
|
410
|
+
*
|
|
411
|
+
* @public
|
|
412
|
+
*/
|
|
377
413
|
export type Timing = z.infer<typeof TimingSchema>
|
|
378
414
|
|
|
415
|
+
/**
|
|
416
|
+
* Trajectory richness level indicating the depth of captured agent activity.
|
|
417
|
+
*
|
|
418
|
+
* @remarks
|
|
419
|
+
* Different adapters provide varying levels of detail:
|
|
420
|
+
* - `full`: Thoughts, tool calls, plans (e.g., Claude Code adapter)
|
|
421
|
+
* - `minimal`: Basic output only (e.g., Droid adapter)
|
|
422
|
+
* - `messages-only`: Messages without internal reasoning
|
|
423
|
+
*/
|
|
424
|
+
export const TrajectoryRichnessSchema = z.enum(['full', 'minimal', 'messages-only'])
|
|
425
|
+
|
|
426
|
+
/** Trajectory richness type */
|
|
427
|
+
export type TrajectoryRichness = z.infer<typeof TrajectoryRichnessSchema>
|
|
428
|
+
|
|
379
429
|
/**
|
|
380
430
|
* Capture result schema.
|
|
381
431
|
*
|
|
382
432
|
* @remarks
|
|
383
433
|
* Full trajectory output from the `capture` command.
|
|
384
|
-
*
|
|
434
|
+
* - `input` can be string (single turn) or string[] (multi-turn)
|
|
435
|
+
* - `hint` provides grader context (renamed from `expected`)
|
|
436
|
+
* - `toolErrors` replaces misleading `status: 'passed'|'failed'`
|
|
385
437
|
* Real pass/fail determination comes from your grader.
|
|
386
438
|
*/
|
|
387
439
|
export const CaptureResultSchema = z.object({
|
|
388
440
|
/** Test case identifier */
|
|
389
441
|
id: z.string(),
|
|
390
|
-
/** Original prompt input */
|
|
391
|
-
input: z.string(),
|
|
442
|
+
/** Original prompt input (string for single turn, array for multi-turn) */
|
|
443
|
+
input: z.union([z.string(), z.array(z.string())]),
|
|
392
444
|
/** Final agent output */
|
|
393
445
|
output: z.string(),
|
|
394
|
-
/**
|
|
395
|
-
|
|
446
|
+
/** Grader context hint (renamed from expected) */
|
|
447
|
+
hint: z.string().optional(),
|
|
396
448
|
/** Full execution trajectory */
|
|
397
449
|
trajectory: z.array(TrajectoryStepSchema),
|
|
398
|
-
/** Metadata including category, agent info,
|
|
450
|
+
/** Metadata including category, agent info, trajectoryRichness, turnCount */
|
|
399
451
|
metadata: z.record(z.string(), z.unknown()),
|
|
400
452
|
/** Timing information */
|
|
401
453
|
timing: TimingSchema,
|
|
@@ -471,10 +523,10 @@ export type TrialEntry = z.infer<typeof TrialEntrySchema>
|
|
|
471
523
|
export const TrialResultSchema = z.object({
|
|
472
524
|
/** Test case identifier */
|
|
473
525
|
id: z.string(),
|
|
474
|
-
/** Original prompt input */
|
|
475
|
-
input: z.string(),
|
|
476
|
-
/**
|
|
477
|
-
|
|
526
|
+
/** Original prompt input (string for single turn, array for multi-turn) */
|
|
527
|
+
input: z.union([z.string(), z.array(z.string())]),
|
|
528
|
+
/** Grader context hint (renamed from expected) */
|
|
529
|
+
hint: z.string().optional(),
|
|
478
530
|
/** Number of trials (k) */
|
|
479
531
|
k: z.number(),
|
|
480
532
|
/** Simple pass rate: passes / k (with grader only) */
|
|
@@ -498,12 +550,12 @@ export type TrialResult = z.infer<typeof TrialResultSchema>
|
|
|
498
550
|
export const CalibrationSampleSchema = z.object({
|
|
499
551
|
/** Test case identifier */
|
|
500
552
|
id: z.string(),
|
|
501
|
-
/** Original prompt input */
|
|
502
|
-
input: z.string(),
|
|
553
|
+
/** Original prompt input (string for single turn, array for multi-turn) */
|
|
554
|
+
input: z.union([z.string(), z.array(z.string())]),
|
|
503
555
|
/** Agent output */
|
|
504
556
|
output: z.string(),
|
|
505
|
-
/**
|
|
506
|
-
|
|
557
|
+
/** Grader context hint (renamed from expected) */
|
|
558
|
+
hint: z.string().optional(),
|
|
507
559
|
/** Original grader score */
|
|
508
560
|
originalScore: GraderResultSchema,
|
|
509
561
|
/** Re-scored result (if different grader provided) */
|
package/src/summarize.ts
CHANGED
|
@@ -64,9 +64,10 @@ const loadResults = async (path: string): Promise<CaptureResult[]> => {
|
|
|
64
64
|
* @public
|
|
65
65
|
*/
|
|
66
66
|
export const formatSummary = (result: CaptureResult): SummaryResult => {
|
|
67
|
+
const inputText = Array.isArray(result.input) ? result.input.join('\n') : result.input
|
|
67
68
|
return {
|
|
68
69
|
id: result.id,
|
|
69
|
-
input:
|
|
70
|
+
input: inputText,
|
|
70
71
|
output: result.output,
|
|
71
72
|
toolCalls: result.trajectory.filter((s) => s.type === 'tool_call').map((s) => (s as { name: string }).name),
|
|
72
73
|
duration: result.timing.end - result.timing.start,
|
|
@@ -82,13 +83,8 @@ export const formatSummary = (result: CaptureResult): SummaryResult => {
|
|
|
82
83
|
* @public
|
|
83
84
|
*/
|
|
84
85
|
export const formatMarkdown = (result: CaptureResult): string => {
|
|
85
|
-
const
|
|
86
|
-
|
|
87
|
-
'',
|
|
88
|
-
`**Input:** ${result.input}`,
|
|
89
|
-
'',
|
|
90
|
-
'**Trajectory:**',
|
|
91
|
-
]
|
|
86
|
+
const inputText = Array.isArray(result.input) ? result.input.join('\n') : result.input
|
|
87
|
+
const lines: string[] = [`## Evaluation Record: ${result.id}`, '', `**Input:** ${inputText}`, '', '**Trajectory:**']
|
|
92
88
|
|
|
93
89
|
let stepNum = 1
|
|
94
90
|
for (const step of result.trajectory) {
|
|
package/src/tests/acp-client.spec.ts
CHANGED
|
@@ -102,7 +102,7 @@ describe('Operations before connection', () => {
|
|
|
102
102
|
command: ['echo', 'test'],
|
|
103
103
|
})
|
|
104
104
|
|
|
105
|
-
await expect(client.createSession({ cwd: '/tmp'
|
|
105
|
+
await expect(client.createSession({ cwd: '/tmp' })).rejects.toThrow('Not connected')
|
|
106
106
|
})
|
|
107
107
|
|
|
108
108
|
test('promptSync throws when not connected', async () => {
|
|
package/src/tests/capture-cli.spec.ts
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
|
|
2
|
+
import type { CaptureConfig } from '../capture.ts'
|
|
3
|
+
import { loadPrompts } from '../capture.ts'
|
|
4
|
+
|
|
5
|
+
// ============================================================================
|
|
6
|
+
// loadPrompts
|
|
7
|
+
// ============================================================================
|
|
8
|
+
|
|
9
|
+
describe('loadPrompts', () => {
|
|
10
|
+
const testPromptFile = '/tmp/acp-harness-test-prompts.jsonl'
|
|
11
|
+
|
|
12
|
+
beforeEach(async () => {
|
|
13
|
+
await Bun.$`rm -f ${testPromptFile}`.nothrow()
|
|
14
|
+
})
|
|
15
|
+
|
|
16
|
+
afterEach(async () => {
|
|
17
|
+
await Bun.$`rm -f ${testPromptFile}`.nothrow()
|
|
18
|
+
})
|
|
19
|
+
|
|
20
|
+
test('loads single-turn prompts', async () => {
|
|
21
|
+
await Bun.write(
|
|
22
|
+
testPromptFile,
|
|
23
|
+
`{"id": "t1", "input": "Hello"}
|
|
24
|
+
{"id": "t2", "input": "World"}`,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
const prompts = await loadPrompts(testPromptFile)
|
|
28
|
+
|
|
29
|
+
expect(prompts).toHaveLength(2)
|
|
30
|
+
expect(prompts[0]?.id).toBe('t1')
|
|
31
|
+
expect(prompts[0]?.input).toBe('Hello')
|
|
32
|
+
expect(prompts[1]?.id).toBe('t2')
|
|
33
|
+
expect(prompts[1]?.input).toBe('World')
|
|
34
|
+
})
|
|
35
|
+
|
|
36
|
+
test('loads multi-turn prompts', async () => {
|
|
37
|
+
await Bun.write(testPromptFile, `{"id": "conv1", "input": ["Hi", "How are you?", "Bye"]}`)
|
|
38
|
+
|
|
39
|
+
const prompts = await loadPrompts(testPromptFile)
|
|
40
|
+
|
|
41
|
+
expect(prompts).toHaveLength(1)
|
|
42
|
+
expect(prompts[0]?.id).toBe('conv1')
|
|
43
|
+
expect(Array.isArray(prompts[0]?.input)).toBe(true)
|
|
44
|
+
expect(prompts[0]?.input).toEqual(['Hi', 'How are you?', 'Bye'])
|
|
45
|
+
})
|
|
46
|
+
|
|
47
|
+
test('loads prompts with hint field', async () => {
|
|
48
|
+
await Bun.write(testPromptFile, `{"id": "t1", "input": "2+2?", "hint": "4"}`)
|
|
49
|
+
|
|
50
|
+
const prompts = await loadPrompts(testPromptFile)
|
|
51
|
+
|
|
52
|
+
expect(prompts).toHaveLength(1)
|
|
53
|
+
expect(prompts[0]?.hint).toBe('4')
|
|
54
|
+
})
|
|
55
|
+
|
|
56
|
+
test('loads prompts with metadata', async () => {
|
|
57
|
+
await Bun.write(
|
|
58
|
+
testPromptFile,
|
|
59
|
+
`{"id": "t1", "input": "Test", "metadata": {"category": "math", "difficulty": "easy"}}`,
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
const prompts = await loadPrompts(testPromptFile)
|
|
63
|
+
|
|
64
|
+
expect(prompts).toHaveLength(1)
|
|
65
|
+
expect(prompts[0]?.metadata).toEqual({ category: 'math', difficulty: 'easy' })
|
|
66
|
+
})
|
|
67
|
+
|
|
68
|
+
test('loads prompts with timeout override', async () => {
|
|
69
|
+
await Bun.write(testPromptFile, `{"id": "t1", "input": "Slow task", "timeout": 120000}`)
|
|
70
|
+
|
|
71
|
+
const prompts = await loadPrompts(testPromptFile)
|
|
72
|
+
|
|
73
|
+
expect(prompts).toHaveLength(1)
|
|
74
|
+
expect(prompts[0]?.timeout).toBe(120000)
|
|
75
|
+
})
|
|
76
|
+
|
|
77
|
+
test('skips empty lines', async () => {
|
|
78
|
+
await Bun.write(
|
|
79
|
+
testPromptFile,
|
|
80
|
+
`{"id": "t1", "input": "First"}
|
|
81
|
+
|
|
82
|
+
{"id": "t2", "input": "Second"}
|
|
83
|
+
`,
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
const prompts = await loadPrompts(testPromptFile)
|
|
87
|
+
|
|
88
|
+
expect(prompts).toHaveLength(2)
|
|
89
|
+
})
|
|
90
|
+
|
|
91
|
+
test('throws on invalid JSON', async () => {
|
|
92
|
+
await Bun.write(testPromptFile, 'not valid json')
|
|
93
|
+
|
|
94
|
+
await expect(loadPrompts(testPromptFile)).rejects.toThrow()
|
|
95
|
+
})
|
|
96
|
+
|
|
97
|
+
test('throws on missing required fields', async () => {
|
|
98
|
+
await Bun.write(testPromptFile, `{"id": "t1"}`) // missing input
|
|
99
|
+
|
|
100
|
+
await expect(loadPrompts(testPromptFile)).rejects.toThrow()
|
|
101
|
+
})
|
|
102
|
+
})
|
|
103
|
+
|
|
104
|
+
// ============================================================================
|
|
105
|
+
// runCapture configuration
|
|
106
|
+
// ============================================================================
|
|
107
|
+
|
|
108
|
+
describe('runCapture configuration', () => {
|
|
109
|
+
test('CaptureConfig type accepts valid configuration', () => {
|
|
110
|
+
// Type-level test - if this compiles, the types are correct
|
|
111
|
+
const config: CaptureConfig = {
|
|
112
|
+
promptsPath: '/tmp/prompts.jsonl',
|
|
113
|
+
agentCommand: ['bunx', 'test-agent'],
|
|
114
|
+
outputPath: '/tmp/output.jsonl',
|
|
115
|
+
cwd: '/tmp',
|
|
116
|
+
timeout: 30000,
|
|
117
|
+
progress: true,
|
|
118
|
+
append: false,
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
expect(config.promptsPath).toBe('/tmp/prompts.jsonl')
|
|
122
|
+
expect(config.agentCommand).toEqual(['bunx', 'test-agent'])
|
|
123
|
+
})
|
|
124
|
+
|
|
125
|
+
test('CaptureConfig allows minimal configuration', () => {
|
|
126
|
+
const config: CaptureConfig = {
|
|
127
|
+
promptsPath: '/tmp/prompts.jsonl',
|
|
128
|
+
agentCommand: ['echo', 'test'],
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
expect(config.outputPath).toBeUndefined()
|
|
132
|
+
expect(config.cwd).toBeUndefined()
|
|
133
|
+
expect(config.timeout).toBeUndefined()
|
|
134
|
+
expect(config.progress).toBeUndefined()
|
|
135
|
+
expect(config.append).toBeUndefined()
|
|
136
|
+
expect(config.grader).toBeUndefined()
|
|
137
|
+
})
|
|
138
|
+
})
|
|
139
|
+
|
|
140
|
+
// ============================================================================
|
|
141
|
+
// CLI Help Output
|
|
142
|
+
// ============================================================================
|
|
143
|
+
|
|
144
|
+
describe('capture CLI', () => {
|
|
145
|
+
test('displays help with --help flag', async () => {
|
|
146
|
+
const proc = Bun.spawn(['bun', './bin/cli.ts', 'capture', '--help'], {
|
|
147
|
+
stdout: 'pipe',
|
|
148
|
+
stderr: 'pipe',
|
|
149
|
+
})
|
|
150
|
+
|
|
151
|
+
const stdout = await new Response(proc.stdout).text()
|
|
152
|
+
await proc.exited
|
|
153
|
+
|
|
154
|
+
expect(stdout).toContain('Usage: acp-harness capture')
|
|
155
|
+
expect(stdout).toContain('prompts.jsonl')
|
|
156
|
+
expect(stdout).toContain('-o, --output')
|
|
157
|
+
expect(stdout).toContain('-c, --cwd')
|
|
158
|
+
expect(stdout).toContain('-t, --timeout')
|
|
159
|
+
expect(stdout).toContain('--progress')
|
|
160
|
+
expect(stdout).toContain('-g, --grader')
|
|
161
|
+
})
|
|
162
|
+
|
|
163
|
+
test('shows error for missing prompts file argument', async () => {
|
|
164
|
+
const proc = Bun.spawn(['bun', './bin/cli.ts', 'capture'], {
|
|
165
|
+
stdout: 'pipe',
|
|
166
|
+
stderr: 'pipe',
|
|
167
|
+
})
|
|
168
|
+
|
|
169
|
+
const stderr = await new Response(proc.stderr).text()
|
|
170
|
+
const exitCode = await proc.exited
|
|
171
|
+
|
|
172
|
+
expect(exitCode).not.toBe(0)
|
|
173
|
+
expect(stderr).toContain('prompts.jsonl path is required')
|
|
174
|
+
})
|
|
175
|
+
|
|
176
|
+
test('shows error for missing agent command', async () => {
|
|
177
|
+
const proc = Bun.spawn(['bun', './bin/cli.ts', 'capture', '/tmp/prompts.jsonl'], {
|
|
178
|
+
stdout: 'pipe',
|
|
179
|
+
stderr: 'pipe',
|
|
180
|
+
})
|
|
181
|
+
|
|
182
|
+
const stderr = await new Response(proc.stderr).text()
|
|
183
|
+
const exitCode = await proc.exited
|
|
184
|
+
|
|
185
|
+
expect(exitCode).not.toBe(0)
|
|
186
|
+
expect(stderr).toContain('ACP agent command is required')
|
|
187
|
+
})
|
|
188
|
+
})
|