@plaited/agent-eval-harness 0.12.2 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/package.json +1 -1
- package/src/commands/tests/capture-helpers.spec.ts +131 -21
- package/src/core/tests/core.spec.ts +3 -2
- package/src/core/trajectory.ts +8 -2
- package/src/headless/headless-output-parser.ts +22 -0
- package/src/headless/headless.schemas.ts +19 -8
- package/src/headless/tests/fixtures/claude-headless.json +40 -0
- package/src/headless/tests/fixtures/gemini-headless.json +37 -0
- package/src/headless/tests/headless.spec.ts +199 -4
- package/src/integration_tests/claude.spec.ts +2 -2
- package/src/integration_tests/gemini.spec.ts +2 -2
package/README.md
CHANGED
|
@@ -25,7 +25,7 @@ export ANTHROPIC_API_KEY=sk-... # For Claude
|
|
|
25
25
|
export GEMINI_API_KEY=... # For Gemini
|
|
26
26
|
```
|
|
27
27
|
|
|
28
|
-
|
|
28
|
+
Create adapter schemas for any CLI agent that outputs JSON — see the [Schema Creation Guide](.agents/skills/headless-adapters/references/schema-creation-guide.md).
|
|
29
29
|
|
|
30
30
|
### Core Commands
|
|
31
31
|
|
|
@@ -416,7 +416,7 @@ ANTHROPIC_API_KEY=sk-... GEMINI_API_KEY=... \
|
|
|
416
416
|
## Requirements
|
|
417
417
|
|
|
418
418
|
- **Runtime:** Bun >= 1.2.9
|
|
419
|
-
- **Schema:** JSON schema describing CLI agent interaction (see
|
|
419
|
+
- **Schema:** JSON schema describing CLI agent interaction (see [Schema Creation Guide](.agents/skills/headless-adapters/references/schema-creation-guide.md))
|
|
420
420
|
- **API Key:** `ANTHROPIC_API_KEY` for Claude, `GEMINI_API_KEY` for Gemini
|
|
421
421
|
|
|
422
422
|
## License
|
package/package.json
CHANGED
|
@@ -108,6 +108,7 @@ describe('extractTrajectory', () => {
|
|
|
108
108
|
{
|
|
109
109
|
type: 'thought',
|
|
110
110
|
content: 'Let me think about this...',
|
|
111
|
+
timestamp: 100,
|
|
111
112
|
raw: { type: 'thought', text: 'Let me think about this...' },
|
|
112
113
|
},
|
|
113
114
|
]
|
|
@@ -125,6 +126,7 @@ describe('extractTrajectory', () => {
|
|
|
125
126
|
{
|
|
126
127
|
type: 'message',
|
|
127
128
|
content: 'Here is my answer.',
|
|
129
|
+
timestamp: 200,
|
|
128
130
|
raw: { type: 'message', text: 'Here is my answer.' },
|
|
129
131
|
},
|
|
130
132
|
]
|
|
@@ -143,6 +145,7 @@ describe('extractTrajectory', () => {
|
|
|
143
145
|
type: 'tool_call',
|
|
144
146
|
title: 'Read',
|
|
145
147
|
status: 'pending',
|
|
148
|
+
timestamp: 300,
|
|
146
149
|
raw: { tool: 'Read', input: { file_path: '/test.ts' } },
|
|
147
150
|
},
|
|
148
151
|
]
|
|
@@ -160,6 +163,7 @@ describe('extractTrajectory', () => {
|
|
|
160
163
|
const updates: ParsedUpdate[] = [
|
|
161
164
|
{
|
|
162
165
|
type: 'plan',
|
|
166
|
+
timestamp: 400,
|
|
163
167
|
raw: {
|
|
164
168
|
entries: [
|
|
165
169
|
{ content: 'Step 1', status: 'completed' },
|
|
@@ -185,29 +189,26 @@ describe('extractTrajectory', () => {
|
|
|
185
189
|
})
|
|
186
190
|
|
|
187
191
|
test('assigns timestamps relative to start time', () => {
|
|
188
|
-
const
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
currentTime = 1500 // 500ms later
|
|
192
|
+
const startTime = 1000
|
|
193
|
+
const updates: ParsedUpdate[] = [
|
|
194
|
+
{
|
|
195
|
+
type: 'message',
|
|
196
|
+
content: 'First',
|
|
197
|
+
timestamp: 1500,
|
|
198
|
+
raw: { type: 'message', text: 'First' },
|
|
199
|
+
},
|
|
200
|
+
{
|
|
201
|
+
type: 'message',
|
|
202
|
+
content: 'Second',
|
|
203
|
+
timestamp: 2000,
|
|
204
|
+
raw: { type: 'message', text: 'Second' },
|
|
205
|
+
},
|
|
206
|
+
]
|
|
204
207
|
|
|
205
|
-
|
|
208
|
+
const trajectory = extractTrajectory(updates, startTime)
|
|
206
209
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
Date.now = originalNow
|
|
210
|
-
}
|
|
210
|
+
expect(trajectory[0]?.timestamp).toBe(500)
|
|
211
|
+
expect(trajectory[1]?.timestamp).toBe(1000)
|
|
211
212
|
})
|
|
212
213
|
|
|
213
214
|
test('handles updates without content for message/thought types', () => {
|
|
@@ -215,11 +216,13 @@ describe('extractTrajectory', () => {
|
|
|
215
216
|
{
|
|
216
217
|
type: 'message',
|
|
217
218
|
content: undefined, // No content - will have empty string
|
|
219
|
+
timestamp: 100,
|
|
218
220
|
raw: { type: 'message' },
|
|
219
221
|
},
|
|
220
222
|
{
|
|
221
223
|
type: 'message',
|
|
222
224
|
content: 'Has content',
|
|
225
|
+
timestamp: 200,
|
|
223
226
|
raw: { type: 'message', text: 'Has content' },
|
|
224
227
|
},
|
|
225
228
|
]
|
|
@@ -231,6 +234,113 @@ describe('extractTrajectory', () => {
|
|
|
231
234
|
expect(trajectory[0]?.type).toBe('message')
|
|
232
235
|
expect(trajectory[1]?.type).toBe('message')
|
|
233
236
|
})
|
|
237
|
+
|
|
238
|
+
test('attaches input to new tool call from update', () => {
|
|
239
|
+
const updates: ParsedUpdate[] = [
|
|
240
|
+
{
|
|
241
|
+
type: 'tool_call',
|
|
242
|
+
title: 'Read',
|
|
243
|
+
status: 'pending',
|
|
244
|
+
input: { file_path: '/src/main.ts' },
|
|
245
|
+
timestamp: 500,
|
|
246
|
+
raw: {},
|
|
247
|
+
},
|
|
248
|
+
]
|
|
249
|
+
|
|
250
|
+
const trajectory = extractTrajectory(updates, baseTime)
|
|
251
|
+
|
|
252
|
+
expect(trajectory).toHaveLength(1)
|
|
253
|
+
const step = trajectory[0]!
|
|
254
|
+
expect(step.type === 'tool_call' && step.input).toEqual({ file_path: '/src/main.ts' })
|
|
255
|
+
})
|
|
256
|
+
|
|
257
|
+
test('attaches output to tool call on completion', () => {
|
|
258
|
+
const updates: ParsedUpdate[] = [
|
|
259
|
+
{
|
|
260
|
+
type: 'tool_call',
|
|
261
|
+
title: 'Read',
|
|
262
|
+
status: 'pending',
|
|
263
|
+
input: { file_path: '/src/main.ts' },
|
|
264
|
+
timestamp: 500,
|
|
265
|
+
raw: {},
|
|
266
|
+
},
|
|
267
|
+
{
|
|
268
|
+
type: 'tool_call',
|
|
269
|
+
title: 'Read',
|
|
270
|
+
status: 'completed',
|
|
271
|
+
output: 'file contents here',
|
|
272
|
+
timestamp: 800,
|
|
273
|
+
raw: {},
|
|
274
|
+
},
|
|
275
|
+
]
|
|
276
|
+
|
|
277
|
+
const trajectory = extractTrajectory(updates, baseTime)
|
|
278
|
+
|
|
279
|
+
expect(trajectory).toHaveLength(1)
|
|
280
|
+
const step = trajectory[0]!
|
|
281
|
+
expect(step.type).toBe('tool_call')
|
|
282
|
+
if (step.type === 'tool_call') {
|
|
283
|
+
expect(step.input).toEqual({ file_path: '/src/main.ts' })
|
|
284
|
+
expect(step.output).toBe('file contents here')
|
|
285
|
+
expect(step.status).toBe('completed')
|
|
286
|
+
expect(step.duration).toBe(300)
|
|
287
|
+
}
|
|
288
|
+
})
|
|
289
|
+
|
|
290
|
+
test('handles sequential same-named tool calls independently', () => {
|
|
291
|
+
const updates: ParsedUpdate[] = [
|
|
292
|
+
// First Read: pending → completed
|
|
293
|
+
{
|
|
294
|
+
type: 'tool_call',
|
|
295
|
+
title: 'Read',
|
|
296
|
+
status: 'pending',
|
|
297
|
+
input: { file_path: '/src/a.ts' },
|
|
298
|
+
timestamp: 100,
|
|
299
|
+
raw: {},
|
|
300
|
+
},
|
|
301
|
+
{
|
|
302
|
+
type: 'tool_call',
|
|
303
|
+
title: 'Read',
|
|
304
|
+
status: 'completed',
|
|
305
|
+
output: 'contents of a.ts',
|
|
306
|
+
timestamp: 300,
|
|
307
|
+
raw: {},
|
|
308
|
+
},
|
|
309
|
+
// Second Read: pending → completed (same tool name, different args)
|
|
310
|
+
{
|
|
311
|
+
type: 'tool_call',
|
|
312
|
+
title: 'Read',
|
|
313
|
+
status: 'pending',
|
|
314
|
+
input: { file_path: '/src/b.ts' },
|
|
315
|
+
timestamp: 500,
|
|
316
|
+
raw: {},
|
|
317
|
+
},
|
|
318
|
+
{
|
|
319
|
+
type: 'tool_call',
|
|
320
|
+
title: 'Read',
|
|
321
|
+
status: 'completed',
|
|
322
|
+
output: 'contents of b.ts',
|
|
323
|
+
timestamp: 700,
|
|
324
|
+
raw: {},
|
|
325
|
+
},
|
|
326
|
+
]
|
|
327
|
+
|
|
328
|
+
const trajectory = extractTrajectory(updates, baseTime)
|
|
329
|
+
|
|
330
|
+
// Both calls should appear as separate trajectory steps
|
|
331
|
+
const toolCalls = trajectory.filter((s) => s.type === 'tool_call')
|
|
332
|
+
expect(toolCalls).toHaveLength(2)
|
|
333
|
+
|
|
334
|
+
const first = toolCalls[0]!
|
|
335
|
+
expect(first.type === 'tool_call' && first.input).toEqual({ file_path: '/src/a.ts' })
|
|
336
|
+
expect(first.type === 'tool_call' && first.output).toBe('contents of a.ts')
|
|
337
|
+
expect(first.type === 'tool_call' && first.status).toBe('completed')
|
|
338
|
+
|
|
339
|
+
const second = toolCalls[1]!
|
|
340
|
+
expect(second.type === 'tool_call' && second.input).toEqual({ file_path: '/src/b.ts' })
|
|
341
|
+
expect(second.type === 'tool_call' && second.output).toBe('contents of b.ts')
|
|
342
|
+
expect(second.type === 'tool_call' && second.status).toBe('completed')
|
|
343
|
+
})
|
|
234
344
|
})
|
|
235
345
|
|
|
236
346
|
// ============================================================================
|
|
@@ -123,7 +123,7 @@ describe('extractTrajectory', () => {
|
|
|
123
123
|
const startTime = 1000
|
|
124
124
|
|
|
125
125
|
test('extracts message updates', () => {
|
|
126
|
-
const updates: ParsedUpdate[] = [{ type: 'message', content: 'Hello', raw: {} }]
|
|
126
|
+
const updates: ParsedUpdate[] = [{ type: 'message', content: 'Hello', timestamp: 1100, raw: {} }]
|
|
127
127
|
const trajectory = extractTrajectory(updates, startTime)
|
|
128
128
|
expect(trajectory.length).toBe(1)
|
|
129
129
|
expect(trajectory[0]?.type).toBe('message')
|
|
@@ -131,7 +131,7 @@ describe('extractTrajectory', () => {
|
|
|
131
131
|
})
|
|
132
132
|
|
|
133
133
|
test('extracts thought updates', () => {
|
|
134
|
-
const updates: ParsedUpdate[] = [{ type: 'thought', content: 'Thinking...', raw: {} }]
|
|
134
|
+
const updates: ParsedUpdate[] = [{ type: 'thought', content: 'Thinking...', timestamp: 1200, raw: {} }]
|
|
135
135
|
const trajectory = extractTrajectory(updates, startTime)
|
|
136
136
|
expect(trajectory.length).toBe(1)
|
|
137
137
|
expect(trajectory[0]?.type).toBe('thought')
|
|
@@ -143,6 +143,7 @@ describe('extractTrajectory', () => {
|
|
|
143
143
|
type: 'tool_call',
|
|
144
144
|
title: 'Read',
|
|
145
145
|
status: 'completed',
|
|
146
|
+
timestamp: 1300,
|
|
146
147
|
raw: {},
|
|
147
148
|
},
|
|
148
149
|
]
|
package/src/core/trajectory.ts
CHANGED
|
@@ -30,7 +30,7 @@ export const extractTrajectory = (updates: ParsedUpdate[], startTime: number): T
|
|
|
30
30
|
const toolCallMap = new Map<string, { start: number; step: TrajectoryStep & { type: 'tool_call' } }>()
|
|
31
31
|
|
|
32
32
|
for (const update of updates) {
|
|
33
|
-
const timestamp =
|
|
33
|
+
const timestamp = update.timestamp - startTime
|
|
34
34
|
|
|
35
35
|
if (update.type === 'thought') {
|
|
36
36
|
trajectory.push({
|
|
@@ -45,19 +45,25 @@ export const extractTrajectory = (updates: ParsedUpdate[], startTime: number): T
|
|
|
45
45
|
timestamp,
|
|
46
46
|
})
|
|
47
47
|
} else if (update.type === 'tool_call') {
|
|
48
|
-
const toolCallId = update.title ?? `tool_${
|
|
48
|
+
const toolCallId = update.title ?? `tool_${timestamp}`
|
|
49
49
|
const existing = toolCallMap.get(toolCallId)
|
|
50
50
|
|
|
51
51
|
if (existing && update.status === 'completed') {
|
|
52
52
|
// Update existing tool call with completion info
|
|
53
53
|
existing.step.status = update.status
|
|
54
54
|
existing.step.duration = timestamp - existing.start
|
|
55
|
+
if (update.output !== undefined) {
|
|
56
|
+
existing.step.output = update.output
|
|
57
|
+
}
|
|
58
|
+
// Remove from map so a subsequent call with the same name starts fresh
|
|
59
|
+
toolCallMap.delete(toolCallId)
|
|
55
60
|
} else if (!existing) {
|
|
56
61
|
// New tool call
|
|
57
62
|
const step: TrajectoryStep & { type: 'tool_call' } = {
|
|
58
63
|
type: 'tool_call',
|
|
59
64
|
name: update.title ?? 'unknown',
|
|
60
65
|
status: update.status ?? 'pending',
|
|
66
|
+
...(update.input !== undefined && { input: update.input }),
|
|
61
67
|
timestamp,
|
|
62
68
|
}
|
|
63
69
|
toolCallMap.set(toolCallId, { start: timestamp, step })
|
|
@@ -23,6 +23,9 @@ export type ParsedUpdate = {
|
|
|
23
23
|
content?: string
|
|
24
24
|
title?: string
|
|
25
25
|
status?: string
|
|
26
|
+
input?: unknown
|
|
27
|
+
output?: unknown
|
|
28
|
+
timestamp: number
|
|
26
29
|
raw: unknown
|
|
27
30
|
}
|
|
28
31
|
|
|
@@ -200,6 +203,9 @@ const parsePassthrough = (line: string, typeMap: PassthroughTypeMap): ParsedUpda
|
|
|
200
203
|
content: typeof event.content === 'string' ? event.content : undefined,
|
|
201
204
|
title: typeof event.name === 'string' ? event.name : typeof event.title === 'string' ? event.title : undefined,
|
|
202
205
|
status: typeof event.status === 'string' ? event.status : undefined,
|
|
206
|
+
input: event.input,
|
|
207
|
+
output: event.output,
|
|
208
|
+
timestamp: Date.now(),
|
|
203
209
|
raw: event,
|
|
204
210
|
}
|
|
205
211
|
}
|
|
@@ -210,6 +216,9 @@ const parsePassthrough = (line: string, typeMap: PassthroughTypeMap): ParsedUpda
|
|
|
210
216
|
content: typeof event.content === 'string' ? event.content : undefined,
|
|
211
217
|
title: typeof event.name === 'string' ? event.name : typeof event.title === 'string' ? event.title : undefined,
|
|
212
218
|
status: typeof event.status === 'string' ? event.status : undefined,
|
|
219
|
+
input: event.input,
|
|
220
|
+
output: event.output,
|
|
221
|
+
timestamp: Date.now(),
|
|
213
222
|
raw: event,
|
|
214
223
|
}
|
|
215
224
|
}
|
|
@@ -307,6 +316,7 @@ export const createOutputParser = (config: HeadlessAdapterConfig) => {
|
|
|
307
316
|
const createUpdate = (event: unknown, mapping: OutputEventMapping): ParsedUpdate => {
|
|
308
317
|
const update: ParsedUpdate = {
|
|
309
318
|
type: mapping.emitAs,
|
|
319
|
+
timestamp: Date.now(),
|
|
310
320
|
raw: event,
|
|
311
321
|
}
|
|
312
322
|
|
|
@@ -320,6 +330,18 @@ export const createOutputParser = (config: HeadlessAdapterConfig) => {
|
|
|
320
330
|
if (mapping.extract.status) {
|
|
321
331
|
update.status = jsonPathString(event, mapping.extract.status)
|
|
322
332
|
}
|
|
333
|
+
if (mapping.extract.input) {
|
|
334
|
+
const value = jsonPath(event, mapping.extract.input)
|
|
335
|
+
if (value !== undefined) {
|
|
336
|
+
update.input = value
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
if (mapping.extract.output) {
|
|
340
|
+
const value = jsonPath(event, mapping.extract.output)
|
|
341
|
+
if (value !== undefined) {
|
|
342
|
+
update.output = value
|
|
343
|
+
}
|
|
344
|
+
}
|
|
323
345
|
}
|
|
324
346
|
|
|
325
347
|
return update
|
|
@@ -36,18 +36,29 @@ export type OutputEventMatch = z.infer<typeof OutputEventMatchSchema>
|
|
|
36
36
|
* Schema for extracting content from matched events.
|
|
37
37
|
*
|
|
38
38
|
* @remarks
|
|
39
|
+
* Known fields (`content`, `title`, `status`, `input`, `output`) are used by the
|
|
40
|
+
* output parser to populate `ParsedUpdate` properties. Additional string-valued
|
|
41
|
+
* fields are preserved during validation for forward compatibility but are not
|
|
42
|
+
* consumed by the parser.
|
|
43
|
+
*
|
|
39
44
|
* Paths can be:
|
|
40
45
|
* - JSONPath expressions (e.g., "$.message.text")
|
|
41
46
|
* - Literal strings in single quotes (e.g., "'pending'")
|
|
42
47
|
*/
|
|
43
|
-
export const OutputEventExtractSchema = z
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
48
|
+
export const OutputEventExtractSchema = z
|
|
49
|
+
.object({
|
|
50
|
+
/** JSONPath to extract main content */
|
|
51
|
+
content: z.string().optional(),
|
|
52
|
+
/** JSONPath to extract title (for tool calls) */
|
|
53
|
+
title: z.string().optional(),
|
|
54
|
+
/** JSONPath to extract status (or literal like "'pending'") */
|
|
55
|
+
status: z.string().optional(),
|
|
56
|
+
/** JSONPath to extract tool input arguments (e.g., "$.input") */
|
|
57
|
+
input: z.string().optional(),
|
|
58
|
+
/** JSONPath to extract tool output/result content (e.g., "$.content") */
|
|
59
|
+
output: z.string().optional(),
|
|
60
|
+
})
|
|
61
|
+
.catchall(z.string())
|
|
51
62
|
|
|
52
63
|
/** Output event extract type */
|
|
53
64
|
export type OutputEventExtract = z.infer<typeof OutputEventExtractSchema>
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": 1,
|
|
3
|
+
"name": "claude-headless",
|
|
4
|
+
"command": ["claude"],
|
|
5
|
+
"sessionMode": "stream",
|
|
6
|
+
"prompt": {
|
|
7
|
+
"flag": "-p"
|
|
8
|
+
},
|
|
9
|
+
"output": {
|
|
10
|
+
"flag": "--output-format",
|
|
11
|
+
"value": "stream-json"
|
|
12
|
+
},
|
|
13
|
+
"autoApprove": ["--dangerously-skip-permissions", "--verbose"],
|
|
14
|
+
"resume": {
|
|
15
|
+
"flag": "--resume",
|
|
16
|
+
"sessionIdPath": "$.session_id"
|
|
17
|
+
},
|
|
18
|
+
"outputEvents": [
|
|
19
|
+
{
|
|
20
|
+
"match": { "path": "$.type", "value": "assistant" },
|
|
21
|
+
"emitAs": "message",
|
|
22
|
+
"extract": { "content": "$.message.content[0].text" }
|
|
23
|
+
},
|
|
24
|
+
{
|
|
25
|
+
"match": { "path": "$.type", "value": "tool_use" },
|
|
26
|
+
"emitAs": "tool_call",
|
|
27
|
+
"extract": { "title": "$.name", "status": "'pending'", "input": "$.input" }
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
"match": { "path": "$.type", "value": "tool_result" },
|
|
31
|
+
"emitAs": "tool_call",
|
|
32
|
+
"extract": { "title": "$.name", "status": "'completed'", "output": "$.content" }
|
|
33
|
+
}
|
|
34
|
+
],
|
|
35
|
+
"result": {
|
|
36
|
+
"matchPath": "$.type",
|
|
37
|
+
"matchValue": "result",
|
|
38
|
+
"contentPath": "$.result"
|
|
39
|
+
}
|
|
40
|
+
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": 1,
|
|
3
|
+
"name": "gemini-headless",
|
|
4
|
+
"command": ["gemini"],
|
|
5
|
+
"sessionMode": "iterative",
|
|
6
|
+
"prompt": {
|
|
7
|
+
"flag": ""
|
|
8
|
+
},
|
|
9
|
+
"output": {
|
|
10
|
+
"flag": "--output-format",
|
|
11
|
+
"value": "stream-json"
|
|
12
|
+
},
|
|
13
|
+
"autoApprove": ["--sandbox", "false"],
|
|
14
|
+
"outputEvents": [
|
|
15
|
+
{
|
|
16
|
+
"match": { "path": "$.type", "value": "message" },
|
|
17
|
+
"emitAs": "message",
|
|
18
|
+
"extract": { "content": "$.content" }
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"match": { "path": "$.type", "value": "tool_use" },
|
|
22
|
+
"emitAs": "tool_call",
|
|
23
|
+
"extract": { "title": "$.tool_name", "status": "'pending'", "input": "$.args" }
|
|
24
|
+
},
|
|
25
|
+
{
|
|
26
|
+
"match": { "path": "$.type", "value": "tool_result" },
|
|
27
|
+
"emitAs": "tool_call",
|
|
28
|
+
"extract": { "title": "$.tool_name", "status": "'completed'", "output": "$.output" }
|
|
29
|
+
}
|
|
30
|
+
],
|
|
31
|
+
"result": {
|
|
32
|
+
"matchPath": "$.type",
|
|
33
|
+
"matchValue": "result",
|
|
34
|
+
"contentPath": "$.content"
|
|
35
|
+
},
|
|
36
|
+
"historyTemplate": "User: {{input}}\nAssistant: {{output}}"
|
|
37
|
+
}
|
|
@@ -36,7 +36,12 @@ const validClaudeSchema = {
|
|
|
36
36
|
{
|
|
37
37
|
match: { path: '$.type', value: 'tool_use' },
|
|
38
38
|
emitAs: 'tool_call',
|
|
39
|
-
extract: { title: '$.name', status: "'pending'" },
|
|
39
|
+
extract: { title: '$.name', status: "'pending'", input: '$.input' },
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
match: { path: '$.type', value: 'tool_result' },
|
|
43
|
+
emitAs: 'tool_call',
|
|
44
|
+
extract: { title: '$.name', status: "'completed'", output: '$.content' },
|
|
40
45
|
},
|
|
41
46
|
],
|
|
42
47
|
result: {
|
|
@@ -86,21 +91,83 @@ describe('HeadlessAdapterSchema', () => {
|
|
|
86
91
|
})
|
|
87
92
|
|
|
88
93
|
describe('validates schema files from disk', () => {
|
|
89
|
-
const
|
|
94
|
+
const fixturesDir = 'src/headless/tests/fixtures'
|
|
90
95
|
|
|
91
96
|
test('validates claude-headless.json from disk', async () => {
|
|
92
|
-
const content = await Bun.file(`${
|
|
97
|
+
const content = await Bun.file(`${fixturesDir}/claude-headless.json`).json()
|
|
93
98
|
const result = HeadlessAdapterSchema.safeParse(content)
|
|
94
99
|
expect(result.success).toBe(true)
|
|
95
100
|
})
|
|
96
101
|
|
|
97
102
|
test('validates gemini-headless.json from disk', async () => {
|
|
98
|
-
const content = await Bun.file(`${
|
|
103
|
+
const content = await Bun.file(`${fixturesDir}/gemini-headless.json`).json()
|
|
99
104
|
const result = HeadlessAdapterSchema.safeParse(content)
|
|
100
105
|
expect(result.success).toBe(true)
|
|
101
106
|
})
|
|
102
107
|
})
|
|
103
108
|
|
|
109
|
+
describe('extract input/output fields', () => {
|
|
110
|
+
test('validates schema with input and output in extract config', () => {
|
|
111
|
+
const schemaWithIO = {
|
|
112
|
+
...validClaudeSchema,
|
|
113
|
+
outputEvents: [
|
|
114
|
+
...validClaudeSchema.outputEvents,
|
|
115
|
+
{
|
|
116
|
+
match: { path: '$.type', value: 'custom' },
|
|
117
|
+
emitAs: 'tool_call',
|
|
118
|
+
extract: { title: '$.name', input: '$.args', output: '$.result' },
|
|
119
|
+
},
|
|
120
|
+
],
|
|
121
|
+
}
|
|
122
|
+
const result = HeadlessAdapterSchema.safeParse(schemaWithIO)
|
|
123
|
+
expect(result.success).toBe(true)
|
|
124
|
+
})
|
|
125
|
+
|
|
126
|
+
test('preserves extra extract fields via catchall', () => {
|
|
127
|
+
const schemaWithExtras = {
|
|
128
|
+
...validClaudeSchema,
|
|
129
|
+
outputEvents: [
|
|
130
|
+
{
|
|
131
|
+
match: { path: '$.type', value: 'tool_use' },
|
|
132
|
+
emitAs: 'tool_call',
|
|
133
|
+
extract: {
|
|
134
|
+
title: '$.name',
|
|
135
|
+
status: "'pending'",
|
|
136
|
+
input: '$.input',
|
|
137
|
+
toolName: '$.name',
|
|
138
|
+
mcpServer: '$.server',
|
|
139
|
+
},
|
|
140
|
+
},
|
|
141
|
+
],
|
|
142
|
+
}
|
|
143
|
+
const result = HeadlessAdapterSchema.safeParse(schemaWithExtras)
|
|
144
|
+
expect(result.success).toBe(true)
|
|
145
|
+
if (result.success) {
|
|
146
|
+
const extract = result.data.outputEvents![0]!.extract!
|
|
147
|
+
expect(extract.title).toBe('$.name')
|
|
148
|
+
expect(extract.input).toBe('$.input')
|
|
149
|
+
// Catchall fields aren't in the inferred type — cast needed to access them
|
|
150
|
+
expect((extract as Record<string, string>).toolName).toBe('$.name')
|
|
151
|
+
expect((extract as Record<string, string>).mcpServer).toBe('$.server')
|
|
152
|
+
}
|
|
153
|
+
})
|
|
154
|
+
|
|
155
|
+
test('rejects non-string extra extract fields', () => {
|
|
156
|
+
const schemaWithBadExtras = {
|
|
157
|
+
...validClaudeSchema,
|
|
158
|
+
outputEvents: [
|
|
159
|
+
{
|
|
160
|
+
match: { path: '$.type', value: 'tool_use' },
|
|
161
|
+
emitAs: 'tool_call',
|
|
162
|
+
extract: { title: '$.name', badField: 123 },
|
|
163
|
+
},
|
|
164
|
+
],
|
|
165
|
+
}
|
|
166
|
+
const result = HeadlessAdapterSchema.safeParse(schemaWithBadExtras)
|
|
167
|
+
expect(result.success).toBe(false)
|
|
168
|
+
})
|
|
169
|
+
})
|
|
170
|
+
|
|
104
171
|
describe('minimal valid schema', () => {
|
|
105
172
|
test('validates minimal required fields', () => {
|
|
106
173
|
const minimal = {
|
|
@@ -397,6 +464,70 @@ describe('createOutputParser', () => {
|
|
|
397
464
|
const singleResult = Array.isArray(result) ? result[0] : result
|
|
398
465
|
expect(singleResult?.raw).toEqual(event)
|
|
399
466
|
})
|
|
467
|
+
|
|
468
|
+
test('extracts input from tool_use event', () => {
|
|
469
|
+
const line = JSON.stringify({ type: 'tool_use', name: 'Read', input: { file_path: '/test.ts' } })
|
|
470
|
+
const result = parser.parseLine(line)
|
|
471
|
+
const singleResult = Array.isArray(result) ? result[0] : result
|
|
472
|
+
expect(singleResult?.input).toEqual({ file_path: '/test.ts' })
|
|
473
|
+
})
|
|
474
|
+
|
|
475
|
+
test('extracts output from tool_result event', () => {
|
|
476
|
+
const line = JSON.stringify({ type: 'tool_result', name: 'Read', content: 'file contents' })
|
|
477
|
+
const result = parser.parseLine(line)
|
|
478
|
+
const singleResult = Array.isArray(result) ? result[0] : result
|
|
479
|
+
expect(singleResult?.output).toBe('file contents')
|
|
480
|
+
})
|
|
481
|
+
|
|
482
|
+
test('sets timestamp on parsed updates', () => {
|
|
483
|
+
const before = Date.now()
|
|
484
|
+
const line = JSON.stringify({ type: 'assistant', message: { text: 'Hello' } })
|
|
485
|
+
const result = parser.parseLine(line)
|
|
486
|
+
const after = Date.now()
|
|
487
|
+
const singleResult = Array.isArray(result) ? result[0] : result
|
|
488
|
+
expect(singleResult?.timestamp).toBeGreaterThanOrEqual(before)
|
|
489
|
+
expect(singleResult?.timestamp).toBeLessThanOrEqual(after)
|
|
490
|
+
})
|
|
491
|
+
})
|
|
492
|
+
|
|
493
|
+
describe('parseLine with extra extract fields', () => {
|
|
494
|
+
test('extra extract fields do not break parser', () => {
|
|
495
|
+
const configWithExtras = parseHeadlessConfig({
|
|
496
|
+
version: 1,
|
|
497
|
+
name: 'extras-test',
|
|
498
|
+
command: ['test'],
|
|
499
|
+
sessionMode: 'stream',
|
|
500
|
+
prompt: { flag: '-p' },
|
|
501
|
+
output: { flag: '--output', value: 'json' },
|
|
502
|
+
outputEvents: [
|
|
503
|
+
{
|
|
504
|
+
match: { path: '$.type', value: 'tool_use' },
|
|
505
|
+
emitAs: 'tool_call',
|
|
506
|
+
extract: {
|
|
507
|
+
title: '$.name',
|
|
508
|
+
status: "'pending'",
|
|
509
|
+
input: '$.input',
|
|
510
|
+
toolName: '$.name',
|
|
511
|
+
mcpServer: '$.server',
|
|
512
|
+
},
|
|
513
|
+
},
|
|
514
|
+
],
|
|
515
|
+
result: { matchPath: '$.type', matchValue: 'done', contentPath: '$.text' },
|
|
516
|
+
})
|
|
517
|
+
const extrasParser = createOutputParser(configWithExtras)
|
|
518
|
+
const line = JSON.stringify({
|
|
519
|
+
type: 'tool_use',
|
|
520
|
+
name: 'WebSearch',
|
|
521
|
+
input: { query: 'test' },
|
|
522
|
+
server: 'mcp-search',
|
|
523
|
+
})
|
|
524
|
+
const result = extrasParser.parseLine(line)
|
|
525
|
+
const singleResult = Array.isArray(result) ? result[0] : result
|
|
526
|
+
expect(singleResult).not.toBeNull()
|
|
527
|
+
expect(singleResult?.type).toBe('tool_call')
|
|
528
|
+
expect(singleResult?.title).toBe('WebSearch')
|
|
529
|
+
expect(singleResult?.input).toEqual({ query: 'test' })
|
|
530
|
+
})
|
|
400
531
|
})
|
|
401
532
|
|
|
402
533
|
describe('parseLine with array wildcards', () => {
|
|
@@ -574,6 +705,70 @@ describe('createOutputParser', () => {
|
|
|
574
705
|
})
|
|
575
706
|
})
|
|
576
707
|
|
|
708
|
+
// ============================================================================
|
|
709
|
+
// Passthrough Mode Tests
|
|
710
|
+
// ============================================================================
|
|
711
|
+
|
|
712
|
+
describe('passthrough mode', () => {
|
|
713
|
+
const passthroughConfig = parseHeadlessConfig({
|
|
714
|
+
version: 1,
|
|
715
|
+
name: 'passthrough-test',
|
|
716
|
+
command: ['test-agent'],
|
|
717
|
+
sessionMode: 'stream',
|
|
718
|
+
prompt: { flag: '-p' },
|
|
719
|
+
output: { flag: '--output', value: 'json' },
|
|
720
|
+
outputMode: 'passthrough',
|
|
721
|
+
passthroughTypeMap: {
|
|
722
|
+
typeField: 'type',
|
|
723
|
+
typeValues: { tool_use: 'tool_call', tool_result: 'tool_call' },
|
|
724
|
+
},
|
|
725
|
+
result: { matchPath: '$.type', matchValue: 'result', contentPath: '$.content' },
|
|
726
|
+
})
|
|
727
|
+
const passthroughParser = createOutputParser(passthroughConfig)
|
|
728
|
+
|
|
729
|
+
test('extracts input from tool_call event', () => {
|
|
730
|
+
const line = JSON.stringify({ type: 'tool_use', name: 'Read', input: { file_path: '/test.ts' }, status: 'pending' })
|
|
731
|
+
const result = passthroughParser.parseLine(line)
|
|
732
|
+
const singleResult = Array.isArray(result) ? result[0] : result
|
|
733
|
+
expect(singleResult?.type).toBe('tool_call')
|
|
734
|
+
expect(singleResult?.input).toEqual({ file_path: '/test.ts' })
|
|
735
|
+
})
|
|
736
|
+
|
|
737
|
+
test('extracts output from tool_result event', () => {
|
|
738
|
+
const line = JSON.stringify({ type: 'tool_result', name: 'Read', output: 'file contents', status: 'completed' })
|
|
739
|
+
const result = passthroughParser.parseLine(line)
|
|
740
|
+
const singleResult = Array.isArray(result) ? result[0] : result
|
|
741
|
+
expect(singleResult?.type).toBe('tool_call')
|
|
742
|
+
expect(singleResult?.output).toBe('file contents')
|
|
743
|
+
})
|
|
744
|
+
|
|
745
|
+
test('preserves object input type', () => {
|
|
746
|
+
const line = JSON.stringify({ type: 'tool_use', name: 'Write', input: { path: '/a.ts', content: 'code' } })
|
|
747
|
+
const result = passthroughParser.parseLine(line)
|
|
748
|
+
const singleResult = Array.isArray(result) ? result[0] : result
|
|
749
|
+
expect(singleResult?.input).toEqual({ path: '/a.ts', content: 'code' })
|
|
750
|
+
})
|
|
751
|
+
|
|
752
|
+
test('sets timestamp on passthrough updates', () => {
|
|
753
|
+
const before = Date.now()
|
|
754
|
+
const line = JSON.stringify({ type: 'message', content: 'Hello' })
|
|
755
|
+
const result = passthroughParser.parseLine(line)
|
|
756
|
+
const after = Date.now()
|
|
757
|
+
const singleResult = Array.isArray(result) ? result[0] : result
|
|
758
|
+
expect(singleResult?.timestamp).toBeGreaterThanOrEqual(before)
|
|
759
|
+
expect(singleResult?.timestamp).toBeLessThanOrEqual(after)
|
|
760
|
+
})
|
|
761
|
+
|
|
762
|
+
test('handles absent input/output fields gracefully', () => {
|
|
763
|
+
const line = JSON.stringify({ type: 'tool_use', name: 'Bash', status: 'pending' })
|
|
764
|
+
const result = passthroughParser.parseLine(line)
|
|
765
|
+
const singleResult = Array.isArray(result) ? result[0] : result
|
|
766
|
+
expect(singleResult?.type).toBe('tool_call')
|
|
767
|
+
expect(singleResult?.input).toBeUndefined()
|
|
768
|
+
expect(singleResult?.output).toBeUndefined()
|
|
769
|
+
})
|
|
770
|
+
})
|
|
771
|
+
|
|
577
772
|
// ============================================================================
|
|
578
773
|
// History Builder Tests
|
|
579
774
|
// ============================================================================
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
*
|
|
4
4
|
* @remarks
|
|
5
5
|
* Tests verify the headless session manager works correctly with Claude Code CLI
|
|
6
|
-
* using the schema-driven
|
|
6
|
+
* using the schema-driven headless adapter approach.
|
|
7
7
|
*
|
|
8
8
|
* Run locally with API key:
|
|
9
9
|
* ```bash
|
|
@@ -29,7 +29,7 @@ setDefaultTimeout(120000)
|
|
|
29
29
|
const PROJECT_ROOT = process.cwd()
|
|
30
30
|
|
|
31
31
|
// Schema path for Claude headless adapter
|
|
32
|
-
const SCHEMA_PATH = join(PROJECT_ROOT, '
|
|
32
|
+
const SCHEMA_PATH = join(PROJECT_ROOT, 'src/headless/tests/fixtures/claude-headless.json')
|
|
33
33
|
|
|
34
34
|
// Get API key from environment
|
|
35
35
|
const API_KEY = process.env.ANTHROPIC_API_KEY ?? ''
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
*
|
|
4
4
|
* @remarks
|
|
5
5
|
* Tests verify the headless session manager works correctly with Gemini CLI
|
|
6
|
-
* using the schema-driven
|
|
6
|
+
* using the schema-driven headless adapter approach.
|
|
7
7
|
*
|
|
8
8
|
* Run locally with API key:
|
|
9
9
|
* ```bash
|
|
@@ -29,7 +29,7 @@ setDefaultTimeout(120000)
|
|
|
29
29
|
const PROJECT_ROOT = process.cwd()
|
|
30
30
|
|
|
31
31
|
// Schema path for Gemini headless adapter
|
|
32
|
-
const SCHEMA_PATH = join(PROJECT_ROOT, '
|
|
32
|
+
const SCHEMA_PATH = join(PROJECT_ROOT, 'src/headless/tests/fixtures/gemini-headless.json')
|
|
33
33
|
|
|
34
34
|
// Get API key from environment
|
|
35
35
|
const GEMINI_API_KEY = process.env.GEMINI_API_KEY ?? ''
|