@plaited/acp-harness 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +53 -31
- package/bin/cli.ts +15 -0
- package/package.json +5 -7
- package/src/acp-client.ts +7 -4
- package/src/adapter-check.ts +0 -1
- package/src/adapter-scaffold.ts +16 -15
- package/src/calibrate.ts +28 -8
- package/src/capture.ts +114 -33
- package/src/grader-loader.ts +3 -3
- package/src/harness.ts +4 -0
- package/src/headless-cli.ts +433 -0
- package/src/headless-history-builder.ts +141 -0
- package/src/headless-output-parser.ts +251 -0
- package/src/headless-session-manager.ts +389 -0
- package/src/headless.schemas.ts +241 -0
- package/src/headless.ts +71 -0
- package/src/headless.types.ts +19 -0
- package/src/integration_tests/acp-claude.spec.ts +170 -0
- package/src/integration_tests/acp-gemini.spec.ts +174 -0
- package/src/schemas.ts +88 -36
- package/src/summarize.ts +4 -8
- package/src/tests/acp-client.spec.ts +1 -1
- package/src/tests/capture-cli.spec.ts +188 -0
- package/src/tests/capture-helpers.spec.ts +229 -67
- package/src/tests/constants.spec.ts +121 -0
- package/src/tests/fixtures/grader-exec.py +3 -3
- package/src/tests/fixtures/grader-module.ts +2 -2
- package/src/tests/grader-loader.spec.ts +5 -5
- package/src/tests/headless.spec.ts +460 -0
- package/src/tests/schemas-cli.spec.ts +142 -0
- package/src/tests/schemas.spec.ts +657 -0
- package/src/tests/summarize-helpers.spec.ts +3 -3
- package/src/tests/trials-cli.spec.ts +145 -0
- package/src/trials.ts +6 -19
- package/src/validate-refs.ts +1 -1
- package/src/tests/acp-integration.docker.ts +0 -214
|
@@ -0,0 +1,460 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for headless ACP adapter factory.
|
|
3
|
+
*
|
|
4
|
+
* @remarks
|
|
5
|
+
* Tests cover:
|
|
6
|
+
* - Schema validation with Zod
|
|
7
|
+
* - JSONPath extraction
|
|
8
|
+
* - Output parsing with event mappings
|
|
9
|
+
* - History building for iterative mode
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { describe, expect, test } from 'bun:test'
|
|
13
|
+
import { HeadlessAdapterSchema, parseHeadlessConfig, safeParseHeadlessConfig } from '../headless.schemas.ts'
|
|
14
|
+
import { createHistoryBuilder } from '../headless-history-builder.ts'
|
|
15
|
+
import { createOutputParser, jsonPath, jsonPathString } from '../headless-output-parser.ts'
|
|
16
|
+
|
|
17
|
+
// ============================================================================
|
|
18
|
+
// Test Fixtures
|
|
19
|
+
// ============================================================================
|
|
20
|
+
|
|
21
|
+
/**
 * Fixture: a complete stream-mode adapter config for the `claude` CLI.
 *
 * Exercises the optional `autoApprove` and `resume` sections and declares two
 * output-event mappings (`assistant` -> `message`, `tool_use` -> `tool_call`).
 */
const validClaudeSchema = {
  version: 1,
  name: 'claude-headless',
  command: ['claude'],
  sessionMode: 'stream',
  prompt: { flag: '-p' },
  output: { flag: '--output-format', value: 'stream-json' },
  autoApprove: ['--dangerously-skip-permissions'],
  resume: { flag: '--resume', sessionIdPath: '$.session_id' },
  outputEvents: [
    // Assistant text is surfaced as a plain message.
    { match: { path: '$.type', value: 'assistant' }, emitAs: 'message', extract: { content: '$.message.text' } },
    // `status` is a quoted literal rather than a JSONPath expression.
    {
      match: { path: '$.type', value: 'tool_use' },
      emitAs: 'tool_call',
      extract: { title: '$.name', status: "'pending'" },
    },
  ],
  result: { matchPath: '$.type', matchValue: 'result', contentPath: '$.result' },
}
|
|
48
|
+
|
|
49
|
+
/**
 * Fixture: an iterative-mode adapter config for the `gemini` CLI.
 *
 * Unlike the Claude fixture it has no `autoApprove`/`resume` sections and
 * instead supplies a `historyTemplate` with `{{input}}`/`{{output}}` slots.
 */
const validGeminiSchema = {
  version: 1,
  name: 'gemini-headless',
  command: ['gemini'],
  sessionMode: 'iterative',
  prompt: { flag: '--prompt' },
  output: { flag: '--output-format', value: 'json' },
  outputEvents: [
    { match: { path: '$.type', value: 'message' }, emitAs: 'message', extract: { content: '$.content' } },
  ],
  result: { matchPath: '$.type', matchValue: 'result', contentPath: '$.response' },
  historyTemplate: 'User: {{input}}\nAssistant: {{output}}',
}
|
|
70
|
+
|
|
71
|
+
// ============================================================================
|
|
72
|
+
// Schema Validation Tests
|
|
73
|
+
// ============================================================================
|
|
74
|
+
|
|
75
|
+
/**
 * Schema-level checks for `HeadlessAdapterSchema` and its two wrappers:
 * `parseHeadlessConfig` (expected to throw on bad input) and
 * `safeParseHeadlessConfig` (expected to report failure via `success`).
 */
describe('HeadlessAdapterSchema', () => {
  describe('valid schemas', () => {
    test('validates Claude headless schema', () => {
      expect(HeadlessAdapterSchema.safeParse(validClaudeSchema).success).toBe(true)
    })

    test('validates Gemini headless schema', () => {
      expect(HeadlessAdapterSchema.safeParse(validGeminiSchema).success).toBe(true)
    })
  })

  describe('validates schema files from disk', () => {
    // Relative path: presumably resolved against the working directory the
    // test run is started from (confirm against the project's test setup).
    const schemasDir = '.claude/skills/acp-adapters/schemas'

    test('validates claude-headless.json from disk', async () => {
      const fromDisk = await Bun.file(`${schemasDir}/claude-headless.json`).json()
      expect(HeadlessAdapterSchema.safeParse(fromDisk).success).toBe(true)
    })

    test('validates gemini-headless.json from disk', async () => {
      const fromDisk = await Bun.file(`${schemasDir}/gemini-headless.json`).json()
      expect(HeadlessAdapterSchema.safeParse(fromDisk).success).toBe(true)
    })
  })

  describe('minimal valid schema', () => {
    test('validates minimal required fields', () => {
      // Empty `prompt` object and empty `outputEvents` list must still be accepted.
      const bareMinimum = {
        version: 1,
        name: 'minimal',
        command: ['agent'],
        sessionMode: 'iterative',
        prompt: {},
        output: { flag: '--format', value: 'json' },
        outputEvents: [],
        result: { matchPath: '$.type', matchValue: 'done', contentPath: '$.text' },
      }
      expect(HeadlessAdapterSchema.safeParse(bareMinimum).success).toBe(true)
    })
  })

  describe('invalid schemas', () => {
    test('rejects missing version', () => {
      const withoutVersion = { ...validClaudeSchema, version: undefined }
      expect(HeadlessAdapterSchema.safeParse(withoutVersion).success).toBe(false)
    })

    test('rejects wrong version', () => {
      const futureVersion = { ...validClaudeSchema, version: 2 }
      expect(HeadlessAdapterSchema.safeParse(futureVersion).success).toBe(false)
    })

    test('rejects invalid sessionMode', () => {
      const unknownMode = { ...validClaudeSchema, sessionMode: 'batch' }
      expect(HeadlessAdapterSchema.safeParse(unknownMode).success).toBe(false)
    })

    test('rejects missing command', () => {
      const withoutCommand = { ...validClaudeSchema, command: undefined }
      expect(HeadlessAdapterSchema.safeParse(withoutCommand).success).toBe(false)
    })

    test('rejects invalid emitAs type', () => {
      const badEvent = { match: { path: '$.type', value: 'x' }, emitAs: 'invalid_type' }
      const withBadEvent = { ...validClaudeSchema, outputEvents: [badEvent] }
      expect(HeadlessAdapterSchema.safeParse(withBadEvent).success).toBe(false)
    })
  })

  describe('parseHeadlessConfig', () => {
    test('returns parsed config for valid input', () => {
      const { name, command, sessionMode } = parseHeadlessConfig(validClaudeSchema)
      expect(name).toBe('claude-headless')
      expect(command).toEqual(['claude'])
      expect(sessionMode).toBe('stream')
    })

    test('throws for invalid input', () => {
      const parseBadInput = () => parseHeadlessConfig({ version: 2 })
      expect(parseBadInput).toThrow()
    })
  })

  describe('safeParseHeadlessConfig', () => {
    test('returns success for valid input', () => {
      const outcome = safeParseHeadlessConfig(validClaudeSchema)
      expect(outcome.success).toBe(true)
      // The guard narrows the result type so `data` is accessible.
      if (outcome.success) {
        expect(outcome.data.name).toBe('claude-headless')
      }
    })

    test('returns failure for invalid input', () => {
      expect(safeParseHeadlessConfig({ version: 2 }).success).toBe(false)
    })
  })
})
|
|
189
|
+
|
|
190
|
+
// ============================================================================
|
|
191
|
+
// JSONPath Tests
|
|
192
|
+
// ============================================================================
|
|
193
|
+
|
|
194
|
+
/**
 * Behaviour of `jsonPath`: `$.a.b` style lookups, single-quoted literal
 * values, and `undefined` results for unusable inputs or paths.
 */
describe('jsonPath', () => {
  // Shared read-only sample covering root, nested and deeply nested fields.
  const sample = {
    type: 'message',
    message: {
      text: 'Hello world',
      nested: { value: 42 },
    },
    array: [1, 2, 3],
  }

  describe('basic extraction', () => {
    test('extracts root field', () => {
      const value = jsonPath(sample, '$.type')
      expect(value).toBe('message')
    })

    test('extracts nested field', () => {
      const value = jsonPath(sample, '$.message.text')
      expect(value).toBe('Hello world')
    })

    test('extracts deeply nested field', () => {
      const value = jsonPath(sample, '$.message.nested.value')
      expect(value).toBe(42)
    })

    test('returns undefined for non-existent path', () => {
      const value = jsonPath(sample, '$.missing')
      expect(value).toBeUndefined()
    })

    test('returns undefined for non-existent nested path', () => {
      const value = jsonPath(sample, '$.message.missing.deep')
      expect(value).toBeUndefined()
    })
  })

  describe('literal strings', () => {
    test('returns literal string value', () => {
      const value = jsonPath(sample, "'pending'")
      expect(value).toBe('pending')
    })

    test('returns empty literal string', () => {
      const value = jsonPath(sample, "''")
      expect(value).toBe('')
    })

    test('returns literal with spaces', () => {
      const value = jsonPath(sample, "'hello world'")
      expect(value).toBe('hello world')
    })
  })

  describe('edge cases', () => {
    test('handles null input', () => {
      const value = jsonPath(null, '$.type')
      expect(value).toBeUndefined()
    })

    test('handles undefined input', () => {
      const value = jsonPath(undefined, '$.type')
      expect(value).toBeUndefined()
    })

    test('handles non-object input', () => {
      const value = jsonPath('string', '$.type')
      expect(value).toBeUndefined()
    })

    test('handles invalid path format', () => {
      // No leading `$.`, so the path is neither a lookup nor a quoted literal.
      const value = jsonPath(sample, 'type')
      expect(value).toBeUndefined()
    })
  })
})
|
|
258
|
+
|
|
259
|
+
/**
 * Behaviour of `jsonPathString`: the extracted value comes back as a string,
 * and absent or `null` values come back as `undefined`.
 */
describe('jsonPathString', () => {
  test('extracts string value', () => {
    const text = jsonPathString({ text: 'hello' }, '$.text')
    expect(text).toBe('hello')
  })

  test('converts number to string', () => {
    const text = jsonPathString({ num: 42 }, '$.num')
    expect(text).toBe('42')
  })

  test('returns undefined for missing path', () => {
    const text = jsonPathString({ x: 1 }, '$.y')
    expect(text).toBeUndefined()
  })

  test('returns undefined for null value', () => {
    const text = jsonPathString({ x: null }, '$.x')
    expect(text).toBeUndefined()
  })
})
|
|
276
|
+
|
|
277
|
+
// ============================================================================
|
|
278
|
+
// Output Parser Tests
|
|
279
|
+
// ============================================================================
|
|
280
|
+
|
|
281
|
+
/**
 * Behaviour of the parser built by `createOutputParser` from the Claude
 * fixture: `parseLine` maps raw JSON lines to updates, and `parseResult`
 * recognises the final result event.
 */
describe('createOutputParser', () => {
  // One parser instance, built from the Claude fixture, is shared by every test below.
  const parser = createOutputParser(parseHeadlessConfig(validClaudeSchema))

  describe('parseLine', () => {
    test('maps assistant type to message', () => {
      const update = parser.parseLine(JSON.stringify({ type: 'assistant', message: { text: 'Hello' } }))
      expect(update).not.toBeNull()
      expect(update?.type).toBe('message')
      expect(update?.content).toBe('Hello')
    })

    test('maps tool_use type to tool_call', () => {
      const update = parser.parseLine(JSON.stringify({ type: 'tool_use', name: 'Read' }))
      expect(update).not.toBeNull()
      expect(update?.type).toBe('tool_call')
      expect(update?.title).toBe('Read')
      // `status` comes from the quoted literal in the fixture, not from the event.
      expect(update?.status).toBe('pending')
    })

    test('returns null for unmapped event types', () => {
      const update = parser.parseLine(JSON.stringify({ type: 'unknown', data: 'test' }))
      expect(update).toBeNull()
    })

    test('returns null for invalid JSON', () => {
      expect(parser.parseLine('not valid json')).toBeNull()
    })

    test('returns null for empty line', () => {
      expect(parser.parseLine('')).toBeNull()
    })

    test('preserves raw event in result', () => {
      const sourceEvent = { type: 'assistant', message: { text: 'Hi' } }
      const update = parser.parseLine(JSON.stringify(sourceEvent))
      expect(update?.raw).toEqual(sourceEvent)
    })
  })

  describe('parseResult', () => {
    test('detects result event', () => {
      const outcome = parser.parseResult(JSON.stringify({ type: 'result', result: 'Final answer' }))
      expect(outcome.isResult).toBe(true)
      // The guard narrows the result type so `content` is accessible.
      if (outcome.isResult) {
        expect(outcome.content).toBe('Final answer')
      }
    })

    test('returns not-result for non-result events', () => {
      const outcome = parser.parseResult(JSON.stringify({ type: 'assistant', message: { text: 'Hi' } }))
      expect(outcome.isResult).toBe(false)
    })

    test('returns not-result for invalid JSON', () => {
      expect(parser.parseResult('invalid').isResult).toBe(false)
    })

    test('handles missing content path', () => {
      // The event matches the result type but carries no `result` field.
      const outcome = parser.parseResult(JSON.stringify({ type: 'result' }))
      expect(outcome.isResult).toBe(true)
      if (outcome.isResult) {
        expect(outcome.content).toBe('')
      }
    })
  })
})
|
|
358
|
+
|
|
359
|
+
// ============================================================================
|
|
360
|
+
// History Builder Tests
|
|
361
|
+
// ============================================================================
|
|
362
|
+
|
|
363
|
+
/**
 * Behaviour of the builder returned by `createHistoryBuilder`: turn
 * bookkeeping, template-based formatting, prompt assembly, and isolation of
 * the array handed out by `getHistory`.
 */
describe('createHistoryBuilder', () => {
  describe('basic operations', () => {
    test('starts with empty history', () => {
      const fresh = createHistoryBuilder()
      expect(fresh.getLength()).toBe(0)
      expect(fresh.getHistory()).toEqual([])
    })

    test('adds turns to history', () => {
      const conversation = createHistoryBuilder()
      conversation.addTurn('Hello', 'Hi there')
      expect(conversation.getLength()).toBe(1)
      expect(conversation.getHistory()).toEqual([{ input: 'Hello', output: 'Hi there' }])
    })

    test('accumulates multiple turns', () => {
      const conversation = createHistoryBuilder()
      conversation.addTurn('Hello', 'Hi')
      conversation.addTurn('How are you?', 'Fine')
      expect(conversation.getLength()).toBe(2)
    })

    test('clears history', () => {
      const conversation = createHistoryBuilder()
      conversation.addTurn('Hello', 'Hi')
      conversation.clear()
      expect(conversation.getLength()).toBe(0)
    })
  })

  describe('formatHistory', () => {
    test('uses default template', () => {
      const conversation = createHistoryBuilder()
      conversation.addTurn('Hello', 'Hi there')
      expect(conversation.formatHistory()).toBe('User: Hello\nAssistant: Hi there')
    })

    test('uses custom template', () => {
      const conversation = createHistoryBuilder({ template: 'Q: {{input}}\nA: {{output}}' })
      conversation.addTurn('Question', 'Answer')
      expect(conversation.formatHistory()).toBe('Q: Question\nA: Answer')
    })

    test('separates multiple turns with double newline', () => {
      const conversation = createHistoryBuilder()
      conversation.addTurn('First', 'One')
      conversation.addTurn('Second', 'Two')
      expect(conversation.formatHistory()).toBe('User: First\nAssistant: One\n\nUser: Second\nAssistant: Two')
    })

    test('returns empty string for no history', () => {
      expect(createHistoryBuilder().formatHistory()).toBe('')
    })
  })

  describe('buildPrompt', () => {
    test('returns just input for first turn', () => {
      expect(createHistoryBuilder().buildPrompt('Hello')).toBe('Hello')
    })

    test('includes history for subsequent turns', () => {
      const conversation = createHistoryBuilder()
      conversation.addTurn('Hello', 'Hi')
      const assembled = conversation.buildPrompt('Next question')
      expect(assembled).toContain('User: Hello')
      expect(assembled).toContain('Assistant: Hi')
      expect(assembled).toContain('User: Next question')
    })

    test('builds complete context with multiple turns', () => {
      const conversation = createHistoryBuilder()
      conversation.addTurn('One', 'Reply one')
      conversation.addTurn('Two', 'Reply two')
      const assembled = conversation.buildPrompt('Three')
      expect(assembled).toContain('User: One')
      expect(assembled).toContain('User: Two')
      expect(assembled).toContain('User: Three')
    })
  })

  describe('getHistory returns copy', () => {
    test('modifying returned array does not affect internal state', () => {
      const conversation = createHistoryBuilder()
      conversation.addTurn('Hello', 'Hi')
      // Mutate only the array handed back to the caller.
      const snapshot = conversation.getHistory()
      snapshot.push({ input: 'Fake', output: 'Fake' })
      expect(conversation.getLength()).toBe(1)
    })
  })
})
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
|
|
2
|
+
import { runSchemas } from '../schemas-cli.ts'
|
|
3
|
+
|
|
4
|
+
// ============================================================================
|
|
5
|
+
// runSchemas
|
|
6
|
+
// ============================================================================
|
|
7
|
+
|
|
8
|
+
/**
 * Tests for `runSchemas`, covering its list, single-schema and all-schemas
 * modes plus spot checks of the generated JSON Schema content.
 *
 * File-writing tests use a scratch directory that is removed before and
 * after every test so no artifacts leak between cases.
 */
describe('runSchemas', () => {
  const testOutputDir = '/tmp/acp-harness-test-schemas'

  /** Deletes the scratch directory; `.nothrow()` keeps a failing `rm` from rejecting the hook. */
  const removeTestOutputDir = async () => {
    await Bun.$`rm -rf ${testOutputDir}`.nothrow()
  }

  beforeEach(removeTestOutputDir)
  afterEach(removeTestOutputDir)

  describe('list mode', () => {
    test('returns array of schema names', async () => {
      const result = await runSchemas({ list: true })
      expect(Array.isArray(result)).toBe(true)
      const names = result as string[]
      expect(names).toContain('PromptCase')
      expect(names).toContain('CaptureResult')
      expect(names).toContain('GraderResult')
    })
  })

  describe('single schema mode', () => {
    test('returns single schema by name', async () => {
      const result = await runSchemas({ schemaName: 'PromptCase', json: true })
      expect(typeof result).toBe('object')
      const schemas = result as Record<string, object>
      expect(schemas.PromptCase).toBeDefined()
      expect(schemas.PromptCase).toHaveProperty('$schema')
      expect(schemas.PromptCase).toHaveProperty('title', 'PromptCase')
    })

    test('writes schema to file when outputPath provided', async () => {
      // File name matches the schema being exported (GraderResult); it was
      // previously written to a misleading `prompt-case.json`.
      const outputPath = `${testOutputDir}/grader-result.json`
      await Bun.$`mkdir -p ${testOutputDir}`

      await runSchemas({
        schemaName: 'GraderResult',
        outputPath,
      })

      const content = await Bun.file(outputPath).text()
      const schema = JSON.parse(content)
      expect(schema.title).toBe('GraderResult')
      expect(schema.$schema).toBe('https://json-schema.org/draft/2020-12/schema')
    })
  })

  describe('all schemas mode', () => {
    test('returns all schemas as object', async () => {
      const result = await runSchemas({ json: true })
      expect(typeof result).toBe('object')
      const schemas = result as Record<string, object>

      // Check a sampling of expected schemas
      expect(schemas.PromptCase).toBeDefined()
      expect(schemas.CaptureResult).toBeDefined()
      expect(schemas.GraderResult).toBeDefined()
      expect(schemas.TrajectoryStep).toBeDefined()
      expect(schemas.Session).toBeDefined()
    })

    test('writes all schemas to single file', async () => {
      const outputPath = `${testOutputDir}/all-schemas.json`
      await Bun.$`mkdir -p ${testOutputDir}`

      await runSchemas({
        json: true,
        outputPath,
      })

      const content = await Bun.file(outputPath).text()
      const schemas = JSON.parse(content)
      expect(schemas.PromptCase).toBeDefined()
      expect(schemas.CaptureResult).toBeDefined()
    })

    test('splits schemas into separate files', async () => {
      // NOTE(review): the directory is not created here, so this presumably
      // relies on split mode creating `outputPath` itself; confirm in runSchemas.
      await runSchemas({
        json: true,
        split: true,
        outputPath: testOutputDir,
      })

      // Check that individual files were created
      const promptCaseExists = await Bun.file(`${testOutputDir}/PromptCase.json`).exists()
      const captureResultExists = await Bun.file(`${testOutputDir}/CaptureResult.json`).exists()
      const graderResultExists = await Bun.file(`${testOutputDir}/GraderResult.json`).exists()

      expect(promptCaseExists).toBe(true)
      expect(captureResultExists).toBe(true)
      expect(graderResultExists).toBe(true)

      // Verify content
      const promptCaseContent = await Bun.file(`${testOutputDir}/PromptCase.json`).text()
      const promptCaseSchema = JSON.parse(promptCaseContent)
      expect(promptCaseSchema.title).toBe('PromptCase')
    })
  })

  describe('schema content validation', () => {
    test('PromptCase schema has correct structure', async () => {
      const result = await runSchemas({ schemaName: 'PromptCase', json: true })
      const schemas = result as Record<string, object>
      const schema = schemas.PromptCase as Record<string, unknown>

      expect(schema.$schema).toBe('https://json-schema.org/draft/2020-12/schema')
      expect(schema.title).toBe('PromptCase')
      expect(schema.type).toBe('object')

      // Check properties exist
      const properties = schema.properties as Record<string, unknown>
      expect(properties).toBeDefined()
      expect(properties.id).toBeDefined()
      expect(properties.input).toBeDefined()
    })

    test('GraderResult schema has correct constraints', async () => {
      const result = await runSchemas({ schemaName: 'GraderResult', json: true })
      const schemas = result as Record<string, object>
      const schema = schemas.GraderResult as Record<string, unknown>

      expect(schema.type).toBe('object')
      const properties = schema.properties as Record<string, Record<string, unknown>>
      expect(properties.pass).toBeDefined()
      expect(properties.score).toBeDefined()
      expect(properties.pass?.type).toBe('boolean')
      expect(properties.score?.type).toBe('number')
      // Score is expected to be constrained to the closed range [0, 1].
      expect(properties.score?.minimum).toBe(0)
      expect(properties.score?.maximum).toBe(1)
    })
  })
})
|