@plaited/acp-harness 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,553 @@
1
+ import { describe, expect, test } from 'bun:test'
2
+ import type { SessionNotification } from '@agentclientprotocol/sdk'
3
+ import {
4
+ extractContent,
5
+ extractFilePath,
6
+ extractOutput,
7
+ extractTrajectory,
8
+ hasToolErrors,
9
+ headTailPreview,
10
+ loadPrompts,
11
+ } from '../capture.ts'
12
+ import type { TrajectoryStep } from '../schemas.ts'
13
+
14
+ // ============================================================================
15
+ // loadPrompts
16
+ // ============================================================================
17
+
18
/**
 * Suite for `loadPrompts`.
 *
 * Each case writes a small JSONL fixture to disk with `Bun.write` and then
 * feeds the resulting path to the loader.
 */
describe('loadPrompts', () => {
  /** Persists `body` at `path` and returns the path so it can be loaded. */
  const writeFixture = async (path: string, body: string): Promise<string> => {
    await Bun.write(path, body)
    return path
  }

  test('parses valid JSONL file', async () => {
    // Two records, one per line; only the second carries `expected`.
    const fixture = await writeFixture(
      '/tmp/test-prompts-valid.jsonl',
      `{"id": "test-1", "input": "What is 2+2?"}
{"id": "test-2", "input": "Hello world", "expected": "greeting"}`,
    )

    const loaded = await loadPrompts(fixture)

    expect(loaded).toHaveLength(2)
    const [first, second] = loaded
    expect(first?.id).toBe('test-1')
    expect(first?.input).toBe('What is 2+2?')
    expect(second?.id).toBe('test-2')
    expect(second?.expected).toBe('greeting')
  })

  test('parses prompts with metadata', async () => {
    const fixture = await writeFixture(
      '/tmp/test-prompts-metadata.jsonl',
      `{"id": "test-1", "input": "Test", "metadata": {"category": "math", "difficulty": "easy"}}`,
    )

    const loaded = await loadPrompts(fixture)

    expect(loaded).toHaveLength(1)
    const metadata = loaded[0]?.metadata
    expect(metadata?.category).toBe('math')
    expect(metadata?.difficulty).toBe('easy')
  })

  test('throws on invalid JSON at specific line', async () => {
    // The malformed record sits on line 2, between two well-formed ones.
    const fixture = await writeFixture(
      '/tmp/test-prompts-invalid.jsonl',
      `{"id": "test-1", "input": "Valid"}
{invalid json here}
{"id": "test-3", "input": "Also valid"}`,
    )

    await expect(loadPrompts(fixture)).rejects.toThrow('Invalid prompt at line 2')
  })

  test('throws on missing required fields', async () => {
    // The record has an `id` but no `input`.
    const fixture = await writeFixture('/tmp/test-prompts-missing.jsonl', `{"id": "test-1"}`)

    await expect(loadPrompts(fixture)).rejects.toThrow('Invalid prompt at line 1')
  })

  test('handles empty lines gracefully', async () => {
    // A blank line between the records and a trailing newline at the end.
    const fixture = await writeFixture(
      '/tmp/test-prompts-empty-lines.jsonl',
      `{"id": "test-1", "input": "First"}

{"id": "test-2", "input": "Second"}
`,
    )

    const loaded = await loadPrompts(fixture)

    expect(loaded).toHaveLength(2)
  })
})
84
+
85
+ // ============================================================================
86
+ // extractTrajectory
87
+ // ============================================================================
88
+
89
/**
 * Suite for `extractTrajectory`, which turns a list of ACP session
 * notifications into trajectory steps (thought / message / tool_call / plan).
 *
 * Tests that need a deterministic clock replace `Date.now` through
 * `withFakeNow`, which always restores the real implementation so a failing
 * assertion cannot leak a fake clock into later tests.
 */
describe('extractTrajectory', () => {
  const baseTime = 0

  /**
   * Runs `run` while `Date.now` is replaced by `fakeNow`.
   *
   * The original `Date.now` is put back in a `finally` block, so it is
   * restored even when an assertion inside `run` throws.
   */
  const withFakeNow = (fakeNow: () => number, run: () => void): void => {
    const realNow = Date.now
    Date.now = fakeNow
    try {
      run()
    } finally {
      Date.now = realNow
    }
  }

  test('extracts thoughts from agent_thought_chunk notifications', () => {
    const notifications: SessionNotification[] = [
      {
        sessionId: 's1',
        update: {
          sessionUpdate: 'agent_thought_chunk',
          content: { type: 'text', text: 'Let me think about this...' },
        },
      },
    ]

    const trajectory = extractTrajectory(notifications, baseTime)

    expect(trajectory).toHaveLength(1)
    expect(trajectory[0]?.type).toBe('thought')
    // The `if` only narrows the union for the type checker; the expectation
    // above already fails the test when the step kind is wrong.
    if (trajectory[0]?.type === 'thought') {
      expect(trajectory[0].content).toBe('Let me think about this...')
    }
  })

  test('extracts messages from agent_message_chunk notifications', () => {
    const notifications: SessionNotification[] = [
      {
        sessionId: 's1',
        update: {
          sessionUpdate: 'agent_message_chunk',
          content: { type: 'text', text: 'Here is my answer.' },
        },
      },
    ]

    const trajectory = extractTrajectory(notifications, baseTime)

    expect(trajectory).toHaveLength(1)
    expect(trajectory[0]?.type).toBe('message')
    if (trajectory[0]?.type === 'message') {
      expect(trajectory[0].content).toBe('Here is my answer.')
    }
  })

  test('extracts tool calls with initial pending status', () => {
    const notifications: SessionNotification[] = [
      {
        sessionId: 's1',
        update: {
          sessionUpdate: 'tool_call',
          toolCallId: 't1',
          title: 'Read',
          status: 'pending',
          rawInput: '{"file_path": "/test.ts"}',
        },
      },
    ]

    const trajectory = extractTrajectory(notifications, baseTime)

    expect(trajectory).toHaveLength(1)
    expect(trajectory[0]?.type).toBe('tool_call')
    if (trajectory[0]?.type === 'tool_call') {
      expect(trajectory[0].name).toBe('Read')
      expect(trajectory[0].status).toBe('pending')
      expect(trajectory[0].input).toBe('{"file_path": "/test.ts"}')
    }
  })

  test('updates tool call status on subsequent notifications', () => {
    // Both notifications share toolCallId 't1'.
    const notifications: SessionNotification[] = [
      {
        sessionId: 's1',
        update: {
          sessionUpdate: 'tool_call',
          toolCallId: 't1',
          title: 'Read',
          status: 'pending',
        },
      },
      {
        sessionId: 's1',
        update: {
          sessionUpdate: 'tool_call',
          toolCallId: 't1',
          title: 'Read',
          status: 'completed',
          rawOutput: 'file contents here',
        },
      },
    ]

    const trajectory = extractTrajectory(notifications, baseTime)

    // Should still be 1 entry, just updated
    expect(trajectory).toHaveLength(1)
    // Assert the kind explicitly so the guarded expectations below cannot be
    // skipped silently.
    expect(trajectory[0]?.type).toBe('tool_call')
    if (trajectory[0]?.type === 'tool_call') {
      expect(trajectory[0].status).toBe('completed')
      expect(trajectory[0].output).toBe('file contents here')
    }
  })

  test('tracks multiple independent tool calls', () => {
    const notifications: SessionNotification[] = [
      {
        sessionId: 's1',
        update: { sessionUpdate: 'tool_call', toolCallId: 't1', title: 'Read', status: 'completed' },
      },
      {
        sessionId: 's1',
        update: { sessionUpdate: 'tool_call', toolCallId: 't2', title: 'Write', status: 'completed' },
      },
    ]

    const trajectory = extractTrajectory(notifications, baseTime)

    expect(trajectory).toHaveLength(2)
    // `type === 'tool_call' && name` evaluates to `false` for a wrong kind,
    // which never equals the expected name, so these cannot pass vacuously.
    expect(trajectory[0]?.type === 'tool_call' && trajectory[0].name).toBe('Read')
    expect(trajectory[1]?.type === 'tool_call' && trajectory[1].name).toBe('Write')
  })

  test('extracts plan entries', () => {
    const notifications: SessionNotification[] = [
      {
        sessionId: 's1',
        update: {
          sessionUpdate: 'plan',
          entries: [
            { content: 'Step 1', status: 'completed', priority: 'high' },
            { content: 'Step 2', status: 'in_progress', priority: 'medium' },
          ],
        },
      },
    ]

    const trajectory = extractTrajectory(notifications, baseTime)

    expect(trajectory).toHaveLength(1)
    expect(trajectory[0]?.type).toBe('plan')
    if (trajectory[0]?.type === 'plan') {
      expect(trajectory[0].entries).toHaveLength(2)
    }
  })

  test('handles empty notifications', () => {
    const trajectory = extractTrajectory([], baseTime)
    expect(trajectory).toEqual([])
  })

  test('assigns timestamps relative to start time', () => {
    const startTime = 1000
    const notifications: SessionNotification[] = [
      {
        sessionId: 's1',
        update: { sessionUpdate: 'agent_message_chunk', content: { type: 'text', text: 'First' } },
      },
    ]

    // The clock reads 1500 during extraction, i.e. 500ms after `startTime`.
    withFakeNow(
      () => 1500,
      () => {
        const trajectory = extractTrajectory(notifications, startTime)

        expect(trajectory[0]?.timestamp).toBe(500)
      },
    )
  })

  test('calculates tool call duration correctly', () => {
    const startTime = 1000
    const notifications: SessionNotification[] = [
      {
        sessionId: 's1',
        update: { sessionUpdate: 'tool_call', toolCallId: 't1', title: 'Bash', status: 'pending' },
      },
      {
        sessionId: 's1',
        update: { sessionUpdate: 'tool_call', toolCallId: 't1', title: 'Bash', status: 'completed' },
      },
    ]

    // The notifications carry no timestamps of their own. This test relies on
    // extractTrajectory reading Date.now() once per notification, so the fake
    // clock hands out one reading per call: 1100 (t=100) for the pending
    // update, 1600 (t=600) for the completed one, and 1600 for any extra read.
    const clockReadings = [1100, 1600]
    let reads = 0

    withFakeNow(
      () => clockReadings[reads++] ?? 1600,
      () => {
        const trajectory = extractTrajectory(notifications, startTime)

        // Assert the kind explicitly so the duration check cannot be skipped.
        expect(trajectory[0]?.type).toBe('tool_call')
        if (trajectory[0]?.type === 'tool_call') {
          // Duration should be 500ms (600 - 100)
          expect(trajectory[0].duration).toBe(500)
        }
      },
    )
  })

  test('ignores non-text content in thought chunks', () => {
    const notifications: SessionNotification[] = [
      {
        sessionId: 's1',
        update: {
          sessionUpdate: 'agent_thought_chunk',
          // Image content should be skipped
          content: { type: 'image', data: 'base64', mimeType: 'image/png' },
        },
      },
    ]

    const trajectory = extractTrajectory(notifications, baseTime)
    expect(trajectory).toHaveLength(0)
  })
})
321
+
322
+ // ============================================================================
323
+ // extractOutput
324
+ // ============================================================================
325
+
326
/**
 * Suite for `extractOutput`: the expectations below pin that only `message`
 * steps contribute to the output and that their contents are joined with
 * newline characters.
 */
describe('extractOutput', () => {
  test('joins message contents with newlines', () => {
    const steps: TrajectoryStep[] = [
      { type: 'message', content: 'First line', timestamp: 0 },
      { type: 'message', content: 'Second line', timestamp: 100 },
    ]

    const output = extractOutput(steps)

    expect(output).toBe('First line\nSecond line')
  })

  test('filters out non-message steps', () => {
    // Thought and tool_call steps are interleaved with the two messages.
    const steps: TrajectoryStep[] = [
      { type: 'thought', content: 'Thinking...', timestamp: 0 },
      { type: 'message', content: 'Answer', timestamp: 100 },
      { type: 'tool_call', name: 'Read', status: 'completed', timestamp: 200 },
      { type: 'message', content: 'Done', timestamp: 300 },
    ]

    const output = extractOutput(steps)

    expect(output).toBe('Answer\nDone')
  })

  test('returns empty string for empty trajectory', () => {
    const output = extractOutput([])

    expect(output).toBe('')
  })

  test('returns empty string when no messages', () => {
    const steps: TrajectoryStep[] = [
      { type: 'thought', content: 'Just thinking', timestamp: 0 },
      { type: 'tool_call', name: 'Read', status: 'completed', timestamp: 100 },
    ]

    const output = extractOutput(steps)

    expect(output).toBe('')
  })

  test('handles single message', () => {
    const steps: TrajectoryStep[] = [{ type: 'message', content: 'Only message', timestamp: 0 }]

    const output = extractOutput(steps)

    expect(output).toBe('Only message')
  })
})
366
+
367
+ // ============================================================================
368
+ // hasToolErrors
369
+ // ============================================================================
370
+
371
/**
 * Suite for `hasToolErrors`: the expectations below pin that the result is
 * `true` exactly when some `tool_call` step has status `'failed'`.
 */
describe('hasToolErrors', () => {
  test('returns false when no tool calls', () => {
    const steps: TrajectoryStep[] = [
      { type: 'thought', content: 'Thinking', timestamp: 0 },
      { type: 'message', content: 'Done', timestamp: 100 },
    ]

    const failed = hasToolErrors(steps)

    expect(failed).toBe(false)
  })

  test('returns false when all tool calls succeeded', () => {
    const steps: TrajectoryStep[] = [
      { type: 'tool_call', name: 'Read', status: 'completed', timestamp: 0 },
      { type: 'tool_call', name: 'Write', status: 'completed', timestamp: 100 },
    ]

    const failed = hasToolErrors(steps)

    expect(failed).toBe(false)
  })

  test('returns true when any tool call failed', () => {
    // The failing call sits between two successful ones.
    const steps: TrajectoryStep[] = [
      { type: 'tool_call', name: 'Read', status: 'completed', timestamp: 0 },
      { type: 'tool_call', name: 'Write', status: 'failed', timestamp: 100 },
      { type: 'tool_call', name: 'Bash', status: 'completed', timestamp: 200 },
    ]

    const failed = hasToolErrors(steps)

    expect(failed).toBe(true)
  })

  test('returns false for empty trajectory', () => {
    const failed = hasToolErrors([])

    expect(failed).toBe(false)
  })

  test('returns true when only tool call failed', () => {
    const steps: TrajectoryStep[] = [{ type: 'tool_call', name: 'Bash', status: 'failed', timestamp: 0 }]

    const failed = hasToolErrors(steps)

    expect(failed).toBe(true)
  })
})
410
+
411
+ // ============================================================================
412
+ // headTailPreview
413
+ // ============================================================================
414
+
415
/**
 * Suite for `headTailPreview(content, headLines, tailLines)`.
 *
 * The expectations below pin that content which fits within head + tail lines
 * is returned unchanged, and that longer content keeps the head and tail lines
 * plus an "N lines omitted" marker.
 *
 * NOTE(review): the `toContain('lineN')` checks are substring matches, so
 * `'line1'` is also satisfied by `'line10'` or `'line18'`. Consider asserting
 * on whole lines once the exact preview format is confirmed.
 */
describe('headTailPreview', () => {
  /** Builds `count` lines named `line1` … `line<count>`, newline-separated. */
  const numberedLines = (count: number): string =>
    Array.from({ length: count }, (_, index) => `line${index + 1}`).join('\n')

  /** Asserts that every fragment occurs somewhere in `preview`, in order. */
  const expectAllContained = (preview: string, fragments: string[]): void => {
    for (const fragment of fragments) {
      expect(preview).toContain(fragment)
    }
  }

  test('returns full content when under limit', () => {
    const content = 'line1\nline2\nline3'

    const preview = headTailPreview(content, 5, 5)

    expect(preview).toBe(content)
  })

  test('truncates with omitted count for long content', () => {
    // 20 lines with 3 kept at each end leaves 14 in the middle.
    const preview = headTailPreview(numberedLines(20), 3, 3)

    expectAllContained(preview, ['line1', 'line2', 'line3', 'line18', 'line19', 'line20', '14 lines omitted'])
  })

  test('respects custom head line count', () => {
    const preview = headTailPreview(numberedLines(10), 2, 2)

    expectAllContained(preview, ['line1', 'line2'])
    expect(preview).not.toContain('line3')
    expect(preview).toContain('6 lines omitted')
  })

  test('respects custom tail line count', () => {
    const preview = headTailPreview(numberedLines(10), 1, 4)

    expectAllContained(preview, ['line1', 'line7', 'line10', '5 lines omitted'])
  })

  test('handles content exactly at boundary', () => {
    const content = 'line1\nline2\nline3\nline4\nline5\nline6'

    // 6 lines, head=3, tail=3 means no truncation needed
    const preview = headTailPreview(content, 3, 3)

    expect(preview).toBe(content)
  })

  test('handles single line content', () => {
    const content = 'single line'

    const preview = headTailPreview(content, 3, 3)

    expect(preview).toBe(content)
  })

  test('handles empty content', () => {
    const preview = headTailPreview('', 3, 3)

    expect(preview).toBe('')
  })
})
475
+
476
+ // ============================================================================
477
+ // extractFilePath
478
+ // ============================================================================
479
+
480
/**
 * Suite for `extractFilePath`: the expectations below pin that `file_path` is
 * read first, `path` is used as a fallback, and anything else yields
 * `undefined`.
 */
describe('extractFilePath', () => {
  test('extracts file_path field', () => {
    const found = extractFilePath({ file_path: '/path/to/file.ts' })

    expect(found).toBe('/path/to/file.ts')
  })

  test('extracts path field as fallback', () => {
    const found = extractFilePath({ path: '/another/path.js' })

    expect(found).toBe('/another/path.js')
  })

  test('prefers file_path over path', () => {
    const found = extractFilePath({ file_path: '/preferred.ts', path: '/fallback.ts' })

    expect(found).toBe('/preferred.ts')
  })

  test('returns undefined for invalid input', () => {
    // Non-object inputs, checked in the same order as before.
    const invalidInputs = [null, undefined, 'string', 123]

    for (const invalid of invalidInputs) {
      expect(extractFilePath(invalid)).toBeUndefined()
    }
  })

  test('returns undefined when no path fields present', () => {
    const found = extractFilePath({ content: 'some content' })

    expect(found).toBeUndefined()
  })

  test('handles empty object', () => {
    const found = extractFilePath({})

    expect(found).toBeUndefined()
  })
})
512
+
513
+ // ============================================================================
514
+ // extractContent
515
+ // ============================================================================
516
+
517
/**
 * Suite for `extractContent`: the expectations below pin that `content` is
 * read first, `new_string` is used as a fallback, and anything else yields
 * `undefined`.
 */
describe('extractContent', () => {
  test('extracts content field', () => {
    const found = extractContent({ content: 'const x = 1;' })

    expect(found).toBe('const x = 1;')
  })

  test('extracts new_string field as fallback', () => {
    const found = extractContent({ new_string: 'const y = 2;' })

    expect(found).toBe('const y = 2;')
  })

  test('prefers content over new_string', () => {
    const found = extractContent({ content: 'preferred', new_string: 'fallback' })

    expect(found).toBe('preferred')
  })

  test('returns undefined for invalid input', () => {
    // Non-object inputs, checked in the same order as before.
    const invalidInputs = [null, undefined, 'string', 123]

    for (const invalid of invalidInputs) {
      expect(extractContent(invalid)).toBeUndefined()
    }
  })

  test('returns undefined when no content fields present', () => {
    const found = extractContent({ file_path: '/some/path.ts' })

    expect(found).toBeUndefined()
  })

  test('handles empty object', () => {
    const found = extractContent({})

    expect(found).toBeUndefined()
  })

  test('handles multiline content', () => {
    // Embedded newlines must come back untouched.
    const multiline = 'line1\nline2\nline3'

    const found = extractContent({ content: multiline })

    expect(found).toBe('line1\nline2\nline3')
  })
})