@plaited/acp-harness 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,11 @@
1
1
  import { describe, expect, test } from 'bun:test'
2
2
  import type { SessionNotification } from '@agentclientprotocol/sdk'
3
3
  import {
4
+ detectTrajectoryRichness,
4
5
  extractContent,
5
6
  extractFilePath,
6
7
  extractOutput,
8
+ extractTokenCounts,
7
9
  extractTrajectory,
8
10
  hasToolErrors,
9
11
  headTailPreview,
@@ -16,13 +18,13 @@ import type { TrajectoryStep } from '../schemas.ts'
16
18
  // ============================================================================
17
19
 
18
20
  describe('loadPrompts', () => {
19
- test('parses valid JSONL file', async () => {
21
+ test('parses valid JSONL file with string input', async () => {
20
22
  // Create a temporary test file
21
23
  const testPath = '/tmp/test-prompts-valid.jsonl'
22
24
  await Bun.write(
23
25
  testPath,
24
26
  `{"id": "test-1", "input": "What is 2+2?"}
25
- {"id": "test-2", "input": "Hello world", "expected": "greeting"}`,
27
+ {"id": "test-2", "input": "Hello world", "hint": "greeting"}`,
26
28
  )
27
29
 
28
30
  const prompts = await loadPrompts(testPath)
@@ -31,7 +33,20 @@ describe('loadPrompts', () => {
31
33
  expect(prompts[0]?.id).toBe('test-1')
32
34
  expect(prompts[0]?.input).toBe('What is 2+2?')
33
35
  expect(prompts[1]?.id).toBe('test-2')
34
- expect(prompts[1]?.expected).toBe('greeting')
36
+ expect(prompts[1]?.hint).toBe('greeting')
37
+ })
38
+
39
+ test('parses multi-turn input (string array)', async () => {
40
+ const testPath = '/tmp/test-prompts-multiturn.jsonl'
41
+ await Bun.write(testPath, `{"id": "test-1", "input": ["Hello", "How are you?", "Goodbye"], "hint": "farewell"}`)
42
+
43
+ const prompts = await loadPrompts(testPath)
44
+
45
+ expect(prompts).toHaveLength(1)
46
+ expect(prompts[0]?.id).toBe('test-1')
47
+ expect(Array.isArray(prompts[0]?.input)).toBe(true)
48
+ expect(prompts[0]?.input).toEqual(['Hello', 'How are you?', 'Goodbye'])
49
+ expect(prompts[0]?.hint).toBe('farewell')
35
50
  })
36
51
 
37
52
  test('parses prompts with metadata', async () => {
@@ -104,9 +119,9 @@ describe('extractTrajectory', () => {
104
119
 
105
120
  expect(trajectory).toHaveLength(1)
106
121
  expect(trajectory[0]?.type).toBe('thought')
107
- if (trajectory[0]?.type === 'thought') {
108
- expect(trajectory[0].content).toBe('Let me think about this...')
109
- }
122
+ // Type narrowing after explicit assertion
123
+ const step = trajectory[0]!
124
+ expect(step.type === 'thought' && step.content).toBe('Let me think about this...')
110
125
  })
111
126
 
112
127
  test('extracts messages from agent_message_chunk notifications', () => {
@@ -124,9 +139,9 @@ describe('extractTrajectory', () => {
124
139
 
125
140
  expect(trajectory).toHaveLength(1)
126
141
  expect(trajectory[0]?.type).toBe('message')
127
- if (trajectory[0]?.type === 'message') {
128
- expect(trajectory[0].content).toBe('Here is my answer.')
129
- }
142
+ // Type narrowing after explicit assertion
143
+ const step = trajectory[0]!
144
+ expect(step.type === 'message' && step.content).toBe('Here is my answer.')
130
145
  })
131
146
 
132
147
  test('extracts tool calls with initial pending status', () => {
@@ -147,11 +162,11 @@ describe('extractTrajectory', () => {
147
162
 
148
163
  expect(trajectory).toHaveLength(1)
149
164
  expect(trajectory[0]?.type).toBe('tool_call')
150
- if (trajectory[0]?.type === 'tool_call') {
151
- expect(trajectory[0].name).toBe('Read')
152
- expect(trajectory[0].status).toBe('pending')
153
- expect(trajectory[0].input).toBe('{"file_path": "/test.ts"}')
154
- }
165
+ // Type narrowing after explicit assertion
166
+ const step = trajectory[0]!
167
+ expect(step.type === 'tool_call' && step.name).toBe('Read')
168
+ expect(step.type === 'tool_call' && step.status).toBe('pending')
169
+ expect(step.type === 'tool_call' && step.input).toBe('{"file_path": "/test.ts"}')
155
170
  })
156
171
 
157
172
  test('updates tool call status on subsequent notifications', () => {
@@ -181,10 +196,11 @@ describe('extractTrajectory', () => {
181
196
 
182
197
  // Should still be 1 entry, just updated
183
198
  expect(trajectory).toHaveLength(1)
184
- if (trajectory[0]?.type === 'tool_call') {
185
- expect(trajectory[0].status).toBe('completed')
186
- expect(trajectory[0].output).toBe('file contents here')
187
- }
199
+ expect(trajectory[0]?.type).toBe('tool_call')
200
+ // Type narrowing after explicit assertion
201
+ const step = trajectory[0]!
202
+ expect(step.type === 'tool_call' && step.status).toBe('completed')
203
+ expect(step.type === 'tool_call' && step.output).toBe('file contents here')
188
204
  })
189
205
 
190
206
  test('tracks multiple independent tool calls', () => {
@@ -202,8 +218,13 @@ describe('extractTrajectory', () => {
202
218
  const trajectory = extractTrajectory(notifications, baseTime)
203
219
 
204
220
  expect(trajectory).toHaveLength(2)
205
- expect(trajectory[0]?.type === 'tool_call' && trajectory[0].name).toBe('Read')
206
- expect(trajectory[1]?.type === 'tool_call' && trajectory[1].name).toBe('Write')
221
+ expect(trajectory[0]?.type).toBe('tool_call')
222
+ expect(trajectory[1]?.type).toBe('tool_call')
223
+ // Type narrowing after explicit assertions
224
+ const step0 = trajectory[0]!
225
+ const step1 = trajectory[1]!
226
+ expect(step0.type === 'tool_call' && step0.name).toBe('Read')
227
+ expect(step1.type === 'tool_call' && step1.name).toBe('Write')
207
228
  })
208
229
 
209
230
  test('extracts plan entries', () => {
@@ -224,9 +245,9 @@ describe('extractTrajectory', () => {
224
245
 
225
246
  expect(trajectory).toHaveLength(1)
226
247
  expect(trajectory[0]?.type).toBe('plan')
227
- if (trajectory[0]?.type === 'plan') {
228
- expect(trajectory[0].entries).toHaveLength(2)
229
- }
248
+ // Type narrowing after explicit assertion
249
+ const step = trajectory[0]!
250
+ expect(step.type === 'plan' && step.entries).toHaveLength(2)
230
251
  })
231
252
 
232
253
  test('handles empty notifications', () => {
@@ -237,69 +258,72 @@ describe('extractTrajectory', () => {
237
258
  test('assigns timestamps relative to start time', () => {
238
259
  // Mock Date.now to control timestamps
239
260
  const originalNow = Date.now
240
- let currentTime = 1000
261
+ try {
262
+ let currentTime = 1000
241
263
 
242
- Date.now = () => currentTime
264
+ Date.now = () => currentTime
243
265
 
244
- const notifications: SessionNotification[] = [
245
- {
246
- sessionId: 's1',
247
- update: { sessionUpdate: 'agent_message_chunk', content: { type: 'text', text: 'First' } },
248
- },
249
- ]
250
-
251
- const startTime = 1000
252
- currentTime = 1500 // 500ms later
266
+ const notifications: SessionNotification[] = [
267
+ {
268
+ sessionId: 's1',
269
+ update: { sessionUpdate: 'agent_message_chunk', content: { type: 'text', text: 'First' } },
270
+ },
271
+ ]
253
272
 
254
- const trajectory = extractTrajectory(notifications, startTime)
273
+ const startTime = 1000
274
+ currentTime = 1500 // 500ms later
255
275
 
256
- expect(trajectory[0]?.timestamp).toBe(500)
276
+ const trajectory = extractTrajectory(notifications, startTime)
257
277
 
258
- // Restore
259
- Date.now = originalNow
278
+ expect(trajectory[0]?.timestamp).toBe(500)
279
+ } finally {
280
+ Date.now = originalNow
281
+ }
260
282
  })
261
283
 
262
284
  test('calculates tool call duration correctly', () => {
263
285
  const originalNow = Date.now
264
- let currentTime = 1000
286
+ try {
287
+ let currentTime = 1000
265
288
 
266
- Date.now = () => currentTime
289
+ Date.now = () => currentTime
267
290
 
268
- const startTime = 1000
291
+ const startTime = 1000
269
292
 
270
- // Simulate time passing between notifications
271
- // First notification at t=100 (currentTime = 1100)
272
- // Second notification at t=600 (currentTime = 1600)
273
- const notifications: SessionNotification[] = []
293
+ // Simulate time passing between notifications
294
+ // First notification at t=100 (currentTime = 1100)
295
+ // Second notification at t=600 (currentTime = 1600)
296
+ const notifications: SessionNotification[] = []
274
297
 
275
- currentTime = 1100 // First call at 100ms relative to start
276
- notifications.push({
277
- sessionId: 's1',
278
- update: { sessionUpdate: 'tool_call', toolCallId: 't1', title: 'Bash', status: 'pending' },
279
- })
298
+ currentTime = 1100 // First call at 100ms relative to start
299
+ notifications.push({
300
+ sessionId: 's1',
301
+ update: { sessionUpdate: 'tool_call', toolCallId: 't1', title: 'Bash', status: 'pending' },
302
+ })
280
303
 
281
- currentTime = 1600 // Second call at 600ms relative to start
282
- notifications.push({
283
- sessionId: 's1',
284
- update: { sessionUpdate: 'tool_call', toolCallId: 't1', title: 'Bash', status: 'completed' },
285
- })
304
+ currentTime = 1600 // Second call at 600ms relative to start
305
+ notifications.push({
306
+ sessionId: 's1',
307
+ update: { sessionUpdate: 'tool_call', toolCallId: 't1', title: 'Bash', status: 'completed' },
308
+ })
286
309
 
287
- // Now process all notifications in one call
288
- // But the issue is extractTrajectory calls Date.now() for each notification
289
- // so we need to mock it to return different values for each call
310
+ // Now process all notifications in one call
311
+ // But the issue is extractTrajectory calls Date.now() for each notification
312
+ // so we need to mock it to return different values for each call
290
313
 
291
- let callCount = 0
292
- const times = [1100, 1600]
293
- Date.now = () => times[callCount++] ?? 1600
314
+ let callCount = 0
315
+ const times = [1100, 1600]
316
+ Date.now = () => times[callCount++] ?? 1600
294
317
 
295
- const trajectory = extractTrajectory(notifications, startTime)
318
+ const trajectory = extractTrajectory(notifications, startTime)
296
319
 
297
- if (trajectory[0]?.type === 'tool_call') {
298
- // Duration should be 500ms (600 - 100)
299
- expect(trajectory[0].duration).toBe(500)
320
+ expect(trajectory[0]?.type).toBe('tool_call')
321
+ // Type narrowing after explicit assertion - Duration should be 500ms (600 - 100)
322
+ const step = trajectory[0]!
323
+ expect(step.type === 'tool_call' && step.duration).toBe(500)
324
+ } finally {
325
+ Date.now = originalNow
300
326
  }
301
-
302
- Date.now = originalNow
303
327
  })
304
328
 
305
329
  test('ignores non-text content in thought chunks', () => {
@@ -551,3 +575,141 @@ describe('extractContent', () => {
551
575
  expect(extractContent(input)).toBe('line1\nline2\nline3')
552
576
  })
553
577
  })
578
+
579
+ // ============================================================================
580
+ // detectTrajectoryRichness
581
+ // ============================================================================
582
+
583
+ describe('detectTrajectoryRichness', () => {
584
+ test('returns "full" when trajectory has thoughts', () => {
585
+ const trajectory: TrajectoryStep[] = [
586
+ { type: 'thought', content: 'Let me think...', timestamp: 0 },
587
+ { type: 'message', content: 'Answer', timestamp: 100 },
588
+ ]
589
+
590
+ expect(detectTrajectoryRichness(trajectory)).toBe('full')
591
+ })
592
+
593
+ test('returns "full" when trajectory has tool calls', () => {
594
+ const trajectory: TrajectoryStep[] = [
595
+ { type: 'tool_call', name: 'Read', status: 'completed', timestamp: 0 },
596
+ { type: 'message', content: 'Answer', timestamp: 100 },
597
+ ]
598
+
599
+ expect(detectTrajectoryRichness(trajectory)).toBe('full')
600
+ })
601
+
602
+ test('returns "full" when trajectory has plans', () => {
603
+ const trajectory: TrajectoryStep[] = [
604
+ { type: 'plan', entries: [{ content: 'Step 1', status: 'completed' }], timestamp: 0 },
605
+ { type: 'message', content: 'Answer', timestamp: 100 },
606
+ ]
607
+
608
+ expect(detectTrajectoryRichness(trajectory)).toBe('full')
609
+ })
610
+
611
+ test('returns "messages-only" when trajectory only has messages', () => {
612
+ const trajectory: TrajectoryStep[] = [
613
+ { type: 'message', content: 'First', timestamp: 0 },
614
+ { type: 'message', content: 'Second', timestamp: 100 },
615
+ ]
616
+
617
+ expect(detectTrajectoryRichness(trajectory)).toBe('messages-only')
618
+ })
619
+
620
+ test('returns "minimal" when trajectory is empty', () => {
621
+ expect(detectTrajectoryRichness([])).toBe('minimal')
622
+ })
623
+
624
+ test('returns "full" when trajectory has mixed rich content', () => {
625
+ const trajectory: TrajectoryStep[] = [
626
+ { type: 'thought', content: 'Thinking...', timestamp: 0 },
627
+ { type: 'tool_call', name: 'Read', status: 'completed', timestamp: 50 },
628
+ { type: 'plan', entries: [], timestamp: 100 },
629
+ { type: 'message', content: 'Done', timestamp: 150 },
630
+ ]
631
+
632
+ expect(detectTrajectoryRichness(trajectory)).toBe('full')
633
+ })
634
+ })
635
+
636
+ // ============================================================================
637
+ // extractTokenCounts
638
+ // ============================================================================
639
+
640
+ describe('extractTokenCounts', () => {
641
+ test('returns undefined when no usage data present', () => {
642
+ const updates: SessionNotification[] = [
643
+ {
644
+ sessionId: 's1',
645
+ update: { sessionUpdate: 'agent_message_chunk', content: { type: 'text', text: 'Hello' } },
646
+ },
647
+ ]
648
+
649
+ const result = extractTokenCounts(updates)
650
+
651
+ expect(result.inputTokens).toBeUndefined()
652
+ expect(result.outputTokens).toBeUndefined()
653
+ })
654
+
655
+ test('extracts token counts from usage field when present', () => {
656
+ const updates: SessionNotification[] = [
657
+ {
658
+ sessionId: 's1',
659
+ update: { sessionUpdate: 'agent_message_chunk', content: { type: 'text', text: 'Hello' } },
660
+ // @ts-expect-error - SessionNotification type doesn't include 'usage' field, but adapters like Claude Code add it at runtime
661
+ usage: { inputTokens: 50, outputTokens: 30 },
662
+ },
663
+ ]
664
+
665
+ const result = extractTokenCounts(updates)
666
+
667
+ expect(result.inputTokens).toBe(50)
668
+ expect(result.outputTokens).toBe(30)
669
+ })
670
+
671
+ test('accumulates token counts across multiple updates', () => {
672
+ const updates: SessionNotification[] = [
673
+ {
674
+ sessionId: 's1',
675
+ update: { sessionUpdate: 'agent_message_chunk', content: { type: 'text', text: 'First' } },
676
+ // @ts-expect-error - SessionNotification type doesn't include 'usage' field, but adapters like Claude Code add it at runtime
677
+ usage: { inputTokens: 50, outputTokens: 30 },
678
+ },
679
+ {
680
+ sessionId: 's1',
681
+ update: { sessionUpdate: 'agent_message_chunk', content: { type: 'text', text: 'Second' } },
682
+ // @ts-expect-error - SessionNotification type doesn't include 'usage' field, but adapters like Claude Code add it at runtime
683
+ usage: { inputTokens: 25, outputTokens: 45 },
684
+ },
685
+ ]
686
+
687
+ const result = extractTokenCounts(updates)
688
+
689
+ expect(result.inputTokens).toBe(75) // 50 + 25
690
+ expect(result.outputTokens).toBe(75) // 30 + 45
691
+ })
692
+
693
+ test('handles empty updates array', () => {
694
+ const result = extractTokenCounts([])
695
+
696
+ expect(result.inputTokens).toBeUndefined()
697
+ expect(result.outputTokens).toBeUndefined()
698
+ })
699
+
700
+ test('handles partial token counts (only input or output)', () => {
701
+ const updates: SessionNotification[] = [
702
+ {
703
+ sessionId: 's1',
704
+ update: { sessionUpdate: 'agent_message_chunk', content: { type: 'text', text: 'Hello' } },
705
+ // @ts-expect-error - SessionNotification type doesn't include 'usage' field, but adapters like Claude Code add it at runtime
706
+ usage: { inputTokens: 100 },
707
+ },
708
+ ]
709
+
710
+ const result = extractTokenCounts(updates)
711
+
712
+ expect(result.inputTokens).toBe(100)
713
+ expect(result.outputTokens).toBeUndefined()
714
+ })
715
+ })
@@ -0,0 +1,121 @@
1
+ import { describe, expect, test } from 'bun:test'
2
+ import {
3
+ ACP_METHODS,
4
+ ACP_PROTOCOL_VERSION,
5
+ DEFAULT_ACP_CLIENT_NAME,
6
+ DEFAULT_ACP_TIMEOUT,
7
+ DEFAULT_CALIBRATION_SAMPLE_SIZE,
8
+ DEFAULT_HARNESS_TIMEOUT,
9
+ DEFAULT_POLLING_INTERVAL,
10
+ DEFAULT_TRIAL_COUNT,
11
+ HEAD_LINES,
12
+ JSON_RPC_ERRORS,
13
+ MAX_CONTENT_LENGTH,
14
+ TAIL_LINES,
15
+ } from '../constants.ts'
16
+
17
+ // ============================================================================
18
+ // ACP Protocol Constants
19
+ // ============================================================================
20
+
21
+ describe('ACP_METHODS', () => {
22
+ test('contains all required lifecycle methods', () => {
23
+ expect(ACP_METHODS.INITIALIZE).toBe('initialize')
24
+ expect(ACP_METHODS.SHUTDOWN).toBe('shutdown')
25
+ })
26
+
27
+ test('contains all required session methods', () => {
28
+ expect(ACP_METHODS.CREATE_SESSION).toBe('session/new')
29
+ expect(ACP_METHODS.LOAD_SESSION).toBe('session/load')
30
+ expect(ACP_METHODS.PROMPT).toBe('session/prompt')
31
+ expect(ACP_METHODS.CANCEL).toBe('session/cancel')
32
+ expect(ACP_METHODS.UPDATE).toBe('session/update')
33
+ expect(ACP_METHODS.REQUEST_PERMISSION).toBe('session/request_permission')
34
+ expect(ACP_METHODS.SET_MODEL).toBe('session/set_model')
35
+ })
36
+
37
+ test('contains protocol-level methods', () => {
38
+ expect(ACP_METHODS.CANCEL_REQUEST).toBe('$/cancel_request')
39
+ })
40
+ })
41
+
42
+ describe('ACP_PROTOCOL_VERSION', () => {
43
+ test('is version 1', () => {
44
+ expect(ACP_PROTOCOL_VERSION).toBe(1)
45
+ })
46
+ })
47
+
48
+ // ============================================================================
49
+ // JSON-RPC Error Codes
50
+ // ============================================================================
51
+
52
+ describe('JSON_RPC_ERRORS', () => {
53
+ test('contains standard JSON-RPC error codes', () => {
54
+ expect(JSON_RPC_ERRORS.PARSE_ERROR).toBe(-32700)
55
+ expect(JSON_RPC_ERRORS.INVALID_REQUEST).toBe(-32600)
56
+ expect(JSON_RPC_ERRORS.METHOD_NOT_FOUND).toBe(-32601)
57
+ expect(JSON_RPC_ERRORS.INVALID_PARAMS).toBe(-32602)
58
+ expect(JSON_RPC_ERRORS.INTERNAL_ERROR).toBe(-32603)
59
+ })
60
+
61
+ test('contains ACP extension error codes', () => {
62
+ expect(JSON_RPC_ERRORS.REQUEST_CANCELLED).toBe(-32800)
63
+ })
64
+ })
65
+
66
+ // ============================================================================
67
+ // ACP Client Defaults
68
+ // ============================================================================
69
+
70
+ describe('ACP Client defaults', () => {
71
+ test('DEFAULT_ACP_CLIENT_NAME is set', () => {
72
+ expect(DEFAULT_ACP_CLIENT_NAME).toBe('plaited-acp-client')
73
+ })
74
+
75
+ test('DEFAULT_ACP_TIMEOUT is 30 seconds', () => {
76
+ expect(DEFAULT_ACP_TIMEOUT).toBe(30000)
77
+ })
78
+
79
+ test('DEFAULT_POLLING_INTERVAL is 50ms', () => {
80
+ expect(DEFAULT_POLLING_INTERVAL).toBe(50)
81
+ })
82
+ })
83
+
84
+ // ============================================================================
85
+ // Harness Preview Configuration
86
+ // ============================================================================
87
+
88
+ describe('Preview configuration', () => {
89
+ test('HEAD_LINES is positive', () => {
90
+ expect(HEAD_LINES).toBeGreaterThan(0)
91
+ expect(HEAD_LINES).toBe(8)
92
+ })
93
+
94
+ test('TAIL_LINES is positive', () => {
95
+ expect(TAIL_LINES).toBeGreaterThan(0)
96
+ expect(TAIL_LINES).toBe(4)
97
+ })
98
+
99
+ test('MAX_CONTENT_LENGTH is reasonable', () => {
100
+ expect(MAX_CONTENT_LENGTH).toBeGreaterThan(0)
101
+ expect(MAX_CONTENT_LENGTH).toBe(500)
102
+ })
103
+ })
104
+
105
+ // ============================================================================
106
+ // Harness Defaults
107
+ // ============================================================================
108
+
109
+ describe('Harness defaults', () => {
110
+ test('DEFAULT_HARNESS_TIMEOUT is 60 seconds', () => {
111
+ expect(DEFAULT_HARNESS_TIMEOUT).toBe(60000)
112
+ })
113
+
114
+ test('DEFAULT_TRIAL_COUNT is 5', () => {
115
+ expect(DEFAULT_TRIAL_COUNT).toBe(5)
116
+ })
117
+
118
+ test('DEFAULT_CALIBRATION_SAMPLE_SIZE is 10', () => {
119
+ expect(DEFAULT_CALIBRATION_SAMPLE_SIZE).toBe(10)
120
+ })
121
+ })
@@ -10,10 +10,10 @@ def main():
10
10
  data = json.load(sys.stdin)
11
11
 
12
12
  output = data.get("output", "").lower()
13
- expected = (data.get("expected") or "").lower()
13
+ hint = (data.get("hint") or "").lower()
14
14
 
15
- if expected:
16
- pass_result = expected in output
15
+ if hint:
16
+ pass_result = hint in output
17
17
  else:
18
18
  pass_result = True
19
19
 
@@ -4,8 +4,8 @@
4
4
 
5
5
  import type { Grader } from '../../schemas.ts'
6
6
 
7
- export const grade: Grader = async ({ input: _input, output, expected }) => {
8
- const pass = expected ? output.toLowerCase().includes(expected.toLowerCase()) : true
7
+ export const grade: Grader = async ({ input: _input, output, hint }) => {
8
+ const pass = hint ? output.toLowerCase().includes(hint.toLowerCase()) : true
9
9
  return {
10
10
  pass,
11
11
  score: pass ? 1.0 : 0.0,
@@ -15,7 +15,7 @@ describe('loadGrader - module graders', () => {
15
15
  const result = await grader({
16
16
  input: 'What is 2+2?',
17
17
  output: 'The answer is 4',
18
- expected: '4',
18
+ hint: '4',
19
19
  })
20
20
 
21
21
  expect(result.pass).toBe(true)
@@ -45,7 +45,7 @@ describe('loadGrader - executable graders', () => {
45
45
  const result = await grader({
46
46
  input: 'What is 2+2?',
47
47
  output: 'The answer is 4',
48
- expected: '4',
48
+ hint: '4',
49
49
  })
50
50
 
51
51
  expect(result.pass).toBe(true)
@@ -59,7 +59,7 @@ describe('loadGrader - executable graders', () => {
59
59
  const result = await grader({
60
60
  input: 'What is 2+2?',
61
61
  output: 'I do not know',
62
- expected: '4',
62
+ hint: '4',
63
63
  })
64
64
 
65
65
  expect(result.pass).toBe(false)
@@ -126,7 +126,7 @@ describe('loadGrader - trajectory support', () => {
126
126
  const result = await grader({
127
127
  input: 'test',
128
128
  output: 'The answer is 4',
129
- expected: '4',
129
+ hint: '4',
130
130
  trajectory,
131
131
  })
132
132
 
@@ -144,7 +144,7 @@ describe('loadGrader - trajectory support', () => {
144
144
  const result = await grader({
145
145
  input: 'test',
146
146
  output: 'The answer is 4',
147
- expected: '4',
147
+ hint: '4',
148
148
  trajectory,
149
149
  })
150
150