@agentv/eval 2.6.0 → 2.7.1-next.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -46,6 +46,9 @@ declare const TraceSummarySchema: z.ZodObject<{
46
46
  costUsd: z.ZodOptional<z.ZodNumber>;
47
47
  durationMs: z.ZodOptional<z.ZodNumber>;
48
48
  toolDurations: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodArray<z.ZodNumber, "many">>>;
49
+ startTime: z.ZodOptional<z.ZodString>;
50
+ endTime: z.ZodOptional<z.ZodString>;
51
+ llmCallCount: z.ZodOptional<z.ZodNumber>;
49
52
  }, "strip", z.ZodTypeAny, {
50
53
  eventCount: number;
51
54
  toolNames: string[];
@@ -59,6 +62,9 @@ declare const TraceSummarySchema: z.ZodObject<{
59
62
  costUsd?: number | undefined;
60
63
  durationMs?: number | undefined;
61
64
  toolDurations?: Record<string, number[]> | undefined;
65
+ startTime?: string | undefined;
66
+ endTime?: string | undefined;
67
+ llmCallCount?: number | undefined;
62
68
  }, {
63
69
  eventCount: number;
64
70
  toolNames: string[];
@@ -72,6 +78,9 @@ declare const TraceSummarySchema: z.ZodObject<{
72
78
  costUsd?: number | undefined;
73
79
  durationMs?: number | undefined;
74
80
  toolDurations?: Record<string, number[]> | undefined;
81
+ startTime?: string | undefined;
82
+ endTime?: string | undefined;
83
+ llmCallCount?: number | undefined;
75
84
  }>;
76
85
  /**
77
86
  * Tool call schema.
@@ -81,19 +90,25 @@ declare const ToolCallSchema: z.ZodObject<{
81
90
  input: z.ZodOptional<z.ZodUnknown>;
82
91
  output: z.ZodOptional<z.ZodUnknown>;
83
92
  id: z.ZodOptional<z.ZodString>;
84
- timestamp: z.ZodOptional<z.ZodString>;
93
+ startTime: z.ZodOptional<z.ZodString>;
94
+ endTime: z.ZodOptional<z.ZodString>;
95
+ durationMs: z.ZodOptional<z.ZodNumber>;
85
96
  }, "strip", z.ZodTypeAny, {
86
97
  tool: string;
87
98
  input?: unknown;
88
99
  output?: unknown;
100
+ durationMs?: number | undefined;
101
+ startTime?: string | undefined;
102
+ endTime?: string | undefined;
89
103
  id?: string | undefined;
90
- timestamp?: string | undefined;
91
104
  }, {
92
105
  tool: string;
93
106
  input?: unknown;
94
107
  output?: unknown;
108
+ durationMs?: number | undefined;
109
+ startTime?: string | undefined;
110
+ endTime?: string | undefined;
95
111
  id?: string | undefined;
96
- timestamp?: string | undefined;
97
112
  }>;
98
113
  /**
99
114
  * Unified message schema for input, expected, and output messages.
@@ -106,46 +121,62 @@ declare const MessageSchema: z.ZodObject<{
106
121
  input: z.ZodOptional<z.ZodUnknown>;
107
122
  output: z.ZodOptional<z.ZodUnknown>;
108
123
  id: z.ZodOptional<z.ZodString>;
109
- timestamp: z.ZodOptional<z.ZodString>;
124
+ startTime: z.ZodOptional<z.ZodString>;
125
+ endTime: z.ZodOptional<z.ZodString>;
126
+ durationMs: z.ZodOptional<z.ZodNumber>;
110
127
  }, "strip", z.ZodTypeAny, {
111
128
  tool: string;
112
129
  input?: unknown;
113
130
  output?: unknown;
131
+ durationMs?: number | undefined;
132
+ startTime?: string | undefined;
133
+ endTime?: string | undefined;
114
134
  id?: string | undefined;
115
- timestamp?: string | undefined;
116
135
  }, {
117
136
  tool: string;
118
137
  input?: unknown;
119
138
  output?: unknown;
139
+ durationMs?: number | undefined;
140
+ startTime?: string | undefined;
141
+ endTime?: string | undefined;
120
142
  id?: string | undefined;
121
- timestamp?: string | undefined;
122
143
  }>, "many">>;
123
144
  name: z.ZodOptional<z.ZodString>;
124
- timestamp: z.ZodOptional<z.ZodString>;
145
+ startTime: z.ZodOptional<z.ZodString>;
146
+ endTime: z.ZodOptional<z.ZodString>;
147
+ durationMs: z.ZodOptional<z.ZodNumber>;
125
148
  metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
126
149
  }, "strip", z.ZodTypeAny, {
127
150
  role: "tool" | "assistant" | "user" | "system";
128
- timestamp?: string | undefined;
151
+ durationMs?: number | undefined;
152
+ startTime?: string | undefined;
153
+ endTime?: string | undefined;
129
154
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
130
155
  toolCalls?: {
131
156
  tool: string;
132
157
  input?: unknown;
133
158
  output?: unknown;
159
+ durationMs?: number | undefined;
160
+ startTime?: string | undefined;
161
+ endTime?: string | undefined;
134
162
  id?: string | undefined;
135
- timestamp?: string | undefined;
136
163
  }[] | undefined;
137
164
  name?: string | undefined;
138
165
  metadata?: Record<string, unknown> | undefined;
139
166
  }, {
140
167
  role: "tool" | "assistant" | "user" | "system";
141
- timestamp?: string | undefined;
168
+ durationMs?: number | undefined;
169
+ startTime?: string | undefined;
170
+ endTime?: string | undefined;
142
171
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
143
172
  toolCalls?: {
144
173
  tool: string;
145
174
  input?: unknown;
146
175
  output?: unknown;
176
+ durationMs?: number | undefined;
177
+ startTime?: string | undefined;
178
+ endTime?: string | undefined;
147
179
  id?: string | undefined;
148
- timestamp?: string | undefined;
149
180
  }[] | undefined;
150
181
  name?: string | undefined;
151
182
  metadata?: Record<string, unknown> | undefined;
@@ -155,8 +186,8 @@ declare const MessageSchema: z.ZodObject<{
155
186
  */
156
187
  declare const CodeJudgeInputSchema: z.ZodObject<{
157
188
  question: z.ZodString;
158
- expectedOutcome: z.ZodString;
159
- expectedMessages: z.ZodArray<z.ZodObject<{
189
+ criteria: z.ZodString;
190
+ expectedOutput: z.ZodArray<z.ZodObject<{
160
191
  role: z.ZodEnum<["assistant", "user", "system", "tool"]>;
161
192
  content: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>, z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>, "many">]>>;
162
193
  toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
@@ -164,53 +195,69 @@ declare const CodeJudgeInputSchema: z.ZodObject<{
164
195
  input: z.ZodOptional<z.ZodUnknown>;
165
196
  output: z.ZodOptional<z.ZodUnknown>;
166
197
  id: z.ZodOptional<z.ZodString>;
167
- timestamp: z.ZodOptional<z.ZodString>;
198
+ startTime: z.ZodOptional<z.ZodString>;
199
+ endTime: z.ZodOptional<z.ZodString>;
200
+ durationMs: z.ZodOptional<z.ZodNumber>;
168
201
  }, "strip", z.ZodTypeAny, {
169
202
  tool: string;
170
203
  input?: unknown;
171
204
  output?: unknown;
205
+ durationMs?: number | undefined;
206
+ startTime?: string | undefined;
207
+ endTime?: string | undefined;
172
208
  id?: string | undefined;
173
- timestamp?: string | undefined;
174
209
  }, {
175
210
  tool: string;
176
211
  input?: unknown;
177
212
  output?: unknown;
213
+ durationMs?: number | undefined;
214
+ startTime?: string | undefined;
215
+ endTime?: string | undefined;
178
216
  id?: string | undefined;
179
- timestamp?: string | undefined;
180
217
  }>, "many">>;
181
218
  name: z.ZodOptional<z.ZodString>;
182
- timestamp: z.ZodOptional<z.ZodString>;
219
+ startTime: z.ZodOptional<z.ZodString>;
220
+ endTime: z.ZodOptional<z.ZodString>;
221
+ durationMs: z.ZodOptional<z.ZodNumber>;
183
222
  metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
184
223
  }, "strip", z.ZodTypeAny, {
185
224
  role: "tool" | "assistant" | "user" | "system";
186
- timestamp?: string | undefined;
225
+ durationMs?: number | undefined;
226
+ startTime?: string | undefined;
227
+ endTime?: string | undefined;
187
228
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
188
229
  toolCalls?: {
189
230
  tool: string;
190
231
  input?: unknown;
191
232
  output?: unknown;
233
+ durationMs?: number | undefined;
234
+ startTime?: string | undefined;
235
+ endTime?: string | undefined;
192
236
  id?: string | undefined;
193
- timestamp?: string | undefined;
194
237
  }[] | undefined;
195
238
  name?: string | undefined;
196
239
  metadata?: Record<string, unknown> | undefined;
197
240
  }, {
198
241
  role: "tool" | "assistant" | "user" | "system";
199
- timestamp?: string | undefined;
242
+ durationMs?: number | undefined;
243
+ startTime?: string | undefined;
244
+ endTime?: string | undefined;
200
245
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
201
246
  toolCalls?: {
202
247
  tool: string;
203
248
  input?: unknown;
204
249
  output?: unknown;
250
+ durationMs?: number | undefined;
251
+ startTime?: string | undefined;
252
+ endTime?: string | undefined;
205
253
  id?: string | undefined;
206
- timestamp?: string | undefined;
207
254
  }[] | undefined;
208
255
  name?: string | undefined;
209
256
  metadata?: Record<string, unknown> | undefined;
210
257
  }>, "many">;
211
258
  referenceAnswer: z.ZodOptional<z.ZodString>;
212
- candidateAnswer: z.ZodString;
213
- outputMessages: z.ZodOptional<z.ZodNullable<z.ZodArray<z.ZodObject<{
259
+ answer: z.ZodString;
260
+ output: z.ZodOptional<z.ZodNullable<z.ZodArray<z.ZodObject<{
214
261
  role: z.ZodEnum<["assistant", "user", "system", "tool"]>;
215
262
  content: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>, z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>, "many">]>>;
216
263
  toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
@@ -218,53 +265,71 @@ declare const CodeJudgeInputSchema: z.ZodObject<{
218
265
  input: z.ZodOptional<z.ZodUnknown>;
219
266
  output: z.ZodOptional<z.ZodUnknown>;
220
267
  id: z.ZodOptional<z.ZodString>;
221
- timestamp: z.ZodOptional<z.ZodString>;
268
+ startTime: z.ZodOptional<z.ZodString>;
269
+ endTime: z.ZodOptional<z.ZodString>;
270
+ durationMs: z.ZodOptional<z.ZodNumber>;
222
271
  }, "strip", z.ZodTypeAny, {
223
272
  tool: string;
224
273
  input?: unknown;
225
274
  output?: unknown;
275
+ durationMs?: number | undefined;
276
+ startTime?: string | undefined;
277
+ endTime?: string | undefined;
226
278
  id?: string | undefined;
227
- timestamp?: string | undefined;
228
279
  }, {
229
280
  tool: string;
230
281
  input?: unknown;
231
282
  output?: unknown;
283
+ durationMs?: number | undefined;
284
+ startTime?: string | undefined;
285
+ endTime?: string | undefined;
232
286
  id?: string | undefined;
233
- timestamp?: string | undefined;
234
287
  }>, "many">>;
235
288
  name: z.ZodOptional<z.ZodString>;
236
- timestamp: z.ZodOptional<z.ZodString>;
289
+ startTime: z.ZodOptional<z.ZodString>;
290
+ endTime: z.ZodOptional<z.ZodString>;
291
+ durationMs: z.ZodOptional<z.ZodNumber>;
237
292
  metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
238
293
  }, "strip", z.ZodTypeAny, {
239
294
  role: "tool" | "assistant" | "user" | "system";
240
- timestamp?: string | undefined;
295
+ durationMs?: number | undefined;
296
+ startTime?: string | undefined;
297
+ endTime?: string | undefined;
241
298
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
242
299
  toolCalls?: {
243
300
  tool: string;
244
301
  input?: unknown;
245
302
  output?: unknown;
303
+ durationMs?: number | undefined;
304
+ startTime?: string | undefined;
305
+ endTime?: string | undefined;
246
306
  id?: string | undefined;
247
- timestamp?: string | undefined;
248
307
  }[] | undefined;
249
308
  name?: string | undefined;
250
309
  metadata?: Record<string, unknown> | undefined;
251
310
  }, {
252
311
  role: "tool" | "assistant" | "user" | "system";
253
- timestamp?: string | undefined;
312
+ durationMs?: number | undefined;
313
+ startTime?: string | undefined;
314
+ endTime?: string | undefined;
254
315
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
255
316
  toolCalls?: {
256
317
  tool: string;
257
318
  input?: unknown;
258
319
  output?: unknown;
320
+ durationMs?: number | undefined;
321
+ startTime?: string | undefined;
322
+ endTime?: string | undefined;
259
323
  id?: string | undefined;
260
- timestamp?: string | undefined;
261
324
  }[] | undefined;
262
325
  name?: string | undefined;
263
326
  metadata?: Record<string, unknown> | undefined;
264
327
  }>, "many">>>;
328
+ /** Path to a temp file containing the output JSON (used for large payloads). */
329
+ outputPath: z.ZodOptional<z.ZodString>;
265
330
  guidelineFiles: z.ZodArray<z.ZodString, "many">;
266
331
  inputFiles: z.ZodArray<z.ZodString, "many">;
267
- inputMessages: z.ZodArray<z.ZodObject<{
332
+ input: z.ZodArray<z.ZodObject<{
268
333
  role: z.ZodEnum<["assistant", "user", "system", "tool"]>;
269
334
  content: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>, z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>, "many">]>>;
270
335
  toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
@@ -272,51 +337,67 @@ declare const CodeJudgeInputSchema: z.ZodObject<{
272
337
  input: z.ZodOptional<z.ZodUnknown>;
273
338
  output: z.ZodOptional<z.ZodUnknown>;
274
339
  id: z.ZodOptional<z.ZodString>;
275
- timestamp: z.ZodOptional<z.ZodString>;
340
+ startTime: z.ZodOptional<z.ZodString>;
341
+ endTime: z.ZodOptional<z.ZodString>;
342
+ durationMs: z.ZodOptional<z.ZodNumber>;
276
343
  }, "strip", z.ZodTypeAny, {
277
344
  tool: string;
278
345
  input?: unknown;
279
346
  output?: unknown;
347
+ durationMs?: number | undefined;
348
+ startTime?: string | undefined;
349
+ endTime?: string | undefined;
280
350
  id?: string | undefined;
281
- timestamp?: string | undefined;
282
351
  }, {
283
352
  tool: string;
284
353
  input?: unknown;
285
354
  output?: unknown;
355
+ durationMs?: number | undefined;
356
+ startTime?: string | undefined;
357
+ endTime?: string | undefined;
286
358
  id?: string | undefined;
287
- timestamp?: string | undefined;
288
359
  }>, "many">>;
289
360
  name: z.ZodOptional<z.ZodString>;
290
- timestamp: z.ZodOptional<z.ZodString>;
361
+ startTime: z.ZodOptional<z.ZodString>;
362
+ endTime: z.ZodOptional<z.ZodString>;
363
+ durationMs: z.ZodOptional<z.ZodNumber>;
291
364
  metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
292
365
  }, "strip", z.ZodTypeAny, {
293
366
  role: "tool" | "assistant" | "user" | "system";
294
- timestamp?: string | undefined;
367
+ durationMs?: number | undefined;
368
+ startTime?: string | undefined;
369
+ endTime?: string | undefined;
295
370
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
296
371
  toolCalls?: {
297
372
  tool: string;
298
373
  input?: unknown;
299
374
  output?: unknown;
375
+ durationMs?: number | undefined;
376
+ startTime?: string | undefined;
377
+ endTime?: string | undefined;
300
378
  id?: string | undefined;
301
- timestamp?: string | undefined;
302
379
  }[] | undefined;
303
380
  name?: string | undefined;
304
381
  metadata?: Record<string, unknown> | undefined;
305
382
  }, {
306
383
  role: "tool" | "assistant" | "user" | "system";
307
- timestamp?: string | undefined;
384
+ durationMs?: number | undefined;
385
+ startTime?: string | undefined;
386
+ endTime?: string | undefined;
308
387
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
309
388
  toolCalls?: {
310
389
  tool: string;
311
390
  input?: unknown;
312
391
  output?: unknown;
392
+ durationMs?: number | undefined;
393
+ startTime?: string | undefined;
394
+ endTime?: string | undefined;
313
395
  id?: string | undefined;
314
- timestamp?: string | undefined;
315
396
  }[] | undefined;
316
397
  name?: string | undefined;
317
398
  metadata?: Record<string, unknown> | undefined;
318
399
  }>, "many">;
319
- traceSummary: z.ZodOptional<z.ZodNullable<z.ZodObject<{
400
+ trace: z.ZodOptional<z.ZodNullable<z.ZodObject<{
320
401
  eventCount: z.ZodNumber;
321
402
  toolNames: z.ZodArray<z.ZodString, "many">;
322
403
  toolCallsByName: z.ZodRecord<z.ZodString, z.ZodNumber>;
@@ -337,6 +418,9 @@ declare const CodeJudgeInputSchema: z.ZodObject<{
337
418
  costUsd: z.ZodOptional<z.ZodNumber>;
338
419
  durationMs: z.ZodOptional<z.ZodNumber>;
339
420
  toolDurations: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodArray<z.ZodNumber, "many">>>;
421
+ startTime: z.ZodOptional<z.ZodString>;
422
+ endTime: z.ZodOptional<z.ZodString>;
423
+ llmCallCount: z.ZodOptional<z.ZodNumber>;
340
424
  }, "strip", z.ZodTypeAny, {
341
425
  eventCount: number;
342
426
  toolNames: string[];
@@ -350,6 +434,9 @@ declare const CodeJudgeInputSchema: z.ZodObject<{
350
434
  costUsd?: number | undefined;
351
435
  durationMs?: number | undefined;
352
436
  toolDurations?: Record<string, number[]> | undefined;
437
+ startTime?: string | undefined;
438
+ endTime?: string | undefined;
439
+ llmCallCount?: number | undefined;
353
440
  }, {
354
441
  eventCount: number;
355
442
  toolNames: string[];
@@ -363,58 +450,76 @@ declare const CodeJudgeInputSchema: z.ZodObject<{
363
450
  costUsd?: number | undefined;
364
451
  durationMs?: number | undefined;
365
452
  toolDurations?: Record<string, number[]> | undefined;
453
+ startTime?: string | undefined;
454
+ endTime?: string | undefined;
455
+ llmCallCount?: number | undefined;
366
456
  }>>>;
457
+ fileChanges: z.ZodOptional<z.ZodNullable<z.ZodString>>;
458
+ workspacePath: z.ZodOptional<z.ZodNullable<z.ZodString>>;
367
459
  config: z.ZodOptional<z.ZodNullable<z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
368
460
  }, "strip", z.ZodTypeAny, {
369
- question: string;
370
- expectedOutcome: string;
371
- expectedMessages: {
461
+ input: {
372
462
  role: "tool" | "assistant" | "user" | "system";
373
- timestamp?: string | undefined;
463
+ durationMs?: number | undefined;
464
+ startTime?: string | undefined;
465
+ endTime?: string | undefined;
374
466
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
375
467
  toolCalls?: {
376
468
  tool: string;
377
469
  input?: unknown;
378
470
  output?: unknown;
471
+ durationMs?: number | undefined;
472
+ startTime?: string | undefined;
473
+ endTime?: string | undefined;
379
474
  id?: string | undefined;
380
- timestamp?: string | undefined;
381
475
  }[] | undefined;
382
476
  name?: string | undefined;
383
477
  metadata?: Record<string, unknown> | undefined;
384
478
  }[];
385
- candidateAnswer: string;
386
- guidelineFiles: string[];
387
- inputFiles: string[];
388
- inputMessages: {
479
+ question: string;
480
+ criteria: string;
481
+ expectedOutput: {
389
482
  role: "tool" | "assistant" | "user" | "system";
390
- timestamp?: string | undefined;
483
+ durationMs?: number | undefined;
484
+ startTime?: string | undefined;
485
+ endTime?: string | undefined;
391
486
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
392
487
  toolCalls?: {
393
488
  tool: string;
394
489
  input?: unknown;
395
490
  output?: unknown;
491
+ durationMs?: number | undefined;
492
+ startTime?: string | undefined;
493
+ endTime?: string | undefined;
396
494
  id?: string | undefined;
397
- timestamp?: string | undefined;
398
495
  }[] | undefined;
399
496
  name?: string | undefined;
400
497
  metadata?: Record<string, unknown> | undefined;
401
498
  }[];
402
- referenceAnswer?: string | undefined;
403
- outputMessages?: {
499
+ answer: string;
500
+ guidelineFiles: string[];
501
+ inputFiles: string[];
502
+ output?: {
404
503
  role: "tool" | "assistant" | "user" | "system";
405
- timestamp?: string | undefined;
504
+ durationMs?: number | undefined;
505
+ startTime?: string | undefined;
506
+ endTime?: string | undefined;
406
507
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
407
508
  toolCalls?: {
408
509
  tool: string;
409
510
  input?: unknown;
410
511
  output?: unknown;
512
+ durationMs?: number | undefined;
513
+ startTime?: string | undefined;
514
+ endTime?: string | undefined;
411
515
  id?: string | undefined;
412
- timestamp?: string | undefined;
413
516
  }[] | undefined;
414
517
  name?: string | undefined;
415
518
  metadata?: Record<string, unknown> | undefined;
416
519
  }[] | null | undefined;
417
- traceSummary?: {
520
+ referenceAnswer?: string | undefined;
521
+ outputPath?: string | undefined;
522
+ trace?: {
418
523
  eventCount: number;
419
524
  toolNames: string[];
420
525
  toolCallsByName: Record<string, number>;
@@ -427,58 +532,76 @@ declare const CodeJudgeInputSchema: z.ZodObject<{
427
532
  costUsd?: number | undefined;
428
533
  durationMs?: number | undefined;
429
534
  toolDurations?: Record<string, number[]> | undefined;
535
+ startTime?: string | undefined;
536
+ endTime?: string | undefined;
537
+ llmCallCount?: number | undefined;
430
538
  } | null | undefined;
539
+ fileChanges?: string | null | undefined;
540
+ workspacePath?: string | null | undefined;
431
541
  config?: Record<string, unknown> | null | undefined;
432
542
  }, {
433
- question: string;
434
- expectedOutcome: string;
435
- expectedMessages: {
543
+ input: {
436
544
  role: "tool" | "assistant" | "user" | "system";
437
- timestamp?: string | undefined;
545
+ durationMs?: number | undefined;
546
+ startTime?: string | undefined;
547
+ endTime?: string | undefined;
438
548
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
439
549
  toolCalls?: {
440
550
  tool: string;
441
551
  input?: unknown;
442
552
  output?: unknown;
553
+ durationMs?: number | undefined;
554
+ startTime?: string | undefined;
555
+ endTime?: string | undefined;
443
556
  id?: string | undefined;
444
- timestamp?: string | undefined;
445
557
  }[] | undefined;
446
558
  name?: string | undefined;
447
559
  metadata?: Record<string, unknown> | undefined;
448
560
  }[];
449
- candidateAnswer: string;
450
- guidelineFiles: string[];
451
- inputFiles: string[];
452
- inputMessages: {
561
+ question: string;
562
+ criteria: string;
563
+ expectedOutput: {
453
564
  role: "tool" | "assistant" | "user" | "system";
454
- timestamp?: string | undefined;
565
+ durationMs?: number | undefined;
566
+ startTime?: string | undefined;
567
+ endTime?: string | undefined;
455
568
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
456
569
  toolCalls?: {
457
570
  tool: string;
458
571
  input?: unknown;
459
572
  output?: unknown;
573
+ durationMs?: number | undefined;
574
+ startTime?: string | undefined;
575
+ endTime?: string | undefined;
460
576
  id?: string | undefined;
461
- timestamp?: string | undefined;
462
577
  }[] | undefined;
463
578
  name?: string | undefined;
464
579
  metadata?: Record<string, unknown> | undefined;
465
580
  }[];
466
- referenceAnswer?: string | undefined;
467
- outputMessages?: {
581
+ answer: string;
582
+ guidelineFiles: string[];
583
+ inputFiles: string[];
584
+ output?: {
468
585
  role: "tool" | "assistant" | "user" | "system";
469
- timestamp?: string | undefined;
586
+ durationMs?: number | undefined;
587
+ startTime?: string | undefined;
588
+ endTime?: string | undefined;
470
589
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
471
590
  toolCalls?: {
472
591
  tool: string;
473
592
  input?: unknown;
474
593
  output?: unknown;
594
+ durationMs?: number | undefined;
595
+ startTime?: string | undefined;
596
+ endTime?: string | undefined;
475
597
  id?: string | undefined;
476
- timestamp?: string | undefined;
477
598
  }[] | undefined;
478
599
  name?: string | undefined;
479
600
  metadata?: Record<string, unknown> | undefined;
480
601
  }[] | null | undefined;
481
- traceSummary?: {
602
+ referenceAnswer?: string | undefined;
603
+ outputPath?: string | undefined;
604
+ trace?: {
482
605
  eventCount: number;
483
606
  toolNames: string[];
484
607
  toolCallsByName: Record<string, number>;
@@ -491,7 +614,12 @@ declare const CodeJudgeInputSchema: z.ZodObject<{
491
614
  costUsd?: number | undefined;
492
615
  durationMs?: number | undefined;
493
616
  toolDurations?: Record<string, number[]> | undefined;
617
+ startTime?: string | undefined;
618
+ endTime?: string | undefined;
619
+ llmCallCount?: number | undefined;
494
620
  } | null | undefined;
621
+ fileChanges?: string | null | undefined;
622
+ workspacePath?: string | null | undefined;
495
623
  config?: Record<string, unknown> | null | undefined;
496
624
  }>;
497
625
  /**
@@ -532,8 +660,8 @@ type TokenUsage = z.infer<typeof TokenUsageSchema>;
532
660
  */
533
661
  declare const PromptTemplateInputSchema: z.ZodObject<{
534
662
  question: z.ZodString;
535
- expectedOutcome: z.ZodString;
536
- expectedMessages: z.ZodArray<z.ZodObject<{
663
+ criteria: z.ZodString;
664
+ expectedOutput: z.ZodArray<z.ZodObject<{
537
665
  role: z.ZodEnum<["assistant", "user", "system", "tool"]>;
538
666
  content: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>, z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>, "many">]>>;
539
667
  toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
@@ -541,53 +669,69 @@ declare const PromptTemplateInputSchema: z.ZodObject<{
541
669
  input: z.ZodOptional<z.ZodUnknown>;
542
670
  output: z.ZodOptional<z.ZodUnknown>;
543
671
  id: z.ZodOptional<z.ZodString>;
544
- timestamp: z.ZodOptional<z.ZodString>;
672
+ startTime: z.ZodOptional<z.ZodString>;
673
+ endTime: z.ZodOptional<z.ZodString>;
674
+ durationMs: z.ZodOptional<z.ZodNumber>;
545
675
  }, "strip", z.ZodTypeAny, {
546
676
  tool: string;
547
677
  input?: unknown;
548
678
  output?: unknown;
679
+ durationMs?: number | undefined;
680
+ startTime?: string | undefined;
681
+ endTime?: string | undefined;
549
682
  id?: string | undefined;
550
- timestamp?: string | undefined;
551
683
  }, {
552
684
  tool: string;
553
685
  input?: unknown;
554
686
  output?: unknown;
687
+ durationMs?: number | undefined;
688
+ startTime?: string | undefined;
689
+ endTime?: string | undefined;
555
690
  id?: string | undefined;
556
- timestamp?: string | undefined;
557
691
  }>, "many">>;
558
692
  name: z.ZodOptional<z.ZodString>;
559
- timestamp: z.ZodOptional<z.ZodString>;
693
+ startTime: z.ZodOptional<z.ZodString>;
694
+ endTime: z.ZodOptional<z.ZodString>;
695
+ durationMs: z.ZodOptional<z.ZodNumber>;
560
696
  metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
561
697
  }, "strip", z.ZodTypeAny, {
562
698
  role: "tool" | "assistant" | "user" | "system";
563
- timestamp?: string | undefined;
699
+ durationMs?: number | undefined;
700
+ startTime?: string | undefined;
701
+ endTime?: string | undefined;
564
702
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
565
703
  toolCalls?: {
566
704
  tool: string;
567
705
  input?: unknown;
568
706
  output?: unknown;
707
+ durationMs?: number | undefined;
708
+ startTime?: string | undefined;
709
+ endTime?: string | undefined;
569
710
  id?: string | undefined;
570
- timestamp?: string | undefined;
571
711
  }[] | undefined;
572
712
  name?: string | undefined;
573
713
  metadata?: Record<string, unknown> | undefined;
574
714
  }, {
575
715
  role: "tool" | "assistant" | "user" | "system";
576
- timestamp?: string | undefined;
716
+ durationMs?: number | undefined;
717
+ startTime?: string | undefined;
718
+ endTime?: string | undefined;
577
719
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
578
720
  toolCalls?: {
579
721
  tool: string;
580
722
  input?: unknown;
581
723
  output?: unknown;
724
+ durationMs?: number | undefined;
725
+ startTime?: string | undefined;
726
+ endTime?: string | undefined;
582
727
  id?: string | undefined;
583
- timestamp?: string | undefined;
584
728
  }[] | undefined;
585
729
  name?: string | undefined;
586
730
  metadata?: Record<string, unknown> | undefined;
587
731
  }>, "many">;
588
732
  referenceAnswer: z.ZodOptional<z.ZodString>;
589
- candidateAnswer: z.ZodString;
590
- outputMessages: z.ZodOptional<z.ZodNullable<z.ZodArray<z.ZodObject<{
733
+ answer: z.ZodString;
734
+ output: z.ZodOptional<z.ZodNullable<z.ZodArray<z.ZodObject<{
591
735
  role: z.ZodEnum<["assistant", "user", "system", "tool"]>;
592
736
  content: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>, z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>, "many">]>>;
593
737
  toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
@@ -595,53 +739,71 @@ declare const PromptTemplateInputSchema: z.ZodObject<{
595
739
  input: z.ZodOptional<z.ZodUnknown>;
596
740
  output: z.ZodOptional<z.ZodUnknown>;
597
741
  id: z.ZodOptional<z.ZodString>;
598
- timestamp: z.ZodOptional<z.ZodString>;
742
+ startTime: z.ZodOptional<z.ZodString>;
743
+ endTime: z.ZodOptional<z.ZodString>;
744
+ durationMs: z.ZodOptional<z.ZodNumber>;
599
745
  }, "strip", z.ZodTypeAny, {
600
746
  tool: string;
601
747
  input?: unknown;
602
748
  output?: unknown;
749
+ durationMs?: number | undefined;
750
+ startTime?: string | undefined;
751
+ endTime?: string | undefined;
603
752
  id?: string | undefined;
604
- timestamp?: string | undefined;
605
753
  }, {
606
754
  tool: string;
607
755
  input?: unknown;
608
756
  output?: unknown;
757
+ durationMs?: number | undefined;
758
+ startTime?: string | undefined;
759
+ endTime?: string | undefined;
609
760
  id?: string | undefined;
610
- timestamp?: string | undefined;
611
761
  }>, "many">>;
612
762
  name: z.ZodOptional<z.ZodString>;
613
- timestamp: z.ZodOptional<z.ZodString>;
763
+ startTime: z.ZodOptional<z.ZodString>;
764
+ endTime: z.ZodOptional<z.ZodString>;
765
+ durationMs: z.ZodOptional<z.ZodNumber>;
614
766
  metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
615
767
  }, "strip", z.ZodTypeAny, {
616
768
  role: "tool" | "assistant" | "user" | "system";
617
- timestamp?: string | undefined;
769
+ durationMs?: number | undefined;
770
+ startTime?: string | undefined;
771
+ endTime?: string | undefined;
618
772
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
619
773
  toolCalls?: {
620
774
  tool: string;
621
775
  input?: unknown;
622
776
  output?: unknown;
777
+ durationMs?: number | undefined;
778
+ startTime?: string | undefined;
779
+ endTime?: string | undefined;
623
780
  id?: string | undefined;
624
- timestamp?: string | undefined;
625
781
  }[] | undefined;
626
782
  name?: string | undefined;
627
783
  metadata?: Record<string, unknown> | undefined;
628
784
  }, {
629
785
  role: "tool" | "assistant" | "user" | "system";
630
- timestamp?: string | undefined;
786
+ durationMs?: number | undefined;
787
+ startTime?: string | undefined;
788
+ endTime?: string | undefined;
631
789
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
632
790
  toolCalls?: {
633
791
  tool: string;
634
792
  input?: unknown;
635
793
  output?: unknown;
794
+ durationMs?: number | undefined;
795
+ startTime?: string | undefined;
796
+ endTime?: string | undefined;
636
797
  id?: string | undefined;
637
- timestamp?: string | undefined;
638
798
  }[] | undefined;
639
799
  name?: string | undefined;
640
800
  metadata?: Record<string, unknown> | undefined;
641
801
  }>, "many">>>;
802
+ /** Path to a temp file containing the output JSON (used for large payloads). */
803
+ outputPath: z.ZodOptional<z.ZodString>;
642
804
  guidelineFiles: z.ZodArray<z.ZodString, "many">;
643
805
  inputFiles: z.ZodArray<z.ZodString, "many">;
644
- inputMessages: z.ZodArray<z.ZodObject<{
806
+ input: z.ZodArray<z.ZodObject<{
645
807
  role: z.ZodEnum<["assistant", "user", "system", "tool"]>;
646
808
  content: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>, z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>, "many">]>>;
647
809
  toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
@@ -649,51 +811,67 @@ declare const PromptTemplateInputSchema: z.ZodObject<{
649
811
  input: z.ZodOptional<z.ZodUnknown>;
650
812
  output: z.ZodOptional<z.ZodUnknown>;
651
813
  id: z.ZodOptional<z.ZodString>;
652
- timestamp: z.ZodOptional<z.ZodString>;
814
+ startTime: z.ZodOptional<z.ZodString>;
815
+ endTime: z.ZodOptional<z.ZodString>;
816
+ durationMs: z.ZodOptional<z.ZodNumber>;
653
817
  }, "strip", z.ZodTypeAny, {
654
818
  tool: string;
655
819
  input?: unknown;
656
820
  output?: unknown;
821
+ durationMs?: number | undefined;
822
+ startTime?: string | undefined;
823
+ endTime?: string | undefined;
657
824
  id?: string | undefined;
658
- timestamp?: string | undefined;
659
825
  }, {
660
826
  tool: string;
661
827
  input?: unknown;
662
828
  output?: unknown;
829
+ durationMs?: number | undefined;
830
+ startTime?: string | undefined;
831
+ endTime?: string | undefined;
663
832
  id?: string | undefined;
664
- timestamp?: string | undefined;
665
833
  }>, "many">>;
666
834
  name: z.ZodOptional<z.ZodString>;
667
- timestamp: z.ZodOptional<z.ZodString>;
835
+ startTime: z.ZodOptional<z.ZodString>;
836
+ endTime: z.ZodOptional<z.ZodString>;
837
+ durationMs: z.ZodOptional<z.ZodNumber>;
668
838
  metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
669
839
  }, "strip", z.ZodTypeAny, {
670
840
  role: "tool" | "assistant" | "user" | "system";
671
- timestamp?: string | undefined;
841
+ durationMs?: number | undefined;
842
+ startTime?: string | undefined;
843
+ endTime?: string | undefined;
672
844
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
673
845
  toolCalls?: {
674
846
  tool: string;
675
847
  input?: unknown;
676
848
  output?: unknown;
849
+ durationMs?: number | undefined;
850
+ startTime?: string | undefined;
851
+ endTime?: string | undefined;
677
852
  id?: string | undefined;
678
- timestamp?: string | undefined;
679
853
  }[] | undefined;
680
854
  name?: string | undefined;
681
855
  metadata?: Record<string, unknown> | undefined;
682
856
  }, {
683
857
  role: "tool" | "assistant" | "user" | "system";
684
- timestamp?: string | undefined;
858
+ durationMs?: number | undefined;
859
+ startTime?: string | undefined;
860
+ endTime?: string | undefined;
685
861
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
686
862
  toolCalls?: {
687
863
  tool: string;
688
864
  input?: unknown;
689
865
  output?: unknown;
866
+ durationMs?: number | undefined;
867
+ startTime?: string | undefined;
868
+ endTime?: string | undefined;
690
869
  id?: string | undefined;
691
- timestamp?: string | undefined;
692
870
  }[] | undefined;
693
871
  name?: string | undefined;
694
872
  metadata?: Record<string, unknown> | undefined;
695
873
  }>, "many">;
696
- traceSummary: z.ZodOptional<z.ZodNullable<z.ZodObject<{
874
+ trace: z.ZodOptional<z.ZodNullable<z.ZodObject<{
697
875
  eventCount: z.ZodNumber;
698
876
  toolNames: z.ZodArray<z.ZodString, "many">;
699
877
  toolCallsByName: z.ZodRecord<z.ZodString, z.ZodNumber>;
@@ -714,6 +892,9 @@ declare const PromptTemplateInputSchema: z.ZodObject<{
714
892
  costUsd: z.ZodOptional<z.ZodNumber>;
715
893
  durationMs: z.ZodOptional<z.ZodNumber>;
716
894
  toolDurations: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodArray<z.ZodNumber, "many">>>;
895
+ startTime: z.ZodOptional<z.ZodString>;
896
+ endTime: z.ZodOptional<z.ZodString>;
897
+ llmCallCount: z.ZodOptional<z.ZodNumber>;
717
898
  }, "strip", z.ZodTypeAny, {
718
899
  eventCount: number;
719
900
  toolNames: string[];
@@ -727,6 +908,9 @@ declare const PromptTemplateInputSchema: z.ZodObject<{
727
908
  costUsd?: number | undefined;
728
909
  durationMs?: number | undefined;
729
910
  toolDurations?: Record<string, number[]> | undefined;
911
+ startTime?: string | undefined;
912
+ endTime?: string | undefined;
913
+ llmCallCount?: number | undefined;
730
914
  }, {
731
915
  eventCount: number;
732
916
  toolNames: string[];
@@ -740,58 +924,76 @@ declare const PromptTemplateInputSchema: z.ZodObject<{
740
924
  costUsd?: number | undefined;
741
925
  durationMs?: number | undefined;
742
926
  toolDurations?: Record<string, number[]> | undefined;
927
+ startTime?: string | undefined;
928
+ endTime?: string | undefined;
929
+ llmCallCount?: number | undefined;
743
930
  }>>>;
931
+ fileChanges: z.ZodOptional<z.ZodNullable<z.ZodString>>;
932
+ workspacePath: z.ZodOptional<z.ZodNullable<z.ZodString>>;
744
933
  config: z.ZodOptional<z.ZodNullable<z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
745
934
  }, "strip", z.ZodTypeAny, {
746
- question: string;
747
- expectedOutcome: string;
748
- expectedMessages: {
935
+ input: {
749
936
  role: "tool" | "assistant" | "user" | "system";
750
- timestamp?: string | undefined;
937
+ durationMs?: number | undefined;
938
+ startTime?: string | undefined;
939
+ endTime?: string | undefined;
751
940
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
752
941
  toolCalls?: {
753
942
  tool: string;
754
943
  input?: unknown;
755
944
  output?: unknown;
945
+ durationMs?: number | undefined;
946
+ startTime?: string | undefined;
947
+ endTime?: string | undefined;
756
948
  id?: string | undefined;
757
- timestamp?: string | undefined;
758
949
  }[] | undefined;
759
950
  name?: string | undefined;
760
951
  metadata?: Record<string, unknown> | undefined;
761
952
  }[];
762
- candidateAnswer: string;
763
- guidelineFiles: string[];
764
- inputFiles: string[];
765
- inputMessages: {
953
+ question: string;
954
+ criteria: string;
955
+ expectedOutput: {
766
956
  role: "tool" | "assistant" | "user" | "system";
767
- timestamp?: string | undefined;
957
+ durationMs?: number | undefined;
958
+ startTime?: string | undefined;
959
+ endTime?: string | undefined;
768
960
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
769
961
  toolCalls?: {
770
962
  tool: string;
771
963
  input?: unknown;
772
964
  output?: unknown;
965
+ durationMs?: number | undefined;
966
+ startTime?: string | undefined;
967
+ endTime?: string | undefined;
773
968
  id?: string | undefined;
774
- timestamp?: string | undefined;
775
969
  }[] | undefined;
776
970
  name?: string | undefined;
777
971
  metadata?: Record<string, unknown> | undefined;
778
972
  }[];
779
- referenceAnswer?: string | undefined;
780
- outputMessages?: {
973
+ answer: string;
974
+ guidelineFiles: string[];
975
+ inputFiles: string[];
976
+ output?: {
781
977
  role: "tool" | "assistant" | "user" | "system";
782
- timestamp?: string | undefined;
978
+ durationMs?: number | undefined;
979
+ startTime?: string | undefined;
980
+ endTime?: string | undefined;
783
981
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
784
982
  toolCalls?: {
785
983
  tool: string;
786
984
  input?: unknown;
787
985
  output?: unknown;
986
+ durationMs?: number | undefined;
987
+ startTime?: string | undefined;
988
+ endTime?: string | undefined;
788
989
  id?: string | undefined;
789
- timestamp?: string | undefined;
790
990
  }[] | undefined;
791
991
  name?: string | undefined;
792
992
  metadata?: Record<string, unknown> | undefined;
793
993
  }[] | null | undefined;
794
- traceSummary?: {
994
+ referenceAnswer?: string | undefined;
995
+ outputPath?: string | undefined;
996
+ trace?: {
795
997
  eventCount: number;
796
998
  toolNames: string[];
797
999
  toolCallsByName: Record<string, number>;
@@ -804,58 +1006,76 @@ declare const PromptTemplateInputSchema: z.ZodObject<{
804
1006
  costUsd?: number | undefined;
805
1007
  durationMs?: number | undefined;
806
1008
  toolDurations?: Record<string, number[]> | undefined;
1009
+ startTime?: string | undefined;
1010
+ endTime?: string | undefined;
1011
+ llmCallCount?: number | undefined;
807
1012
  } | null | undefined;
1013
+ fileChanges?: string | null | undefined;
1014
+ workspacePath?: string | null | undefined;
808
1015
  config?: Record<string, unknown> | null | undefined;
809
1016
  }, {
810
- question: string;
811
- expectedOutcome: string;
812
- expectedMessages: {
1017
+ input: {
813
1018
  role: "tool" | "assistant" | "user" | "system";
814
- timestamp?: string | undefined;
1019
+ durationMs?: number | undefined;
1020
+ startTime?: string | undefined;
1021
+ endTime?: string | undefined;
815
1022
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
816
1023
  toolCalls?: {
817
1024
  tool: string;
818
1025
  input?: unknown;
819
1026
  output?: unknown;
1027
+ durationMs?: number | undefined;
1028
+ startTime?: string | undefined;
1029
+ endTime?: string | undefined;
820
1030
  id?: string | undefined;
821
- timestamp?: string | undefined;
822
1031
  }[] | undefined;
823
1032
  name?: string | undefined;
824
1033
  metadata?: Record<string, unknown> | undefined;
825
1034
  }[];
826
- candidateAnswer: string;
827
- guidelineFiles: string[];
828
- inputFiles: string[];
829
- inputMessages: {
1035
+ question: string;
1036
+ criteria: string;
1037
+ expectedOutput: {
830
1038
  role: "tool" | "assistant" | "user" | "system";
831
- timestamp?: string | undefined;
1039
+ durationMs?: number | undefined;
1040
+ startTime?: string | undefined;
1041
+ endTime?: string | undefined;
832
1042
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
833
1043
  toolCalls?: {
834
1044
  tool: string;
835
1045
  input?: unknown;
836
1046
  output?: unknown;
1047
+ durationMs?: number | undefined;
1048
+ startTime?: string | undefined;
1049
+ endTime?: string | undefined;
837
1050
  id?: string | undefined;
838
- timestamp?: string | undefined;
839
1051
  }[] | undefined;
840
1052
  name?: string | undefined;
841
1053
  metadata?: Record<string, unknown> | undefined;
842
1054
  }[];
843
- referenceAnswer?: string | undefined;
844
- outputMessages?: {
1055
+ answer: string;
1056
+ guidelineFiles: string[];
1057
+ inputFiles: string[];
1058
+ output?: {
845
1059
  role: "tool" | "assistant" | "user" | "system";
846
- timestamp?: string | undefined;
1060
+ durationMs?: number | undefined;
1061
+ startTime?: string | undefined;
1062
+ endTime?: string | undefined;
847
1063
  content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
848
1064
  toolCalls?: {
849
1065
  tool: string;
850
1066
  input?: unknown;
851
1067
  output?: unknown;
1068
+ durationMs?: number | undefined;
1069
+ startTime?: string | undefined;
1070
+ endTime?: string | undefined;
852
1071
  id?: string | undefined;
853
- timestamp?: string | undefined;
854
1072
  }[] | undefined;
855
1073
  name?: string | undefined;
856
1074
  metadata?: Record<string, unknown> | undefined;
857
1075
  }[] | null | undefined;
858
- traceSummary?: {
1076
+ referenceAnswer?: string | undefined;
1077
+ outputPath?: string | undefined;
1078
+ trace?: {
859
1079
  eventCount: number;
860
1080
  toolNames: string[];
861
1081
  toolCallsByName: Record<string, number>;
@@ -868,7 +1088,12 @@ declare const PromptTemplateInputSchema: z.ZodObject<{
868
1088
  costUsd?: number | undefined;
869
1089
  durationMs?: number | undefined;
870
1090
  toolDurations?: Record<string, number[]> | undefined;
1091
+ startTime?: string | undefined;
1092
+ endTime?: string | undefined;
1093
+ llmCallCount?: number | undefined;
871
1094
  } | null | undefined;
1095
+ fileChanges?: string | null | undefined;
1096
+ workspacePath?: string | null | undefined;
872
1097
  config?: Record<string, unknown> | null | undefined;
873
1098
  }>;
874
1099
  type PromptTemplateInput = CodeJudgeInput;
@@ -895,7 +1120,7 @@ interface TargetInvokeRequest {
895
1120
  * Response from a target invocation
896
1121
  */
897
1122
  interface TargetInvokeResponse {
898
- readonly outputMessages: readonly unknown[];
1123
+ readonly output: readonly unknown[];
899
1124
  readonly rawText?: string;
900
1125
  }
901
1126
  /**
@@ -961,7 +1186,7 @@ declare class TargetInvocationError extends Error {
961
1186
  * ```typescript
962
1187
  * import { createTargetClient, defineCodeJudge } from '@agentv/eval';
963
1188
  *
964
- * export default defineCodeJudge(async ({ question, expectedOutcome }) => {
1189
+ * export default defineCodeJudge(async ({ question, criteria }) => {
965
1190
  * const target = createTargetClient();
966
1191
  *
967
1192
  * if (!target) {
@@ -970,7 +1195,7 @@ declare class TargetInvocationError extends Error {
970
1195
  * }
971
1196
  *
972
1197
  * const response = await target.invoke({
973
- * question: `Is this answer correct? Question: ${question}, Expected: ${expectedOutcome}`,
1198
+ * question: `Is this answer correct? Question: ${question}, Expected: ${criteria}`,
974
1199
  * systemPrompt: 'You are an expert evaluator. Respond with JSON: { "correct": true/false }'
975
1200
  * });
976
1201
  *
@@ -981,6 +1206,63 @@ declare class TargetInvocationError extends Error {
981
1206
  */
982
1207
  declare function createTargetClient(): TargetClient | undefined;
983
1208
 
1209
+ /**
1210
+ * Context provided to assertion handlers.
1211
+ * Same shape as CodeJudgeInput — assertions receive full evaluation context.
1212
+ */
1213
+ type AssertionContext = CodeJudgeInput;
1214
+ /**
1215
+ * Known built-in assertion types. Any other string is also accepted as a custom type.
1216
+ *
1217
+ * Use in EVAL.yaml `assert` blocks:
1218
+ * ```yaml
1219
+ * assert:
1220
+ * - type: contains
1221
+ * value: "Paris"
1222
+ * ```
1223
+ *
1224
+ * Custom types registered via `.agentv/assertions/` or `defineAssertion()`
1225
+ * are also valid — the `string & {}` escape hatch provides autocomplete
1226
+ * for known types while accepting any string.
1227
+ */
1228
+ type AssertionType = 'llm_judge' | 'code_judge' | 'rubrics' | 'composite' | 'tool_trajectory' | 'field_accuracy' | 'latency' | 'cost' | 'token_usage' | 'execution_metrics' | 'agent_judge' | 'contains' | 'equals' | 'regex' | 'is_json' | (string & {});
1229
+ /**
1230
+ * Result returned from an assertion handler.
1231
+ *
1232
+ * @example Pass with reasoning
1233
+ * ```ts
1234
+ * { pass: true, reasoning: 'Output contains expected keywords' }
1235
+ * ```
1236
+ *
1237
+ * @example Fail with misses
1238
+ * ```ts
1239
+ * { pass: false, misses: ['Missing required header'], score: 0.3 }
1240
+ * ```
1241
+ *
1242
+ * @example Granular score (0-1)
1243
+ * ```ts
1244
+ * { score: 0.75, hits: ['Format correct', 'Content relevant'], misses: ['Missing citation'] }
1245
+ * ```
1246
+ */
1247
+ interface AssertionScore {
1248
+ /** Explicit pass/fail. If omitted, derived from score (>= 0.5 = pass). */
1249
+ readonly pass?: boolean;
1250
+ /** Numeric score between 0 and 1. Defaults to 1 if pass=true, 0 if pass=false. */
1251
+ readonly score?: number;
1252
+ /** Aspects that passed. */
1253
+ readonly hits?: readonly string[];
1254
+ /** Aspects that failed. */
1255
+ readonly misses?: readonly string[];
1256
+ /** Human-readable explanation. */
1257
+ readonly reasoning?: string;
1258
+ /** Optional structured details for domain-specific metrics. */
1259
+ readonly details?: Record<string, unknown>;
1260
+ }
1261
+ /**
1262
+ * Handler function type for assertions: takes the assertion context and returns an AssertionScore, either directly or as a Promise.
1263
+ */
1264
+ type AssertionHandler = (ctx: AssertionContext) => AssertionScore | Promise<AssertionScore>;
1265
+
984
1266
  /**
985
1267
  * Handler function type for prompt templates.
986
1268
  * Returns the prompt string to use for evaluation.
@@ -995,15 +1277,26 @@ type CodeJudgeHandler = (input: CodeJudgeInput) => CodeJudgeResult | Promise<Cod
995
1277
  /**
996
1278
  * AgentV Evaluation SDK
997
1279
  *
998
- * Build custom code judges for evaluating AI agent outputs.
1280
+ * Build custom evaluators for AI agent outputs.
999
1281
  *
1000
- * @example Basic code judge
1282
+ * @example Custom assertion (simplest way to add evaluation logic)
1283
+ * ```typescript
1284
+ * #!/usr/bin/env bun
1285
+ * import { defineAssertion } from '@agentv/eval';
1286
+ *
1287
+ * export default defineAssertion(({ answer }) => ({
1288
+ * pass: answer.includes('hello'),
1289
+ * reasoning: 'Checks greeting',
1290
+ * }));
1291
+ * ```
1292
+ *
1293
+ * @example Code judge (full control)
1001
1294
  * ```typescript
1002
1295
  * #!/usr/bin/env bun
1003
1296
  * import { defineCodeJudge } from '@agentv/eval';
1004
1297
  *
1005
- * export default defineCodeJudge(({ traceSummary, candidateAnswer }) => ({
1006
- * score: traceSummary?.eventCount <= 5 ? 1.0 : 0.5,
1298
+ * export default defineCodeJudge(({ trace, answer }) => ({
1299
+ * score: trace != null && trace.eventCount <= 5 ? 1.0 : 0.5,
1007
1300
  * hits: ['Efficient tool usage'],
1008
1301
  * misses: [],
1009
1302
  * }));
@@ -1049,12 +1342,12 @@ type CodeJudgeHandler = (input: CodeJudgeInput) => CodeJudgeResult | Promise<Cod
1049
1342
  * ```typescript
1050
1343
  * import { defineCodeJudge } from '@agentv/eval';
1051
1344
  *
1052
- * export default defineCodeJudge(({ traceSummary }) => {
1053
- * if (!traceSummary) {
1345
+ * export default defineCodeJudge(({ trace }) => {
1346
+ * if (!trace) {
1054
1347
  * return { score: 0.5, reasoning: 'No trace available' };
1055
1348
  * }
1056
1349
  *
1057
- * const efficient = traceSummary.eventCount <= 10;
1350
+ * const efficient = trace.eventCount <= 10;
1058
1351
  * return {
1059
1352
  * score: efficient ? 1.0 : 0.5,
1060
1353
  * hits: efficient ? ['Efficient execution'] : [],
@@ -1071,7 +1364,7 @@ type CodeJudgeHandler = (input: CodeJudgeInput) => CodeJudgeResult | Promise<Cod
1071
1364
  * maxToolCalls: z.number().default(10),
1072
1365
  * });
1073
1366
  *
1074
- * export default defineCodeJudge(({ traceSummary, config }) => {
1367
+ * export default defineCodeJudge(({ trace, config }) => {
1075
1368
  * const { maxToolCalls } = ConfigSchema.parse(config ?? {});
1076
1369
  * // Use maxToolCalls...
1077
1370
  * });
@@ -1096,7 +1389,7 @@ declare function defineCodeJudge(handler: CodeJudgeHandler): void;
1096
1389
  *
1097
1390
  * export default definePromptTemplate((ctx) => `
1098
1391
  * Question: ${ctx.question}
1099
- * Answer: ${ctx.candidateAnswer}
1392
+ * Answer: ${ctx.answer}
1100
1393
  *
1101
1394
  * ${ctx.referenceAnswer ? `Reference: ${ctx.referenceAnswer}` : ''}
1102
1395
  * `);
@@ -1110,12 +1403,57 @@ declare function defineCodeJudge(handler: CodeJudgeHandler): void;
1110
1403
  * const rubric = ctx.config?.rubric as string | undefined;
1111
1404
  * return `
1112
1405
  * Question: ${ctx.question}
1113
- * Candidate Answer: ${ctx.candidateAnswer}
1406
+ * Candidate Answer: ${ctx.answer}
1114
1407
  * ${rubric ? `\nEvaluation Criteria:\n${rubric}` : ''}
1115
1408
  * `;
1116
1409
  * });
1117
1410
  * ```
1118
1411
  */
1119
1412
  declare function definePromptTemplate(handler: PromptTemplateHandler): void;
1413
+ /**
1414
+ * Define a custom assertion evaluator with automatic stdin/stdout handling.
1415
+ *
1416
+ * Assertions are the simplest way to add custom evaluation logic. They receive
1417
+ * the full evaluation context and return a pass/fail result with optional
1418
+ * granular scoring.
1419
+ *
1420
+ * This function:
1421
+ * 1. Reads JSON from stdin (snake_case format)
1422
+ * 2. Converts to camelCase and validates with Zod
1423
+ * 3. Calls your handler with typed context
1424
+ * 4. Normalizes the result (pass→score, clamp, etc.)
1425
+ * 5. Outputs JSON to stdout
1426
+ * 6. Handles errors gracefully with proper exit codes
1427
+ *
1428
+ * @param handler - Function that evaluates the context and returns a result
1429
+ *
1430
+ * @example Simple pass/fail
1431
+ * ```typescript
1432
+ * import { defineAssertion } from '@agentv/eval';
1433
+ *
1434
+ * export default defineAssertion(({ answer }) => ({
1435
+ * pass: answer.toLowerCase().includes('hello'),
1436
+ * reasoning: 'Checks for greeting',
1437
+ * }));
1438
+ * ```
1439
+ *
1440
+ * @example Granular scoring
1441
+ * ```typescript
1442
+ * import { defineAssertion } from '@agentv/eval';
1443
+ *
1444
+ * export default defineAssertion(({ answer, trace }) => {
1445
+ * const contentScore = answer.length > 0 ? 0.5 : 0;
1446
+ * const efficiencyScore = (trace?.eventCount ?? 0) <= 5 ? 0.5 : 0;
1447
+ * return {
1448
+ * score: contentScore + efficiencyScore,
1449
+ * hits: [
1450
+ * ...(contentScore > 0 ? ['Has content'] : []),
1451
+ * ...(efficiencyScore > 0 ? ['Efficient'] : []),
1452
+ * ],
1453
+ * };
1454
+ * });
1455
+ * ```
1456
+ */
1457
+ declare function defineAssertion(handler: AssertionHandler): void;
1120
1458
 
1121
- export { type CodeJudgeHandler, type CodeJudgeInput, CodeJudgeInputSchema, type CodeJudgeResult, CodeJudgeResultSchema, type Message, MessageSchema, type PromptTemplateHandler, type PromptTemplateInput, PromptTemplateInputSchema, type TargetClient, type TargetInfo, TargetInvocationError, type TargetInvokeRequest, type TargetInvokeResponse, TargetNotAvailableError, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type TraceSummary, TraceSummarySchema, createTargetClient, defineCodeJudge, definePromptTemplate };
1459
+ export { type AssertionContext, type AssertionHandler, type AssertionScore, type AssertionType, type CodeJudgeHandler, type CodeJudgeInput, CodeJudgeInputSchema, type CodeJudgeResult, CodeJudgeResultSchema, type Message, MessageSchema, type PromptTemplateHandler, type PromptTemplateInput, PromptTemplateInputSchema, type TargetClient, type TargetInfo, TargetInvocationError, type TargetInvokeRequest, type TargetInvokeResponse, TargetNotAvailableError, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type TraceSummary, TraceSummarySchema, createTargetClient, defineAssertion, defineCodeJudge, definePromptTemplate };