@agentv/eval 2.6.0 → 2.7.1-next.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -8
- package/dist/index.cjs +121 -20
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +487 -149
- package/dist/index.d.ts +487 -149
- package/dist/index.js +120 -20
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -46,6 +46,9 @@ declare const TraceSummarySchema: z.ZodObject<{
|
|
|
46
46
|
costUsd: z.ZodOptional<z.ZodNumber>;
|
|
47
47
|
durationMs: z.ZodOptional<z.ZodNumber>;
|
|
48
48
|
toolDurations: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodArray<z.ZodNumber, "many">>>;
|
|
49
|
+
startTime: z.ZodOptional<z.ZodString>;
|
|
50
|
+
endTime: z.ZodOptional<z.ZodString>;
|
|
51
|
+
llmCallCount: z.ZodOptional<z.ZodNumber>;
|
|
49
52
|
}, "strip", z.ZodTypeAny, {
|
|
50
53
|
eventCount: number;
|
|
51
54
|
toolNames: string[];
|
|
@@ -59,6 +62,9 @@ declare const TraceSummarySchema: z.ZodObject<{
|
|
|
59
62
|
costUsd?: number | undefined;
|
|
60
63
|
durationMs?: number | undefined;
|
|
61
64
|
toolDurations?: Record<string, number[]> | undefined;
|
|
65
|
+
startTime?: string | undefined;
|
|
66
|
+
endTime?: string | undefined;
|
|
67
|
+
llmCallCount?: number | undefined;
|
|
62
68
|
}, {
|
|
63
69
|
eventCount: number;
|
|
64
70
|
toolNames: string[];
|
|
@@ -72,6 +78,9 @@ declare const TraceSummarySchema: z.ZodObject<{
|
|
|
72
78
|
costUsd?: number | undefined;
|
|
73
79
|
durationMs?: number | undefined;
|
|
74
80
|
toolDurations?: Record<string, number[]> | undefined;
|
|
81
|
+
startTime?: string | undefined;
|
|
82
|
+
endTime?: string | undefined;
|
|
83
|
+
llmCallCount?: number | undefined;
|
|
75
84
|
}>;
|
|
76
85
|
/**
|
|
77
86
|
* Tool call schema.
|
|
@@ -81,19 +90,25 @@ declare const ToolCallSchema: z.ZodObject<{
|
|
|
81
90
|
input: z.ZodOptional<z.ZodUnknown>;
|
|
82
91
|
output: z.ZodOptional<z.ZodUnknown>;
|
|
83
92
|
id: z.ZodOptional<z.ZodString>;
|
|
84
|
-
|
|
93
|
+
startTime: z.ZodOptional<z.ZodString>;
|
|
94
|
+
endTime: z.ZodOptional<z.ZodString>;
|
|
95
|
+
durationMs: z.ZodOptional<z.ZodNumber>;
|
|
85
96
|
}, "strip", z.ZodTypeAny, {
|
|
86
97
|
tool: string;
|
|
87
98
|
input?: unknown;
|
|
88
99
|
output?: unknown;
|
|
100
|
+
durationMs?: number | undefined;
|
|
101
|
+
startTime?: string | undefined;
|
|
102
|
+
endTime?: string | undefined;
|
|
89
103
|
id?: string | undefined;
|
|
90
|
-
timestamp?: string | undefined;
|
|
91
104
|
}, {
|
|
92
105
|
tool: string;
|
|
93
106
|
input?: unknown;
|
|
94
107
|
output?: unknown;
|
|
108
|
+
durationMs?: number | undefined;
|
|
109
|
+
startTime?: string | undefined;
|
|
110
|
+
endTime?: string | undefined;
|
|
95
111
|
id?: string | undefined;
|
|
96
|
-
timestamp?: string | undefined;
|
|
97
112
|
}>;
|
|
98
113
|
/**
|
|
99
114
|
* Unified message schema for input, expected, and output messages.
|
|
@@ -106,46 +121,62 @@ declare const MessageSchema: z.ZodObject<{
|
|
|
106
121
|
input: z.ZodOptional<z.ZodUnknown>;
|
|
107
122
|
output: z.ZodOptional<z.ZodUnknown>;
|
|
108
123
|
id: z.ZodOptional<z.ZodString>;
|
|
109
|
-
|
|
124
|
+
startTime: z.ZodOptional<z.ZodString>;
|
|
125
|
+
endTime: z.ZodOptional<z.ZodString>;
|
|
126
|
+
durationMs: z.ZodOptional<z.ZodNumber>;
|
|
110
127
|
}, "strip", z.ZodTypeAny, {
|
|
111
128
|
tool: string;
|
|
112
129
|
input?: unknown;
|
|
113
130
|
output?: unknown;
|
|
131
|
+
durationMs?: number | undefined;
|
|
132
|
+
startTime?: string | undefined;
|
|
133
|
+
endTime?: string | undefined;
|
|
114
134
|
id?: string | undefined;
|
|
115
|
-
timestamp?: string | undefined;
|
|
116
135
|
}, {
|
|
117
136
|
tool: string;
|
|
118
137
|
input?: unknown;
|
|
119
138
|
output?: unknown;
|
|
139
|
+
durationMs?: number | undefined;
|
|
140
|
+
startTime?: string | undefined;
|
|
141
|
+
endTime?: string | undefined;
|
|
120
142
|
id?: string | undefined;
|
|
121
|
-
timestamp?: string | undefined;
|
|
122
143
|
}>, "many">>;
|
|
123
144
|
name: z.ZodOptional<z.ZodString>;
|
|
124
|
-
|
|
145
|
+
startTime: z.ZodOptional<z.ZodString>;
|
|
146
|
+
endTime: z.ZodOptional<z.ZodString>;
|
|
147
|
+
durationMs: z.ZodOptional<z.ZodNumber>;
|
|
125
148
|
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
126
149
|
}, "strip", z.ZodTypeAny, {
|
|
127
150
|
role: "tool" | "assistant" | "user" | "system";
|
|
128
|
-
|
|
151
|
+
durationMs?: number | undefined;
|
|
152
|
+
startTime?: string | undefined;
|
|
153
|
+
endTime?: string | undefined;
|
|
129
154
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
130
155
|
toolCalls?: {
|
|
131
156
|
tool: string;
|
|
132
157
|
input?: unknown;
|
|
133
158
|
output?: unknown;
|
|
159
|
+
durationMs?: number | undefined;
|
|
160
|
+
startTime?: string | undefined;
|
|
161
|
+
endTime?: string | undefined;
|
|
134
162
|
id?: string | undefined;
|
|
135
|
-
timestamp?: string | undefined;
|
|
136
163
|
}[] | undefined;
|
|
137
164
|
name?: string | undefined;
|
|
138
165
|
metadata?: Record<string, unknown> | undefined;
|
|
139
166
|
}, {
|
|
140
167
|
role: "tool" | "assistant" | "user" | "system";
|
|
141
|
-
|
|
168
|
+
durationMs?: number | undefined;
|
|
169
|
+
startTime?: string | undefined;
|
|
170
|
+
endTime?: string | undefined;
|
|
142
171
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
143
172
|
toolCalls?: {
|
|
144
173
|
tool: string;
|
|
145
174
|
input?: unknown;
|
|
146
175
|
output?: unknown;
|
|
176
|
+
durationMs?: number | undefined;
|
|
177
|
+
startTime?: string | undefined;
|
|
178
|
+
endTime?: string | undefined;
|
|
147
179
|
id?: string | undefined;
|
|
148
|
-
timestamp?: string | undefined;
|
|
149
180
|
}[] | undefined;
|
|
150
181
|
name?: string | undefined;
|
|
151
182
|
metadata?: Record<string, unknown> | undefined;
|
|
@@ -155,8 +186,8 @@ declare const MessageSchema: z.ZodObject<{
|
|
|
155
186
|
*/
|
|
156
187
|
declare const CodeJudgeInputSchema: z.ZodObject<{
|
|
157
188
|
question: z.ZodString;
|
|
158
|
-
|
|
159
|
-
|
|
189
|
+
criteria: z.ZodString;
|
|
190
|
+
expectedOutput: z.ZodArray<z.ZodObject<{
|
|
160
191
|
role: z.ZodEnum<["assistant", "user", "system", "tool"]>;
|
|
161
192
|
content: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>, z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>, "many">]>>;
|
|
162
193
|
toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
@@ -164,53 +195,69 @@ declare const CodeJudgeInputSchema: z.ZodObject<{
|
|
|
164
195
|
input: z.ZodOptional<z.ZodUnknown>;
|
|
165
196
|
output: z.ZodOptional<z.ZodUnknown>;
|
|
166
197
|
id: z.ZodOptional<z.ZodString>;
|
|
167
|
-
|
|
198
|
+
startTime: z.ZodOptional<z.ZodString>;
|
|
199
|
+
endTime: z.ZodOptional<z.ZodString>;
|
|
200
|
+
durationMs: z.ZodOptional<z.ZodNumber>;
|
|
168
201
|
}, "strip", z.ZodTypeAny, {
|
|
169
202
|
tool: string;
|
|
170
203
|
input?: unknown;
|
|
171
204
|
output?: unknown;
|
|
205
|
+
durationMs?: number | undefined;
|
|
206
|
+
startTime?: string | undefined;
|
|
207
|
+
endTime?: string | undefined;
|
|
172
208
|
id?: string | undefined;
|
|
173
|
-
timestamp?: string | undefined;
|
|
174
209
|
}, {
|
|
175
210
|
tool: string;
|
|
176
211
|
input?: unknown;
|
|
177
212
|
output?: unknown;
|
|
213
|
+
durationMs?: number | undefined;
|
|
214
|
+
startTime?: string | undefined;
|
|
215
|
+
endTime?: string | undefined;
|
|
178
216
|
id?: string | undefined;
|
|
179
|
-
timestamp?: string | undefined;
|
|
180
217
|
}>, "many">>;
|
|
181
218
|
name: z.ZodOptional<z.ZodString>;
|
|
182
|
-
|
|
219
|
+
startTime: z.ZodOptional<z.ZodString>;
|
|
220
|
+
endTime: z.ZodOptional<z.ZodString>;
|
|
221
|
+
durationMs: z.ZodOptional<z.ZodNumber>;
|
|
183
222
|
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
184
223
|
}, "strip", z.ZodTypeAny, {
|
|
185
224
|
role: "tool" | "assistant" | "user" | "system";
|
|
186
|
-
|
|
225
|
+
durationMs?: number | undefined;
|
|
226
|
+
startTime?: string | undefined;
|
|
227
|
+
endTime?: string | undefined;
|
|
187
228
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
188
229
|
toolCalls?: {
|
|
189
230
|
tool: string;
|
|
190
231
|
input?: unknown;
|
|
191
232
|
output?: unknown;
|
|
233
|
+
durationMs?: number | undefined;
|
|
234
|
+
startTime?: string | undefined;
|
|
235
|
+
endTime?: string | undefined;
|
|
192
236
|
id?: string | undefined;
|
|
193
|
-
timestamp?: string | undefined;
|
|
194
237
|
}[] | undefined;
|
|
195
238
|
name?: string | undefined;
|
|
196
239
|
metadata?: Record<string, unknown> | undefined;
|
|
197
240
|
}, {
|
|
198
241
|
role: "tool" | "assistant" | "user" | "system";
|
|
199
|
-
|
|
242
|
+
durationMs?: number | undefined;
|
|
243
|
+
startTime?: string | undefined;
|
|
244
|
+
endTime?: string | undefined;
|
|
200
245
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
201
246
|
toolCalls?: {
|
|
202
247
|
tool: string;
|
|
203
248
|
input?: unknown;
|
|
204
249
|
output?: unknown;
|
|
250
|
+
durationMs?: number | undefined;
|
|
251
|
+
startTime?: string | undefined;
|
|
252
|
+
endTime?: string | undefined;
|
|
205
253
|
id?: string | undefined;
|
|
206
|
-
timestamp?: string | undefined;
|
|
207
254
|
}[] | undefined;
|
|
208
255
|
name?: string | undefined;
|
|
209
256
|
metadata?: Record<string, unknown> | undefined;
|
|
210
257
|
}>, "many">;
|
|
211
258
|
referenceAnswer: z.ZodOptional<z.ZodString>;
|
|
212
|
-
|
|
213
|
-
|
|
259
|
+
answer: z.ZodString;
|
|
260
|
+
output: z.ZodOptional<z.ZodNullable<z.ZodArray<z.ZodObject<{
|
|
214
261
|
role: z.ZodEnum<["assistant", "user", "system", "tool"]>;
|
|
215
262
|
content: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>, z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>, "many">]>>;
|
|
216
263
|
toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
@@ -218,53 +265,71 @@ declare const CodeJudgeInputSchema: z.ZodObject<{
|
|
|
218
265
|
input: z.ZodOptional<z.ZodUnknown>;
|
|
219
266
|
output: z.ZodOptional<z.ZodUnknown>;
|
|
220
267
|
id: z.ZodOptional<z.ZodString>;
|
|
221
|
-
|
|
268
|
+
startTime: z.ZodOptional<z.ZodString>;
|
|
269
|
+
endTime: z.ZodOptional<z.ZodString>;
|
|
270
|
+
durationMs: z.ZodOptional<z.ZodNumber>;
|
|
222
271
|
}, "strip", z.ZodTypeAny, {
|
|
223
272
|
tool: string;
|
|
224
273
|
input?: unknown;
|
|
225
274
|
output?: unknown;
|
|
275
|
+
durationMs?: number | undefined;
|
|
276
|
+
startTime?: string | undefined;
|
|
277
|
+
endTime?: string | undefined;
|
|
226
278
|
id?: string | undefined;
|
|
227
|
-
timestamp?: string | undefined;
|
|
228
279
|
}, {
|
|
229
280
|
tool: string;
|
|
230
281
|
input?: unknown;
|
|
231
282
|
output?: unknown;
|
|
283
|
+
durationMs?: number | undefined;
|
|
284
|
+
startTime?: string | undefined;
|
|
285
|
+
endTime?: string | undefined;
|
|
232
286
|
id?: string | undefined;
|
|
233
|
-
timestamp?: string | undefined;
|
|
234
287
|
}>, "many">>;
|
|
235
288
|
name: z.ZodOptional<z.ZodString>;
|
|
236
|
-
|
|
289
|
+
startTime: z.ZodOptional<z.ZodString>;
|
|
290
|
+
endTime: z.ZodOptional<z.ZodString>;
|
|
291
|
+
durationMs: z.ZodOptional<z.ZodNumber>;
|
|
237
292
|
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
238
293
|
}, "strip", z.ZodTypeAny, {
|
|
239
294
|
role: "tool" | "assistant" | "user" | "system";
|
|
240
|
-
|
|
295
|
+
durationMs?: number | undefined;
|
|
296
|
+
startTime?: string | undefined;
|
|
297
|
+
endTime?: string | undefined;
|
|
241
298
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
242
299
|
toolCalls?: {
|
|
243
300
|
tool: string;
|
|
244
301
|
input?: unknown;
|
|
245
302
|
output?: unknown;
|
|
303
|
+
durationMs?: number | undefined;
|
|
304
|
+
startTime?: string | undefined;
|
|
305
|
+
endTime?: string | undefined;
|
|
246
306
|
id?: string | undefined;
|
|
247
|
-
timestamp?: string | undefined;
|
|
248
307
|
}[] | undefined;
|
|
249
308
|
name?: string | undefined;
|
|
250
309
|
metadata?: Record<string, unknown> | undefined;
|
|
251
310
|
}, {
|
|
252
311
|
role: "tool" | "assistant" | "user" | "system";
|
|
253
|
-
|
|
312
|
+
durationMs?: number | undefined;
|
|
313
|
+
startTime?: string | undefined;
|
|
314
|
+
endTime?: string | undefined;
|
|
254
315
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
255
316
|
toolCalls?: {
|
|
256
317
|
tool: string;
|
|
257
318
|
input?: unknown;
|
|
258
319
|
output?: unknown;
|
|
320
|
+
durationMs?: number | undefined;
|
|
321
|
+
startTime?: string | undefined;
|
|
322
|
+
endTime?: string | undefined;
|
|
259
323
|
id?: string | undefined;
|
|
260
|
-
timestamp?: string | undefined;
|
|
261
324
|
}[] | undefined;
|
|
262
325
|
name?: string | undefined;
|
|
263
326
|
metadata?: Record<string, unknown> | undefined;
|
|
264
327
|
}>, "many">>>;
|
|
328
|
+
/** Path to a temp file containing the output JSON (used for large payloads). */
|
|
329
|
+
outputPath: z.ZodOptional<z.ZodString>;
|
|
265
330
|
guidelineFiles: z.ZodArray<z.ZodString, "many">;
|
|
266
331
|
inputFiles: z.ZodArray<z.ZodString, "many">;
|
|
267
|
-
|
|
332
|
+
input: z.ZodArray<z.ZodObject<{
|
|
268
333
|
role: z.ZodEnum<["assistant", "user", "system", "tool"]>;
|
|
269
334
|
content: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>, z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>, "many">]>>;
|
|
270
335
|
toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
@@ -272,51 +337,67 @@ declare const CodeJudgeInputSchema: z.ZodObject<{
|
|
|
272
337
|
input: z.ZodOptional<z.ZodUnknown>;
|
|
273
338
|
output: z.ZodOptional<z.ZodUnknown>;
|
|
274
339
|
id: z.ZodOptional<z.ZodString>;
|
|
275
|
-
|
|
340
|
+
startTime: z.ZodOptional<z.ZodString>;
|
|
341
|
+
endTime: z.ZodOptional<z.ZodString>;
|
|
342
|
+
durationMs: z.ZodOptional<z.ZodNumber>;
|
|
276
343
|
}, "strip", z.ZodTypeAny, {
|
|
277
344
|
tool: string;
|
|
278
345
|
input?: unknown;
|
|
279
346
|
output?: unknown;
|
|
347
|
+
durationMs?: number | undefined;
|
|
348
|
+
startTime?: string | undefined;
|
|
349
|
+
endTime?: string | undefined;
|
|
280
350
|
id?: string | undefined;
|
|
281
|
-
timestamp?: string | undefined;
|
|
282
351
|
}, {
|
|
283
352
|
tool: string;
|
|
284
353
|
input?: unknown;
|
|
285
354
|
output?: unknown;
|
|
355
|
+
durationMs?: number | undefined;
|
|
356
|
+
startTime?: string | undefined;
|
|
357
|
+
endTime?: string | undefined;
|
|
286
358
|
id?: string | undefined;
|
|
287
|
-
timestamp?: string | undefined;
|
|
288
359
|
}>, "many">>;
|
|
289
360
|
name: z.ZodOptional<z.ZodString>;
|
|
290
|
-
|
|
361
|
+
startTime: z.ZodOptional<z.ZodString>;
|
|
362
|
+
endTime: z.ZodOptional<z.ZodString>;
|
|
363
|
+
durationMs: z.ZodOptional<z.ZodNumber>;
|
|
291
364
|
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
292
365
|
}, "strip", z.ZodTypeAny, {
|
|
293
366
|
role: "tool" | "assistant" | "user" | "system";
|
|
294
|
-
|
|
367
|
+
durationMs?: number | undefined;
|
|
368
|
+
startTime?: string | undefined;
|
|
369
|
+
endTime?: string | undefined;
|
|
295
370
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
296
371
|
toolCalls?: {
|
|
297
372
|
tool: string;
|
|
298
373
|
input?: unknown;
|
|
299
374
|
output?: unknown;
|
|
375
|
+
durationMs?: number | undefined;
|
|
376
|
+
startTime?: string | undefined;
|
|
377
|
+
endTime?: string | undefined;
|
|
300
378
|
id?: string | undefined;
|
|
301
|
-
timestamp?: string | undefined;
|
|
302
379
|
}[] | undefined;
|
|
303
380
|
name?: string | undefined;
|
|
304
381
|
metadata?: Record<string, unknown> | undefined;
|
|
305
382
|
}, {
|
|
306
383
|
role: "tool" | "assistant" | "user" | "system";
|
|
307
|
-
|
|
384
|
+
durationMs?: number | undefined;
|
|
385
|
+
startTime?: string | undefined;
|
|
386
|
+
endTime?: string | undefined;
|
|
308
387
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
309
388
|
toolCalls?: {
|
|
310
389
|
tool: string;
|
|
311
390
|
input?: unknown;
|
|
312
391
|
output?: unknown;
|
|
392
|
+
durationMs?: number | undefined;
|
|
393
|
+
startTime?: string | undefined;
|
|
394
|
+
endTime?: string | undefined;
|
|
313
395
|
id?: string | undefined;
|
|
314
|
-
timestamp?: string | undefined;
|
|
315
396
|
}[] | undefined;
|
|
316
397
|
name?: string | undefined;
|
|
317
398
|
metadata?: Record<string, unknown> | undefined;
|
|
318
399
|
}>, "many">;
|
|
319
|
-
|
|
400
|
+
trace: z.ZodOptional<z.ZodNullable<z.ZodObject<{
|
|
320
401
|
eventCount: z.ZodNumber;
|
|
321
402
|
toolNames: z.ZodArray<z.ZodString, "many">;
|
|
322
403
|
toolCallsByName: z.ZodRecord<z.ZodString, z.ZodNumber>;
|
|
@@ -337,6 +418,9 @@ declare const CodeJudgeInputSchema: z.ZodObject<{
|
|
|
337
418
|
costUsd: z.ZodOptional<z.ZodNumber>;
|
|
338
419
|
durationMs: z.ZodOptional<z.ZodNumber>;
|
|
339
420
|
toolDurations: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodArray<z.ZodNumber, "many">>>;
|
|
421
|
+
startTime: z.ZodOptional<z.ZodString>;
|
|
422
|
+
endTime: z.ZodOptional<z.ZodString>;
|
|
423
|
+
llmCallCount: z.ZodOptional<z.ZodNumber>;
|
|
340
424
|
}, "strip", z.ZodTypeAny, {
|
|
341
425
|
eventCount: number;
|
|
342
426
|
toolNames: string[];
|
|
@@ -350,6 +434,9 @@ declare const CodeJudgeInputSchema: z.ZodObject<{
|
|
|
350
434
|
costUsd?: number | undefined;
|
|
351
435
|
durationMs?: number | undefined;
|
|
352
436
|
toolDurations?: Record<string, number[]> | undefined;
|
|
437
|
+
startTime?: string | undefined;
|
|
438
|
+
endTime?: string | undefined;
|
|
439
|
+
llmCallCount?: number | undefined;
|
|
353
440
|
}, {
|
|
354
441
|
eventCount: number;
|
|
355
442
|
toolNames: string[];
|
|
@@ -363,58 +450,76 @@ declare const CodeJudgeInputSchema: z.ZodObject<{
|
|
|
363
450
|
costUsd?: number | undefined;
|
|
364
451
|
durationMs?: number | undefined;
|
|
365
452
|
toolDurations?: Record<string, number[]> | undefined;
|
|
453
|
+
startTime?: string | undefined;
|
|
454
|
+
endTime?: string | undefined;
|
|
455
|
+
llmCallCount?: number | undefined;
|
|
366
456
|
}>>>;
|
|
457
|
+
fileChanges: z.ZodOptional<z.ZodNullable<z.ZodString>>;
|
|
458
|
+
workspacePath: z.ZodOptional<z.ZodNullable<z.ZodString>>;
|
|
367
459
|
config: z.ZodOptional<z.ZodNullable<z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
|
|
368
460
|
}, "strip", z.ZodTypeAny, {
|
|
369
|
-
|
|
370
|
-
expectedOutcome: string;
|
|
371
|
-
expectedMessages: {
|
|
461
|
+
input: {
|
|
372
462
|
role: "tool" | "assistant" | "user" | "system";
|
|
373
|
-
|
|
463
|
+
durationMs?: number | undefined;
|
|
464
|
+
startTime?: string | undefined;
|
|
465
|
+
endTime?: string | undefined;
|
|
374
466
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
375
467
|
toolCalls?: {
|
|
376
468
|
tool: string;
|
|
377
469
|
input?: unknown;
|
|
378
470
|
output?: unknown;
|
|
471
|
+
durationMs?: number | undefined;
|
|
472
|
+
startTime?: string | undefined;
|
|
473
|
+
endTime?: string | undefined;
|
|
379
474
|
id?: string | undefined;
|
|
380
|
-
timestamp?: string | undefined;
|
|
381
475
|
}[] | undefined;
|
|
382
476
|
name?: string | undefined;
|
|
383
477
|
metadata?: Record<string, unknown> | undefined;
|
|
384
478
|
}[];
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
inputMessages: {
|
|
479
|
+
question: string;
|
|
480
|
+
criteria: string;
|
|
481
|
+
expectedOutput: {
|
|
389
482
|
role: "tool" | "assistant" | "user" | "system";
|
|
390
|
-
|
|
483
|
+
durationMs?: number | undefined;
|
|
484
|
+
startTime?: string | undefined;
|
|
485
|
+
endTime?: string | undefined;
|
|
391
486
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
392
487
|
toolCalls?: {
|
|
393
488
|
tool: string;
|
|
394
489
|
input?: unknown;
|
|
395
490
|
output?: unknown;
|
|
491
|
+
durationMs?: number | undefined;
|
|
492
|
+
startTime?: string | undefined;
|
|
493
|
+
endTime?: string | undefined;
|
|
396
494
|
id?: string | undefined;
|
|
397
|
-
timestamp?: string | undefined;
|
|
398
495
|
}[] | undefined;
|
|
399
496
|
name?: string | undefined;
|
|
400
497
|
metadata?: Record<string, unknown> | undefined;
|
|
401
498
|
}[];
|
|
402
|
-
|
|
403
|
-
|
|
499
|
+
answer: string;
|
|
500
|
+
guidelineFiles: string[];
|
|
501
|
+
inputFiles: string[];
|
|
502
|
+
output?: {
|
|
404
503
|
role: "tool" | "assistant" | "user" | "system";
|
|
405
|
-
|
|
504
|
+
durationMs?: number | undefined;
|
|
505
|
+
startTime?: string | undefined;
|
|
506
|
+
endTime?: string | undefined;
|
|
406
507
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
407
508
|
toolCalls?: {
|
|
408
509
|
tool: string;
|
|
409
510
|
input?: unknown;
|
|
410
511
|
output?: unknown;
|
|
512
|
+
durationMs?: number | undefined;
|
|
513
|
+
startTime?: string | undefined;
|
|
514
|
+
endTime?: string | undefined;
|
|
411
515
|
id?: string | undefined;
|
|
412
|
-
timestamp?: string | undefined;
|
|
413
516
|
}[] | undefined;
|
|
414
517
|
name?: string | undefined;
|
|
415
518
|
metadata?: Record<string, unknown> | undefined;
|
|
416
519
|
}[] | null | undefined;
|
|
417
|
-
|
|
520
|
+
referenceAnswer?: string | undefined;
|
|
521
|
+
outputPath?: string | undefined;
|
|
522
|
+
trace?: {
|
|
418
523
|
eventCount: number;
|
|
419
524
|
toolNames: string[];
|
|
420
525
|
toolCallsByName: Record<string, number>;
|
|
@@ -427,58 +532,76 @@ declare const CodeJudgeInputSchema: z.ZodObject<{
|
|
|
427
532
|
costUsd?: number | undefined;
|
|
428
533
|
durationMs?: number | undefined;
|
|
429
534
|
toolDurations?: Record<string, number[]> | undefined;
|
|
535
|
+
startTime?: string | undefined;
|
|
536
|
+
endTime?: string | undefined;
|
|
537
|
+
llmCallCount?: number | undefined;
|
|
430
538
|
} | null | undefined;
|
|
539
|
+
fileChanges?: string | null | undefined;
|
|
540
|
+
workspacePath?: string | null | undefined;
|
|
431
541
|
config?: Record<string, unknown> | null | undefined;
|
|
432
542
|
}, {
|
|
433
|
-
|
|
434
|
-
expectedOutcome: string;
|
|
435
|
-
expectedMessages: {
|
|
543
|
+
input: {
|
|
436
544
|
role: "tool" | "assistant" | "user" | "system";
|
|
437
|
-
|
|
545
|
+
durationMs?: number | undefined;
|
|
546
|
+
startTime?: string | undefined;
|
|
547
|
+
endTime?: string | undefined;
|
|
438
548
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
439
549
|
toolCalls?: {
|
|
440
550
|
tool: string;
|
|
441
551
|
input?: unknown;
|
|
442
552
|
output?: unknown;
|
|
553
|
+
durationMs?: number | undefined;
|
|
554
|
+
startTime?: string | undefined;
|
|
555
|
+
endTime?: string | undefined;
|
|
443
556
|
id?: string | undefined;
|
|
444
|
-
timestamp?: string | undefined;
|
|
445
557
|
}[] | undefined;
|
|
446
558
|
name?: string | undefined;
|
|
447
559
|
metadata?: Record<string, unknown> | undefined;
|
|
448
560
|
}[];
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
inputMessages: {
|
|
561
|
+
question: string;
|
|
562
|
+
criteria: string;
|
|
563
|
+
expectedOutput: {
|
|
453
564
|
role: "tool" | "assistant" | "user" | "system";
|
|
454
|
-
|
|
565
|
+
durationMs?: number | undefined;
|
|
566
|
+
startTime?: string | undefined;
|
|
567
|
+
endTime?: string | undefined;
|
|
455
568
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
456
569
|
toolCalls?: {
|
|
457
570
|
tool: string;
|
|
458
571
|
input?: unknown;
|
|
459
572
|
output?: unknown;
|
|
573
|
+
durationMs?: number | undefined;
|
|
574
|
+
startTime?: string | undefined;
|
|
575
|
+
endTime?: string | undefined;
|
|
460
576
|
id?: string | undefined;
|
|
461
|
-
timestamp?: string | undefined;
|
|
462
577
|
}[] | undefined;
|
|
463
578
|
name?: string | undefined;
|
|
464
579
|
metadata?: Record<string, unknown> | undefined;
|
|
465
580
|
}[];
|
|
466
|
-
|
|
467
|
-
|
|
581
|
+
answer: string;
|
|
582
|
+
guidelineFiles: string[];
|
|
583
|
+
inputFiles: string[];
|
|
584
|
+
output?: {
|
|
468
585
|
role: "tool" | "assistant" | "user" | "system";
|
|
469
|
-
|
|
586
|
+
durationMs?: number | undefined;
|
|
587
|
+
startTime?: string | undefined;
|
|
588
|
+
endTime?: string | undefined;
|
|
470
589
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
471
590
|
toolCalls?: {
|
|
472
591
|
tool: string;
|
|
473
592
|
input?: unknown;
|
|
474
593
|
output?: unknown;
|
|
594
|
+
durationMs?: number | undefined;
|
|
595
|
+
startTime?: string | undefined;
|
|
596
|
+
endTime?: string | undefined;
|
|
475
597
|
id?: string | undefined;
|
|
476
|
-
timestamp?: string | undefined;
|
|
477
598
|
}[] | undefined;
|
|
478
599
|
name?: string | undefined;
|
|
479
600
|
metadata?: Record<string, unknown> | undefined;
|
|
480
601
|
}[] | null | undefined;
|
|
481
|
-
|
|
602
|
+
referenceAnswer?: string | undefined;
|
|
603
|
+
outputPath?: string | undefined;
|
|
604
|
+
trace?: {
|
|
482
605
|
eventCount: number;
|
|
483
606
|
toolNames: string[];
|
|
484
607
|
toolCallsByName: Record<string, number>;
|
|
@@ -491,7 +614,12 @@ declare const CodeJudgeInputSchema: z.ZodObject<{
|
|
|
491
614
|
costUsd?: number | undefined;
|
|
492
615
|
durationMs?: number | undefined;
|
|
493
616
|
toolDurations?: Record<string, number[]> | undefined;
|
|
617
|
+
startTime?: string | undefined;
|
|
618
|
+
endTime?: string | undefined;
|
|
619
|
+
llmCallCount?: number | undefined;
|
|
494
620
|
} | null | undefined;
|
|
621
|
+
fileChanges?: string | null | undefined;
|
|
622
|
+
workspacePath?: string | null | undefined;
|
|
495
623
|
config?: Record<string, unknown> | null | undefined;
|
|
496
624
|
}>;
|
|
497
625
|
/**
|
|
@@ -532,8 +660,8 @@ type TokenUsage = z.infer<typeof TokenUsageSchema>;
|
|
|
532
660
|
*/
|
|
533
661
|
declare const PromptTemplateInputSchema: z.ZodObject<{
|
|
534
662
|
question: z.ZodString;
|
|
535
|
-
|
|
536
|
-
|
|
663
|
+
criteria: z.ZodString;
|
|
664
|
+
expectedOutput: z.ZodArray<z.ZodObject<{
|
|
537
665
|
role: z.ZodEnum<["assistant", "user", "system", "tool"]>;
|
|
538
666
|
content: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>, z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>, "many">]>>;
|
|
539
667
|
toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
@@ -541,53 +669,69 @@ declare const PromptTemplateInputSchema: z.ZodObject<{
|
|
|
541
669
|
input: z.ZodOptional<z.ZodUnknown>;
|
|
542
670
|
output: z.ZodOptional<z.ZodUnknown>;
|
|
543
671
|
id: z.ZodOptional<z.ZodString>;
|
|
544
|
-
|
|
672
|
+
startTime: z.ZodOptional<z.ZodString>;
|
|
673
|
+
endTime: z.ZodOptional<z.ZodString>;
|
|
674
|
+
durationMs: z.ZodOptional<z.ZodNumber>;
|
|
545
675
|
}, "strip", z.ZodTypeAny, {
|
|
546
676
|
tool: string;
|
|
547
677
|
input?: unknown;
|
|
548
678
|
output?: unknown;
|
|
679
|
+
durationMs?: number | undefined;
|
|
680
|
+
startTime?: string | undefined;
|
|
681
|
+
endTime?: string | undefined;
|
|
549
682
|
id?: string | undefined;
|
|
550
|
-
timestamp?: string | undefined;
|
|
551
683
|
}, {
|
|
552
684
|
tool: string;
|
|
553
685
|
input?: unknown;
|
|
554
686
|
output?: unknown;
|
|
687
|
+
durationMs?: number | undefined;
|
|
688
|
+
startTime?: string | undefined;
|
|
689
|
+
endTime?: string | undefined;
|
|
555
690
|
id?: string | undefined;
|
|
556
|
-
timestamp?: string | undefined;
|
|
557
691
|
}>, "many">>;
|
|
558
692
|
name: z.ZodOptional<z.ZodString>;
|
|
559
|
-
|
|
693
|
+
startTime: z.ZodOptional<z.ZodString>;
|
|
694
|
+
endTime: z.ZodOptional<z.ZodString>;
|
|
695
|
+
durationMs: z.ZodOptional<z.ZodNumber>;
|
|
560
696
|
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
561
697
|
}, "strip", z.ZodTypeAny, {
|
|
562
698
|
role: "tool" | "assistant" | "user" | "system";
|
|
563
|
-
|
|
699
|
+
durationMs?: number | undefined;
|
|
700
|
+
startTime?: string | undefined;
|
|
701
|
+
endTime?: string | undefined;
|
|
564
702
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
565
703
|
toolCalls?: {
|
|
566
704
|
tool: string;
|
|
567
705
|
input?: unknown;
|
|
568
706
|
output?: unknown;
|
|
707
|
+
durationMs?: number | undefined;
|
|
708
|
+
startTime?: string | undefined;
|
|
709
|
+
endTime?: string | undefined;
|
|
569
710
|
id?: string | undefined;
|
|
570
|
-
timestamp?: string | undefined;
|
|
571
711
|
}[] | undefined;
|
|
572
712
|
name?: string | undefined;
|
|
573
713
|
metadata?: Record<string, unknown> | undefined;
|
|
574
714
|
}, {
|
|
575
715
|
role: "tool" | "assistant" | "user" | "system";
|
|
576
|
-
|
|
716
|
+
durationMs?: number | undefined;
|
|
717
|
+
startTime?: string | undefined;
|
|
718
|
+
endTime?: string | undefined;
|
|
577
719
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
578
720
|
toolCalls?: {
|
|
579
721
|
tool: string;
|
|
580
722
|
input?: unknown;
|
|
581
723
|
output?: unknown;
|
|
724
|
+
durationMs?: number | undefined;
|
|
725
|
+
startTime?: string | undefined;
|
|
726
|
+
endTime?: string | undefined;
|
|
582
727
|
id?: string | undefined;
|
|
583
|
-
timestamp?: string | undefined;
|
|
584
728
|
}[] | undefined;
|
|
585
729
|
name?: string | undefined;
|
|
586
730
|
metadata?: Record<string, unknown> | undefined;
|
|
587
731
|
}>, "many">;
|
|
588
732
|
referenceAnswer: z.ZodOptional<z.ZodString>;
|
|
589
|
-
|
|
590
|
-
|
|
733
|
+
answer: z.ZodString;
|
|
734
|
+
output: z.ZodOptional<z.ZodNullable<z.ZodArray<z.ZodObject<{
|
|
591
735
|
role: z.ZodEnum<["assistant", "user", "system", "tool"]>;
|
|
592
736
|
content: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>, z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>, "many">]>>;
|
|
593
737
|
toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
@@ -595,53 +739,71 @@ declare const PromptTemplateInputSchema: z.ZodObject<{
|
|
|
595
739
|
input: z.ZodOptional<z.ZodUnknown>;
|
|
596
740
|
output: z.ZodOptional<z.ZodUnknown>;
|
|
597
741
|
id: z.ZodOptional<z.ZodString>;
|
|
598
|
-
|
|
742
|
+
startTime: z.ZodOptional<z.ZodString>;
|
|
743
|
+
endTime: z.ZodOptional<z.ZodString>;
|
|
744
|
+
durationMs: z.ZodOptional<z.ZodNumber>;
|
|
599
745
|
}, "strip", z.ZodTypeAny, {
|
|
600
746
|
tool: string;
|
|
601
747
|
input?: unknown;
|
|
602
748
|
output?: unknown;
|
|
749
|
+
durationMs?: number | undefined;
|
|
750
|
+
startTime?: string | undefined;
|
|
751
|
+
endTime?: string | undefined;
|
|
603
752
|
id?: string | undefined;
|
|
604
|
-
timestamp?: string | undefined;
|
|
605
753
|
}, {
|
|
606
754
|
tool: string;
|
|
607
755
|
input?: unknown;
|
|
608
756
|
output?: unknown;
|
|
757
|
+
durationMs?: number | undefined;
|
|
758
|
+
startTime?: string | undefined;
|
|
759
|
+
endTime?: string | undefined;
|
|
609
760
|
id?: string | undefined;
|
|
610
|
-
timestamp?: string | undefined;
|
|
611
761
|
}>, "many">>;
|
|
612
762
|
name: z.ZodOptional<z.ZodString>;
|
|
613
|
-
|
|
763
|
+
startTime: z.ZodOptional<z.ZodString>;
|
|
764
|
+
endTime: z.ZodOptional<z.ZodString>;
|
|
765
|
+
durationMs: z.ZodOptional<z.ZodNumber>;
|
|
614
766
|
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
615
767
|
}, "strip", z.ZodTypeAny, {
|
|
616
768
|
role: "tool" | "assistant" | "user" | "system";
|
|
617
|
-
|
|
769
|
+
durationMs?: number | undefined;
|
|
770
|
+
startTime?: string | undefined;
|
|
771
|
+
endTime?: string | undefined;
|
|
618
772
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
619
773
|
toolCalls?: {
|
|
620
774
|
tool: string;
|
|
621
775
|
input?: unknown;
|
|
622
776
|
output?: unknown;
|
|
777
|
+
durationMs?: number | undefined;
|
|
778
|
+
startTime?: string | undefined;
|
|
779
|
+
endTime?: string | undefined;
|
|
623
780
|
id?: string | undefined;
|
|
624
|
-
timestamp?: string | undefined;
|
|
625
781
|
}[] | undefined;
|
|
626
782
|
name?: string | undefined;
|
|
627
783
|
metadata?: Record<string, unknown> | undefined;
|
|
628
784
|
}, {
|
|
629
785
|
role: "tool" | "assistant" | "user" | "system";
|
|
630
|
-
|
|
786
|
+
durationMs?: number | undefined;
|
|
787
|
+
startTime?: string | undefined;
|
|
788
|
+
endTime?: string | undefined;
|
|
631
789
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
632
790
|
toolCalls?: {
|
|
633
791
|
tool: string;
|
|
634
792
|
input?: unknown;
|
|
635
793
|
output?: unknown;
|
|
794
|
+
durationMs?: number | undefined;
|
|
795
|
+
startTime?: string | undefined;
|
|
796
|
+
endTime?: string | undefined;
|
|
636
797
|
id?: string | undefined;
|
|
637
|
-
timestamp?: string | undefined;
|
|
638
798
|
}[] | undefined;
|
|
639
799
|
name?: string | undefined;
|
|
640
800
|
metadata?: Record<string, unknown> | undefined;
|
|
641
801
|
}>, "many">>>;
|
|
802
|
+
/** Path to a temp file containing the output JSON (used for large payloads). */
|
|
803
|
+
outputPath: z.ZodOptional<z.ZodString>;
|
|
642
804
|
guidelineFiles: z.ZodArray<z.ZodString, "many">;
|
|
643
805
|
inputFiles: z.ZodArray<z.ZodString, "many">;
|
|
644
|
-
|
|
806
|
+
input: z.ZodArray<z.ZodObject<{
|
|
645
807
|
role: z.ZodEnum<["assistant", "user", "system", "tool"]>;
|
|
646
808
|
content: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>, z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>, "many">]>>;
|
|
647
809
|
toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
@@ -649,51 +811,67 @@ declare const PromptTemplateInputSchema: z.ZodObject<{
|
|
|
649
811
|
input: z.ZodOptional<z.ZodUnknown>;
|
|
650
812
|
output: z.ZodOptional<z.ZodUnknown>;
|
|
651
813
|
id: z.ZodOptional<z.ZodString>;
|
|
652
|
-
|
|
814
|
+
startTime: z.ZodOptional<z.ZodString>;
|
|
815
|
+
endTime: z.ZodOptional<z.ZodString>;
|
|
816
|
+
durationMs: z.ZodOptional<z.ZodNumber>;
|
|
653
817
|
}, "strip", z.ZodTypeAny, {
|
|
654
818
|
tool: string;
|
|
655
819
|
input?: unknown;
|
|
656
820
|
output?: unknown;
|
|
821
|
+
durationMs?: number | undefined;
|
|
822
|
+
startTime?: string | undefined;
|
|
823
|
+
endTime?: string | undefined;
|
|
657
824
|
id?: string | undefined;
|
|
658
|
-
timestamp?: string | undefined;
|
|
659
825
|
}, {
|
|
660
826
|
tool: string;
|
|
661
827
|
input?: unknown;
|
|
662
828
|
output?: unknown;
|
|
829
|
+
durationMs?: number | undefined;
|
|
830
|
+
startTime?: string | undefined;
|
|
831
|
+
endTime?: string | undefined;
|
|
663
832
|
id?: string | undefined;
|
|
664
|
-
timestamp?: string | undefined;
|
|
665
833
|
}>, "many">>;
|
|
666
834
|
name: z.ZodOptional<z.ZodString>;
|
|
667
|
-
|
|
835
|
+
startTime: z.ZodOptional<z.ZodString>;
|
|
836
|
+
endTime: z.ZodOptional<z.ZodString>;
|
|
837
|
+
durationMs: z.ZodOptional<z.ZodNumber>;
|
|
668
838
|
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
669
839
|
}, "strip", z.ZodTypeAny, {
|
|
670
840
|
role: "tool" | "assistant" | "user" | "system";
|
|
671
|
-
|
|
841
|
+
durationMs?: number | undefined;
|
|
842
|
+
startTime?: string | undefined;
|
|
843
|
+
endTime?: string | undefined;
|
|
672
844
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
673
845
|
toolCalls?: {
|
|
674
846
|
tool: string;
|
|
675
847
|
input?: unknown;
|
|
676
848
|
output?: unknown;
|
|
849
|
+
durationMs?: number | undefined;
|
|
850
|
+
startTime?: string | undefined;
|
|
851
|
+
endTime?: string | undefined;
|
|
677
852
|
id?: string | undefined;
|
|
678
|
-
timestamp?: string | undefined;
|
|
679
853
|
}[] | undefined;
|
|
680
854
|
name?: string | undefined;
|
|
681
855
|
metadata?: Record<string, unknown> | undefined;
|
|
682
856
|
}, {
|
|
683
857
|
role: "tool" | "assistant" | "user" | "system";
|
|
684
|
-
|
|
858
|
+
durationMs?: number | undefined;
|
|
859
|
+
startTime?: string | undefined;
|
|
860
|
+
endTime?: string | undefined;
|
|
685
861
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
686
862
|
toolCalls?: {
|
|
687
863
|
tool: string;
|
|
688
864
|
input?: unknown;
|
|
689
865
|
output?: unknown;
|
|
866
|
+
durationMs?: number | undefined;
|
|
867
|
+
startTime?: string | undefined;
|
|
868
|
+
endTime?: string | undefined;
|
|
690
869
|
id?: string | undefined;
|
|
691
|
-
timestamp?: string | undefined;
|
|
692
870
|
}[] | undefined;
|
|
693
871
|
name?: string | undefined;
|
|
694
872
|
metadata?: Record<string, unknown> | undefined;
|
|
695
873
|
}>, "many">;
|
|
696
|
-
|
|
874
|
+
trace: z.ZodOptional<z.ZodNullable<z.ZodObject<{
|
|
697
875
|
eventCount: z.ZodNumber;
|
|
698
876
|
toolNames: z.ZodArray<z.ZodString, "many">;
|
|
699
877
|
toolCallsByName: z.ZodRecord<z.ZodString, z.ZodNumber>;
|
|
@@ -714,6 +892,9 @@ declare const PromptTemplateInputSchema: z.ZodObject<{
|
|
|
714
892
|
costUsd: z.ZodOptional<z.ZodNumber>;
|
|
715
893
|
durationMs: z.ZodOptional<z.ZodNumber>;
|
|
716
894
|
toolDurations: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodArray<z.ZodNumber, "many">>>;
|
|
895
|
+
startTime: z.ZodOptional<z.ZodString>;
|
|
896
|
+
endTime: z.ZodOptional<z.ZodString>;
|
|
897
|
+
llmCallCount: z.ZodOptional<z.ZodNumber>;
|
|
717
898
|
}, "strip", z.ZodTypeAny, {
|
|
718
899
|
eventCount: number;
|
|
719
900
|
toolNames: string[];
|
|
@@ -727,6 +908,9 @@ declare const PromptTemplateInputSchema: z.ZodObject<{
|
|
|
727
908
|
costUsd?: number | undefined;
|
|
728
909
|
durationMs?: number | undefined;
|
|
729
910
|
toolDurations?: Record<string, number[]> | undefined;
|
|
911
|
+
startTime?: string | undefined;
|
|
912
|
+
endTime?: string | undefined;
|
|
913
|
+
llmCallCount?: number | undefined;
|
|
730
914
|
}, {
|
|
731
915
|
eventCount: number;
|
|
732
916
|
toolNames: string[];
|
|
@@ -740,58 +924,76 @@ declare const PromptTemplateInputSchema: z.ZodObject<{
|
|
|
740
924
|
costUsd?: number | undefined;
|
|
741
925
|
durationMs?: number | undefined;
|
|
742
926
|
toolDurations?: Record<string, number[]> | undefined;
|
|
927
|
+
startTime?: string | undefined;
|
|
928
|
+
endTime?: string | undefined;
|
|
929
|
+
llmCallCount?: number | undefined;
|
|
743
930
|
}>>>;
|
|
931
|
+
fileChanges: z.ZodOptional<z.ZodNullable<z.ZodString>>;
|
|
932
|
+
workspacePath: z.ZodOptional<z.ZodNullable<z.ZodString>>;
|
|
744
933
|
config: z.ZodOptional<z.ZodNullable<z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
|
|
745
934
|
}, "strip", z.ZodTypeAny, {
|
|
746
|
-
|
|
747
|
-
expectedOutcome: string;
|
|
748
|
-
expectedMessages: {
|
|
935
|
+
input: {
|
|
749
936
|
role: "tool" | "assistant" | "user" | "system";
|
|
750
|
-
|
|
937
|
+
durationMs?: number | undefined;
|
|
938
|
+
startTime?: string | undefined;
|
|
939
|
+
endTime?: string | undefined;
|
|
751
940
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
752
941
|
toolCalls?: {
|
|
753
942
|
tool: string;
|
|
754
943
|
input?: unknown;
|
|
755
944
|
output?: unknown;
|
|
945
|
+
durationMs?: number | undefined;
|
|
946
|
+
startTime?: string | undefined;
|
|
947
|
+
endTime?: string | undefined;
|
|
756
948
|
id?: string | undefined;
|
|
757
|
-
timestamp?: string | undefined;
|
|
758
949
|
}[] | undefined;
|
|
759
950
|
name?: string | undefined;
|
|
760
951
|
metadata?: Record<string, unknown> | undefined;
|
|
761
952
|
}[];
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
inputMessages: {
|
|
953
|
+
question: string;
|
|
954
|
+
criteria: string;
|
|
955
|
+
expectedOutput: {
|
|
766
956
|
role: "tool" | "assistant" | "user" | "system";
|
|
767
|
-
|
|
957
|
+
durationMs?: number | undefined;
|
|
958
|
+
startTime?: string | undefined;
|
|
959
|
+
endTime?: string | undefined;
|
|
768
960
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
769
961
|
toolCalls?: {
|
|
770
962
|
tool: string;
|
|
771
963
|
input?: unknown;
|
|
772
964
|
output?: unknown;
|
|
965
|
+
durationMs?: number | undefined;
|
|
966
|
+
startTime?: string | undefined;
|
|
967
|
+
endTime?: string | undefined;
|
|
773
968
|
id?: string | undefined;
|
|
774
|
-
timestamp?: string | undefined;
|
|
775
969
|
}[] | undefined;
|
|
776
970
|
name?: string | undefined;
|
|
777
971
|
metadata?: Record<string, unknown> | undefined;
|
|
778
972
|
}[];
|
|
779
|
-
|
|
780
|
-
|
|
973
|
+
answer: string;
|
|
974
|
+
guidelineFiles: string[];
|
|
975
|
+
inputFiles: string[];
|
|
976
|
+
output?: {
|
|
781
977
|
role: "tool" | "assistant" | "user" | "system";
|
|
782
|
-
|
|
978
|
+
durationMs?: number | undefined;
|
|
979
|
+
startTime?: string | undefined;
|
|
980
|
+
endTime?: string | undefined;
|
|
783
981
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
784
982
|
toolCalls?: {
|
|
785
983
|
tool: string;
|
|
786
984
|
input?: unknown;
|
|
787
985
|
output?: unknown;
|
|
986
|
+
durationMs?: number | undefined;
|
|
987
|
+
startTime?: string | undefined;
|
|
988
|
+
endTime?: string | undefined;
|
|
788
989
|
id?: string | undefined;
|
|
789
|
-
timestamp?: string | undefined;
|
|
790
990
|
}[] | undefined;
|
|
791
991
|
name?: string | undefined;
|
|
792
992
|
metadata?: Record<string, unknown> | undefined;
|
|
793
993
|
}[] | null | undefined;
|
|
794
|
-
|
|
994
|
+
referenceAnswer?: string | undefined;
|
|
995
|
+
outputPath?: string | undefined;
|
|
996
|
+
trace?: {
|
|
795
997
|
eventCount: number;
|
|
796
998
|
toolNames: string[];
|
|
797
999
|
toolCallsByName: Record<string, number>;
|
|
@@ -804,58 +1006,76 @@ declare const PromptTemplateInputSchema: z.ZodObject<{
|
|
|
804
1006
|
costUsd?: number | undefined;
|
|
805
1007
|
durationMs?: number | undefined;
|
|
806
1008
|
toolDurations?: Record<string, number[]> | undefined;
|
|
1009
|
+
startTime?: string | undefined;
|
|
1010
|
+
endTime?: string | undefined;
|
|
1011
|
+
llmCallCount?: number | undefined;
|
|
807
1012
|
} | null | undefined;
|
|
1013
|
+
fileChanges?: string | null | undefined;
|
|
1014
|
+
workspacePath?: string | null | undefined;
|
|
808
1015
|
config?: Record<string, unknown> | null | undefined;
|
|
809
1016
|
}, {
|
|
810
|
-
|
|
811
|
-
expectedOutcome: string;
|
|
812
|
-
expectedMessages: {
|
|
1017
|
+
input: {
|
|
813
1018
|
role: "tool" | "assistant" | "user" | "system";
|
|
814
|
-
|
|
1019
|
+
durationMs?: number | undefined;
|
|
1020
|
+
startTime?: string | undefined;
|
|
1021
|
+
endTime?: string | undefined;
|
|
815
1022
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
816
1023
|
toolCalls?: {
|
|
817
1024
|
tool: string;
|
|
818
1025
|
input?: unknown;
|
|
819
1026
|
output?: unknown;
|
|
1027
|
+
durationMs?: number | undefined;
|
|
1028
|
+
startTime?: string | undefined;
|
|
1029
|
+
endTime?: string | undefined;
|
|
820
1030
|
id?: string | undefined;
|
|
821
|
-
timestamp?: string | undefined;
|
|
822
1031
|
}[] | undefined;
|
|
823
1032
|
name?: string | undefined;
|
|
824
1033
|
metadata?: Record<string, unknown> | undefined;
|
|
825
1034
|
}[];
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
inputMessages: {
|
|
1035
|
+
question: string;
|
|
1036
|
+
criteria: string;
|
|
1037
|
+
expectedOutput: {
|
|
830
1038
|
role: "tool" | "assistant" | "user" | "system";
|
|
831
|
-
|
|
1039
|
+
durationMs?: number | undefined;
|
|
1040
|
+
startTime?: string | undefined;
|
|
1041
|
+
endTime?: string | undefined;
|
|
832
1042
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
833
1043
|
toolCalls?: {
|
|
834
1044
|
tool: string;
|
|
835
1045
|
input?: unknown;
|
|
836
1046
|
output?: unknown;
|
|
1047
|
+
durationMs?: number | undefined;
|
|
1048
|
+
startTime?: string | undefined;
|
|
1049
|
+
endTime?: string | undefined;
|
|
837
1050
|
id?: string | undefined;
|
|
838
|
-
timestamp?: string | undefined;
|
|
839
1051
|
}[] | undefined;
|
|
840
1052
|
name?: string | undefined;
|
|
841
1053
|
metadata?: Record<string, unknown> | undefined;
|
|
842
1054
|
}[];
|
|
843
|
-
|
|
844
|
-
|
|
1055
|
+
answer: string;
|
|
1056
|
+
guidelineFiles: string[];
|
|
1057
|
+
inputFiles: string[];
|
|
1058
|
+
output?: {
|
|
845
1059
|
role: "tool" | "assistant" | "user" | "system";
|
|
846
|
-
|
|
1060
|
+
durationMs?: number | undefined;
|
|
1061
|
+
startTime?: string | undefined;
|
|
1062
|
+
endTime?: string | undefined;
|
|
847
1063
|
content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
|
|
848
1064
|
toolCalls?: {
|
|
849
1065
|
tool: string;
|
|
850
1066
|
input?: unknown;
|
|
851
1067
|
output?: unknown;
|
|
1068
|
+
durationMs?: number | undefined;
|
|
1069
|
+
startTime?: string | undefined;
|
|
1070
|
+
endTime?: string | undefined;
|
|
852
1071
|
id?: string | undefined;
|
|
853
|
-
timestamp?: string | undefined;
|
|
854
1072
|
}[] | undefined;
|
|
855
1073
|
name?: string | undefined;
|
|
856
1074
|
metadata?: Record<string, unknown> | undefined;
|
|
857
1075
|
}[] | null | undefined;
|
|
858
|
-
|
|
1076
|
+
referenceAnswer?: string | undefined;
|
|
1077
|
+
outputPath?: string | undefined;
|
|
1078
|
+
trace?: {
|
|
859
1079
|
eventCount: number;
|
|
860
1080
|
toolNames: string[];
|
|
861
1081
|
toolCallsByName: Record<string, number>;
|
|
@@ -868,7 +1088,12 @@ declare const PromptTemplateInputSchema: z.ZodObject<{
|
|
|
868
1088
|
costUsd?: number | undefined;
|
|
869
1089
|
durationMs?: number | undefined;
|
|
870
1090
|
toolDurations?: Record<string, number[]> | undefined;
|
|
1091
|
+
startTime?: string | undefined;
|
|
1092
|
+
endTime?: string | undefined;
|
|
1093
|
+
llmCallCount?: number | undefined;
|
|
871
1094
|
} | null | undefined;
|
|
1095
|
+
fileChanges?: string | null | undefined;
|
|
1096
|
+
workspacePath?: string | null | undefined;
|
|
872
1097
|
config?: Record<string, unknown> | null | undefined;
|
|
873
1098
|
}>;
|
|
874
1099
|
type PromptTemplateInput = CodeJudgeInput;
|
|
@@ -895,7 +1120,7 @@ interface TargetInvokeRequest {
|
|
|
895
1120
|
* Response from a target invocation
|
|
896
1121
|
*/
|
|
897
1122
|
interface TargetInvokeResponse {
|
|
898
|
-
readonly
|
|
1123
|
+
readonly output: readonly unknown[];
|
|
899
1124
|
readonly rawText?: string;
|
|
900
1125
|
}
|
|
901
1126
|
/**
|
|
@@ -961,7 +1186,7 @@ declare class TargetInvocationError extends Error {
|
|
|
961
1186
|
* ```typescript
|
|
962
1187
|
* import { createTargetClient, defineCodeJudge } from '@agentv/eval';
|
|
963
1188
|
*
|
|
964
|
-
* export default defineCodeJudge(async ({ question,
|
|
1189
|
+
* export default defineCodeJudge(async ({ question, criteria }) => {
|
|
965
1190
|
* const target = createTargetClient();
|
|
966
1191
|
*
|
|
967
1192
|
* if (!target) {
|
|
@@ -970,7 +1195,7 @@ declare class TargetInvocationError extends Error {
|
|
|
970
1195
|
* }
|
|
971
1196
|
*
|
|
972
1197
|
* const response = await target.invoke({
|
|
973
|
-
* question: `Is this answer correct? Question: ${question}, Expected: ${
|
|
1198
|
+
* question: `Is this answer correct? Question: ${question}, Expected: ${criteria}`,
|
|
974
1199
|
* systemPrompt: 'You are an expert evaluator. Respond with JSON: { "correct": true/false }'
|
|
975
1200
|
* });
|
|
976
1201
|
*
|
|
@@ -981,6 +1206,63 @@ declare class TargetInvocationError extends Error {
|
|
|
981
1206
|
*/
|
|
982
1207
|
declare function createTargetClient(): TargetClient | undefined;
|
|
983
1208
|
|
|
1209
|
+
/**
|
|
1210
|
+
* Context provided to assertion handlers.
|
|
1211
|
+
* Same shape as CodeJudgeInput — assertions receive full evaluation context.
|
|
1212
|
+
*/
|
|
1213
|
+
type AssertionContext = CodeJudgeInput;
|
|
1214
|
+
/**
|
|
1215
|
+
* Known built-in assertion types. Custom types are extensible via string.
|
|
1216
|
+
*
|
|
1217
|
+
* Use in EVAL.yaml `assert` blocks:
|
|
1218
|
+
* ```yaml
|
|
1219
|
+
* assert:
|
|
1220
|
+
* - type: contains
|
|
1221
|
+
* value: "Paris"
|
|
1222
|
+
* ```
|
|
1223
|
+
*
|
|
1224
|
+
* Custom types registered via `.agentv/assertions/` or `defineAssertion()`
|
|
1225
|
+
* are also valid — the `string & {}` escape hatch provides autocomplete
|
|
1226
|
+
* for known types while accepting any string.
|
|
1227
|
+
*/
|
|
1228
|
+
type AssertionType = 'llm_judge' | 'code_judge' | 'rubrics' | 'composite' | 'tool_trajectory' | 'field_accuracy' | 'latency' | 'cost' | 'token_usage' | 'execution_metrics' | 'agent_judge' | 'contains' | 'equals' | 'regex' | 'is_json' | (string & {});
|
|
1229
|
+
/**
|
|
1230
|
+
* Result returned from an assertion handler.
|
|
1231
|
+
*
|
|
1232
|
+
* @example Pass with reasoning
|
|
1233
|
+
* ```ts
|
|
1234
|
+
* { pass: true, reasoning: 'Output contains expected keywords' }
|
|
1235
|
+
* ```
|
|
1236
|
+
*
|
|
1237
|
+
* @example Fail with misses
|
|
1238
|
+
* ```ts
|
|
1239
|
+
* { pass: false, misses: ['Missing required header'], score: 0.3 }
|
|
1240
|
+
* ```
|
|
1241
|
+
*
|
|
1242
|
+
* @example Granular score (0-1)
|
|
1243
|
+
* ```ts
|
|
1244
|
+
* { score: 0.75, hits: ['Format correct', 'Content relevant'], misses: ['Missing citation'] }
|
|
1245
|
+
* ```
|
|
1246
|
+
*/
|
|
1247
|
+
interface AssertionScore {
|
|
1248
|
+
/** Explicit pass/fail. If omitted, derived from score (>= 0.5 = pass). */
|
|
1249
|
+
readonly pass?: boolean;
|
|
1250
|
+
/** Numeric score between 0 and 1. Defaults to 1 if pass=true, 0 if pass=false. */
|
|
1251
|
+
readonly score?: number;
|
|
1252
|
+
/** Aspects that passed. */
|
|
1253
|
+
readonly hits?: readonly string[];
|
|
1254
|
+
/** Aspects that failed. */
|
|
1255
|
+
readonly misses?: readonly string[];
|
|
1256
|
+
/** Human-readable explanation. */
|
|
1257
|
+
readonly reasoning?: string;
|
|
1258
|
+
/** Optional structured details for domain-specific metrics. */
|
|
1259
|
+
readonly details?: Record<string, unknown>;
|
|
1260
|
+
}
|
|
1261
|
+
/**
|
|
1262
|
+
* Handler function type for assertions.
|
|
1263
|
+
*/
|
|
1264
|
+
type AssertionHandler = (ctx: AssertionContext) => AssertionScore | Promise<AssertionScore>;
|
|
1265
|
+
|
|
984
1266
|
/**
|
|
985
1267
|
* Handler function type for prompt templates.
|
|
986
1268
|
* Returns the prompt string to use for evaluation.
|
|
@@ -995,15 +1277,26 @@ type CodeJudgeHandler = (input: CodeJudgeInput) => CodeJudgeResult | Promise<Cod
|
|
|
995
1277
|
/**
|
|
996
1278
|
* AgentV Evaluation SDK
|
|
997
1279
|
*
|
|
998
|
-
* Build custom
|
|
1280
|
+
* Build custom evaluators for AI agent outputs.
|
|
999
1281
|
*
|
|
1000
|
-
* @example
|
|
1282
|
+
* @example Custom assertion (simplest way to add evaluation logic)
|
|
1283
|
+
* ```typescript
|
|
1284
|
+
* #!/usr/bin/env bun
|
|
1285
|
+
* import { defineAssertion } from '@agentv/eval';
|
|
1286
|
+
*
|
|
1287
|
+
* export default defineAssertion(({ answer }) => ({
|
|
1288
|
+
* pass: answer.includes('hello'),
|
|
1289
|
+
* reasoning: 'Checks greeting',
|
|
1290
|
+
* }));
|
|
1291
|
+
* ```
|
|
1292
|
+
*
|
|
1293
|
+
* @example Code judge (full control)
|
|
1001
1294
|
* ```typescript
|
|
1002
1295
|
* #!/usr/bin/env bun
|
|
1003
1296
|
* import { defineCodeJudge } from '@agentv/eval';
|
|
1004
1297
|
*
|
|
1005
|
-
* export default defineCodeJudge(({
|
|
1006
|
-
* score:
|
|
1298
|
+
* export default defineCodeJudge(({ trace, answer }) => ({
|
|
1299
|
+
* score: trace?.eventCount <= 5 ? 1.0 : 0.5,
|
|
1007
1300
|
* hits: ['Efficient tool usage'],
|
|
1008
1301
|
* misses: [],
|
|
1009
1302
|
* }));
|
|
@@ -1049,12 +1342,12 @@ type CodeJudgeHandler = (input: CodeJudgeInput) => CodeJudgeResult | Promise<Cod
|
|
|
1049
1342
|
* ```typescript
|
|
1050
1343
|
* import { defineCodeJudge } from '@agentv/eval';
|
|
1051
1344
|
*
|
|
1052
|
-
* export default defineCodeJudge(({
|
|
1053
|
-
* if (!
|
|
1345
|
+
* export default defineCodeJudge(({ trace }) => {
|
|
1346
|
+
* if (!trace) {
|
|
1054
1347
|
* return { score: 0.5, reasoning: 'No trace available' };
|
|
1055
1348
|
* }
|
|
1056
1349
|
*
|
|
1057
|
-
* const efficient =
|
|
1350
|
+
* const efficient = trace.eventCount <= 10;
|
|
1058
1351
|
* return {
|
|
1059
1352
|
* score: efficient ? 1.0 : 0.5,
|
|
1060
1353
|
* hits: efficient ? ['Efficient execution'] : [],
|
|
@@ -1071,7 +1364,7 @@ type CodeJudgeHandler = (input: CodeJudgeInput) => CodeJudgeResult | Promise<Cod
|
|
|
1071
1364
|
* maxToolCalls: z.number().default(10),
|
|
1072
1365
|
* });
|
|
1073
1366
|
*
|
|
1074
|
-
* export default defineCodeJudge(({
|
|
1367
|
+
* export default defineCodeJudge(({ trace, config }) => {
|
|
1075
1368
|
* const { maxToolCalls } = ConfigSchema.parse(config ?? {});
|
|
1076
1369
|
* // Use maxToolCalls...
|
|
1077
1370
|
* });
|
|
@@ -1096,7 +1389,7 @@ declare function defineCodeJudge(handler: CodeJudgeHandler): void;
|
|
|
1096
1389
|
*
|
|
1097
1390
|
* export default definePromptTemplate((ctx) => `
|
|
1098
1391
|
* Question: ${ctx.question}
|
|
1099
|
-
* Answer: ${ctx.
|
|
1392
|
+
* Answer: ${ctx.answer}
|
|
1100
1393
|
*
|
|
1101
1394
|
* ${ctx.referenceAnswer ? `Reference: ${ctx.referenceAnswer}` : ''}
|
|
1102
1395
|
* `);
|
|
@@ -1110,12 +1403,57 @@ declare function defineCodeJudge(handler: CodeJudgeHandler): void;
|
|
|
1110
1403
|
* const rubric = ctx.config?.rubric as string | undefined;
|
|
1111
1404
|
* return `
|
|
1112
1405
|
* Question: ${ctx.question}
|
|
1113
|
-
* Candidate Answer: ${ctx.
|
|
1406
|
+
* Candidate Answer: ${ctx.answer}
|
|
1114
1407
|
* ${rubric ? `\nEvaluation Criteria:\n${rubric}` : ''}
|
|
1115
1408
|
* `;
|
|
1116
1409
|
* });
|
|
1117
1410
|
* ```
|
|
1118
1411
|
*/
|
|
1119
1412
|
declare function definePromptTemplate(handler: PromptTemplateHandler): void;
|
|
1413
|
+
/**
|
|
1414
|
+
* Define a custom assertion evaluator with automatic stdin/stdout handling.
|
|
1415
|
+
*
|
|
1416
|
+
* Assertions are the simplest way to add custom evaluation logic. They receive
|
|
1417
|
+
* the full evaluation context and return a pass/fail result with optional
|
|
1418
|
+
* granular scoring.
|
|
1419
|
+
*
|
|
1420
|
+
* This function:
|
|
1421
|
+
* 1. Reads JSON from stdin (snake_case format)
|
|
1422
|
+
* 2. Converts to camelCase and validates with Zod
|
|
1423
|
+
* 3. Calls your handler with typed context
|
|
1424
|
+
* 4. Normalizes the result (pass→score, clamp, etc.)
|
|
1425
|
+
* 5. Outputs JSON to stdout
|
|
1426
|
+
* 6. Handles errors gracefully with proper exit codes
|
|
1427
|
+
*
|
|
1428
|
+
* @param handler - Function that evaluates the context and returns a result
|
|
1429
|
+
*
|
|
1430
|
+
* @example Simple pass/fail
|
|
1431
|
+
* ```typescript
|
|
1432
|
+
* import { defineAssertion } from '@agentv/eval';
|
|
1433
|
+
*
|
|
1434
|
+
* export default defineAssertion(({ answer }) => ({
|
|
1435
|
+
* pass: answer.toLowerCase().includes('hello'),
|
|
1436
|
+
* reasoning: 'Checks for greeting',
|
|
1437
|
+
* }));
|
|
1438
|
+
* ```
|
|
1439
|
+
*
|
|
1440
|
+
* @example Granular scoring
|
|
1441
|
+
* ```typescript
|
|
1442
|
+
* import { defineAssertion } from '@agentv/eval';
|
|
1443
|
+
*
|
|
1444
|
+
* export default defineAssertion(({ answer, trace }) => {
|
|
1445
|
+
* const hasContent = answer.length > 0 ? 0.5 : 0;
|
|
1446
|
+
* const isEfficient = (trace?.eventCount ?? 0) <= 5 ? 0.5 : 0;
|
|
1447
|
+
* return {
|
|
1448
|
+
* score: hasContent + isEfficient,
|
|
1449
|
+
* hits: [
|
|
1450
|
+
* ...(hasContent ? ['Has content'] : []),
|
|
1451
|
+
* ...(isEfficient ? ['Efficient'] : []),
|
|
1452
|
+
* ],
|
|
1453
|
+
* };
|
|
1454
|
+
* });
|
|
1455
|
+
* ```
|
|
1456
|
+
*/
|
|
1457
|
+
declare function defineAssertion(handler: AssertionHandler): void;
|
|
1120
1458
|
|
|
1121
|
-
export { type CodeJudgeHandler, type CodeJudgeInput, CodeJudgeInputSchema, type CodeJudgeResult, CodeJudgeResultSchema, type Message, MessageSchema, type PromptTemplateHandler, type PromptTemplateInput, PromptTemplateInputSchema, type TargetClient, type TargetInfo, TargetInvocationError, type TargetInvokeRequest, type TargetInvokeResponse, TargetNotAvailableError, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type TraceSummary, TraceSummarySchema, createTargetClient, defineCodeJudge, definePromptTemplate };
|
|
1459
|
+
export { type AssertionContext, type AssertionHandler, type AssertionScore, type AssertionType, type CodeJudgeHandler, type CodeJudgeInput, CodeJudgeInputSchema, type CodeJudgeResult, CodeJudgeResultSchema, type Message, MessageSchema, type PromptTemplateHandler, type PromptTemplateInput, PromptTemplateInputSchema, type TargetClient, type TargetInfo, TargetInvocationError, type TargetInvokeRequest, type TargetInvokeResponse, TargetNotAvailableError, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type TraceSummary, TraceSummarySchema, createTargetClient, defineAssertion, defineCodeJudge, definePromptTemplate };
|