@ls-stack/agent-eval 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2043 @@
1
+ import { z } from "zod/v4";
2
+
3
+ //#region ../shared/src/schemas/display.d.ts
4
+ /** Schema for a primitive table cell: a string, number, boolean, or null. */ declare const scalarCellSchema: z.ZodUnion<readonly [z.ZodString, z.ZodNumber, z.ZodBoolean, z.ZodNull]>;
5
+ /** Primitive table cell value supported by the eval UI; inferred from `scalarCellSchema`. */
6
+ type ScalarCell = z.infer<typeof scalarCellSchema>;
7
+ /** Schema for a JSON-safe cell: a string, number, boolean, null, string-keyed record, or array. */ declare const jsonCellSchema: z.ZodType<string | number | boolean | null | Record<string, unknown> | unknown[]>;
8
+ /** JSON-safe value supported by `format: 'json'` columns; inferred from `jsonCellSchema`. */
9
+ type JsonCell = z.infer<typeof jsonCellSchema>;
10
+ /** Schema for a `source: "repo"` file reference: required `path`, optional `mimeType`. */ declare const repoFileRefSchema: z.ZodObject<{
11
+ source: z.ZodLiteral<"repo">; // literal tag identifying this variant
12
+ path: z.ZodString; // NOTE(review): presumably relative to the workspace root — confirm
13
+ mimeType: z.ZodOptional<z.ZodString>;
14
+ }, z.core.$strip>;
15
+ /** Reference to a file that lives in the authored workspace. */
16
+ type RepoFileRef = z.infer<typeof repoFileRefSchema>;
17
+ /** Schema for a `source: "run"` artifact reference: required `artifactId` and `mimeType`, optional `fileName`. */ declare const runArtifactRefSchema: z.ZodObject<{
18
+ source: z.ZodLiteral<"run">; // literal tag identifying this variant
19
+ artifactId: z.ZodString;
20
+ mimeType: z.ZodString; // required here, unlike the optional `mimeType` on repo file refs
21
+ fileName: z.ZodOptional<z.ZodString>;
22
+ }, z.core.$strip>;
23
+ /** Reference to a generated artifact stored under a specific run. */
24
+ type RunArtifactRef = z.infer<typeof runArtifactRefSchema>;
25
+ /** Schema for a file reference: a union of the repo-file and run-artifact object shapes. */ declare const fileRefSchema: z.ZodUnion<readonly [z.ZodObject<{
26
+ source: z.ZodLiteral<"repo">; // variant with the same fields as `repoFileRefSchema`
27
+ path: z.ZodString;
28
+ mimeType: z.ZodOptional<z.ZodString>;
29
+ }, z.core.$strip>, z.ZodObject<{
30
+ source: z.ZodLiteral<"run">; // variant with the same fields as `runArtifactRefSchema`
31
+ artifactId: z.ZodString;
32
+ mimeType: z.ZodString;
33
+ fileName: z.ZodOptional<z.ZodString>;
34
+ }, z.core.$strip>]>;
35
+ /** File reference supported by media and file columns; either a repo file or a run artifact. */
36
+ type FileRef = z.infer<typeof fileRefSchema>;
37
+ /** Numeric presentation options for values rendered with `format: 'number'`. All members are optional. */
38
+ type NumberDisplayOptions = {
39
+ /** Number notation used when rendering the value. */ notation?: 'standard' | 'compact';
40
+ /** Compact style used when `notation: 'compact'` is enabled. */ compactDisplay?: 'short' | 'long';
41
+ /** String prepended to the rendered number, such as `$`. */ prefix?: string;
42
+ /** String appended to the rendered number, such as ` ms`. */ suffix?: string;
43
+ /** Fixed number of decimal places to render. */ decimalPlaces?: number;
44
+ };
45
+ /** Schema for numeric presentation options used by number-formatted values; declared as `z.ZodType<NumberDisplayOptions>`, so it parses to `NumberDisplayOptions`. */
46
+ declare const numberDisplayOptionsSchema: z.ZodType<NumberDisplayOptions>;
47
+ /** Schema for the supported column rendering kinds in list views: `string`, `number`, or `boolean`. */
48
+ declare const columnKindSchema: z.ZodEnum<{
49
+ string: "string";
50
+ number: "number";
51
+ boolean: "boolean";
52
+ }>;
53
+ /** Display kind used by a column definition in the UI; inferred from `columnKindSchema`. */
54
+ type ColumnKind = z.infer<typeof columnKindSchema>;
55
+ /** Schema for the built-in column formatting presets (twelve string literals). */
56
+ declare const columnFormatSchema: z.ZodEnum<{
57
+ number: "number";
58
+ boolean: "boolean";
59
+ file: "file";
60
+ markdown: "markdown";
61
+ json: "json";
62
+ image: "image";
63
+ audio: "audio";
64
+ video: "video";
65
+ duration: "duration";
66
+ percent: "percent";
67
+ passFail: "passFail";
68
+ stars: "stars";
69
+ }>;
70
+ /** Formatting preset applied to a column value in the UI; inferred from `columnFormatSchema`. */
71
+ type ColumnFormat = z.infer<typeof columnFormatSchema>;
72
+ /** Schema describing a rendered column in the eval results table. `key`, `label`, and `kind` are required; every other field is optional. */
73
+ declare const columnDefSchema: z.ZodObject<{
74
+ key: z.ZodString;
75
+ label: z.ZodString;
76
+ kind: z.ZodEnum<{ // same members as `columnKindSchema`
77
+ string: "string";
78
+ number: "number";
79
+ boolean: "boolean";
80
+ }>;
81
+ format: z.ZodOptional<z.ZodEnum<{ // same members as `columnFormatSchema`
82
+ number: "number";
83
+ boolean: "boolean";
84
+ file: "file";
85
+ markdown: "markdown";
86
+ json: "json";
87
+ image: "image";
88
+ audio: "audio";
89
+ video: "video";
90
+ duration: "duration";
91
+ percent: "percent";
92
+ passFail: "passFail";
93
+ stars: "stars";
94
+ }>>;
95
+ numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
96
+ isScore: z.ZodOptional<z.ZodBoolean>;
97
+ isManualScore: z.ZodOptional<z.ZodBoolean>;
98
+ passThreshold: z.ZodOptional<z.ZodNumber>; // NOTE(review): presumably only meaningful for score columns — confirm
99
+ maxStars: z.ZodOptional<z.ZodNumber>; // NOTE(review): presumably pairs with `format: 'stars'` — confirm
100
+ hideInTable: z.ZodOptional<z.ZodBoolean>;
101
+ sortable: z.ZodOptional<z.ZodBoolean>;
102
+ align: z.ZodOptional<z.ZodEnum<{
103
+ left: "left";
104
+ center: "center";
105
+ right: "right";
106
+ }>>;
107
+ }, z.core.$strip>;
108
+ /** Column definition exposed to the UI for eval and case tables; inferred from `columnDefSchema`. */
109
+ type ColumnDef = z.infer<typeof columnDefSchema>;
110
+ /** Schema for any supported value that can populate a table cell: a JSON-safe value (same value type as `jsonCellSchema`) or a file reference (same variants as `fileRefSchema`). */
111
+ declare const cellValueSchema: z.ZodUnion<readonly [z.ZodType<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown, z.core.$ZodTypeInternals<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown>>, z.ZodUnion<readonly [z.ZodObject<{
112
+ source: z.ZodLiteral<"repo">; // repo file reference variant
113
+ path: z.ZodString;
114
+ mimeType: z.ZodOptional<z.ZodString>;
115
+ }, z.core.$strip>, z.ZodObject<{
116
+ source: z.ZodLiteral<"run">; // run artifact reference variant
117
+ artifactId: z.ZodString;
118
+ mimeType: z.ZodString;
119
+ fileName: z.ZodOptional<z.ZodString>;
120
+ }, z.core.$strip>]>]>;
121
+ /** Value stored in a rendered eval result table cell; inferred from `cellValueSchema`. */
122
+ type CellValue = z.infer<typeof cellValueSchema>;
123
+ //#endregion
124
+ //#region ../shared/src/schemas/trace.d.ts
125
+ /** Schema for the semantic categories used to classify trace spans (eight string literals). */
126
+ declare const traceSpanKindSchema: z.ZodEnum<{
127
+ eval: "eval";
128
+ agent: "agent";
129
+ llm: "llm";
130
+ tool: "tool";
131
+ retrieval: "retrieval";
132
+ scorer: "scorer";
133
+ checkpoint: "checkpoint";
134
+ custom: "custom";
135
+ }>;
136
+ /** Semantic category used to classify a trace span in the UI; inferred from `traceSpanKindSchema`. */
137
+ type TraceSpanKind = z.infer<typeof traceSpanKindSchema>;
138
+ /** Schema for the supported presentation formats of trace attributes: `string`, `number`, `duration`, or `json`. */
139
+ declare const traceAttributeDisplayFormatSchema: z.ZodEnum<{
140
+ string: "string";
141
+ number: "number";
142
+ duration: "duration";
143
+ json: "json";
144
+ }>;
145
+ /**
146
+ * Formatting hint for trace attribute values rendered by the UI.
147
+ *
148
+ * This affects presentation only and does not change the stored value.
149
+ */
150
+ type TraceAttributeDisplayFormat = z.infer<typeof traceAttributeDisplayFormatSchema>;
151
+ /** Schema for the UI locations where a trace attribute can appear: `tree`, `detail`, or `section`. */
152
+ declare const traceAttributeDisplayPlacementSchema: z.ZodEnum<{
153
+ tree: "tree";
154
+ detail: "detail";
155
+ section: "section";
156
+ }>;
157
+ /** UI location where a trace attribute may be rendered; inferred from `traceAttributeDisplayPlacementSchema`. */
158
+ type TraceAttributeDisplayPlacement = z.infer<typeof traceAttributeDisplayPlacementSchema>;
159
+ /** Schema for resolved trace display rules sent to the UI. Only `path` is required; every other field is optional. */
160
+ declare const traceAttributeDisplaySchema: z.ZodObject<{
161
+ key: z.ZodOptional<z.ZodString>;
162
+ path: z.ZodString;
163
+ label: z.ZodOptional<z.ZodString>;
164
+ format: z.ZodOptional<z.ZodEnum<{ // same members as `traceAttributeDisplayFormatSchema`
165
+ string: "string";
166
+ number: "number";
167
+ duration: "duration";
168
+ json: "json";
169
+ }>>;
170
+ numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
171
+ placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{ // same members as `traceAttributeDisplayPlacementSchema`
172
+ tree: "tree";
173
+ detail: "detail";
174
+ section: "section";
175
+ }>>>;
176
+ scope: z.ZodOptional<z.ZodEnum<{
177
+ self: "self";
178
+ subtree: "subtree";
179
+ }>>;
180
+ mode: z.ZodOptional<z.ZodEnum<{
181
+ all: "all";
182
+ last: "last";
183
+ sum: "sum";
184
+ }>>;
185
+ }, z.core.$strip>;
186
+ /**
187
+ * Resolved trace display rule consumed by the UI.
188
+ *
189
+ * `path` points at the attribute to render on each span. `scope` and `mode`
190
+ * control whether the value comes from the current span only or from the full
191
+ * subtree, and how multiple matches are combined.
192
+ */
193
+ type TraceAttributeDisplay = z.infer<typeof traceAttributeDisplaySchema>;
194
+ /** Schema for trace display config after transforms have been resolved: a single optional `attributes` array. */
195
+ declare const traceDisplayConfigSchema: z.ZodObject<{
196
+ attributes: z.ZodOptional<z.ZodArray<z.ZodObject<{ // element fields match `traceAttributeDisplaySchema`
197
+ key: z.ZodOptional<z.ZodString>;
198
+ path: z.ZodString;
199
+ label: z.ZodOptional<z.ZodString>;
200
+ format: z.ZodOptional<z.ZodEnum<{
201
+ string: "string";
202
+ number: "number";
203
+ duration: "duration";
204
+ json: "json";
205
+ }>>;
206
+ numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
207
+ placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
208
+ tree: "tree";
209
+ detail: "detail";
210
+ section: "section";
211
+ }>>>;
212
+ scope: z.ZodOptional<z.ZodEnum<{
213
+ self: "self";
214
+ subtree: "subtree";
215
+ }>>;
216
+ mode: z.ZodOptional<z.ZodEnum<{
217
+ all: "all";
218
+ last: "last";
219
+ sum: "sum";
220
+ }>>;
221
+ }, z.core.$strip>>>;
222
+ }, z.core.$strip>;
223
+ /** UI-ready trace display configuration attached to case details; inferred from `traceDisplayConfigSchema`. */
224
+ type TraceDisplayConfig = z.infer<typeof traceDisplayConfigSchema>;
225
+ /** Context passed to a `traceDisplay` transform while resolving a span value. */
226
+ type TraceAttributeTransformContext = {
227
+ value: unknown; // NOTE(review): presumably the raw attribute value found at the rule's `path` — confirm
228
+ span: EvalTraceSpan; // span being resolved, in the persisted `EvalTraceSpan` shape
229
+ };
230
+ /**
231
+ * Runner-side transform used to derive a display value from a raw trace
232
+ * attribute. Receives a `TraceAttributeTransformContext`; the result is typed `unknown`.
233
+ */
234
+ type TraceAttributeTransform = (ctx: TraceAttributeTransformContext) => unknown;
235
+ /** Schema for authored trace display rules accepted from user config: the fields of `traceAttributeDisplaySchema` plus an optional `transform`. */
236
+ declare const traceAttributeDisplayInputSchema: z.ZodObject<{
237
+ key: z.ZodOptional<z.ZodString>;
238
+ path: z.ZodString;
239
+ label: z.ZodOptional<z.ZodString>;
240
+ format: z.ZodOptional<z.ZodEnum<{
241
+ string: "string";
242
+ number: "number";
243
+ duration: "duration";
244
+ json: "json";
245
+ }>>;
246
+ numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
247
+ placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
248
+ tree: "tree";
249
+ detail: "detail";
250
+ section: "section";
251
+ }>>>;
252
+ scope: z.ZodOptional<z.ZodEnum<{
253
+ self: "self";
254
+ subtree: "subtree";
255
+ }>>;
256
+ mode: z.ZodOptional<z.ZodEnum<{
257
+ all: "all";
258
+ last: "last";
259
+ sum: "sum";
260
+ }>>;
261
+ transform: z.ZodOptional<z.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>; // only field absent from the resolved schema
262
+ }, z.core.$strip>;
263
+ /**
264
+ * Authored trace display rule accepted in eval definitions and config files.
265
+ *
266
+ * `key` allows the same source `path` to be displayed multiple ways, such as
267
+ * raw and compact views of a single token count. `numberFormat` customizes
268
+ * `format: 'number'` values. `transform` runs in the
269
+ * runner before the UI receives the resolved trace payload.
270
+ */
271
+ type TraceAttributeDisplayInput = z.infer<typeof traceAttributeDisplayInputSchema>;
272
+ /** Schema for authored trace display config in eval or workspace config: a single optional `attributes` array. */
273
+ declare const traceDisplayInputConfigSchema: z.ZodObject<{
274
+ attributes: z.ZodOptional<z.ZodArray<z.ZodObject<{ // element fields match `traceAttributeDisplayInputSchema`
275
+ key: z.ZodOptional<z.ZodString>;
276
+ path: z.ZodString;
277
+ label: z.ZodOptional<z.ZodString>;
278
+ format: z.ZodOptional<z.ZodEnum<{
279
+ string: "string";
280
+ number: "number";
281
+ duration: "duration";
282
+ json: "json";
283
+ }>>;
284
+ numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
285
+ placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
286
+ tree: "tree";
287
+ detail: "detail";
288
+ section: "section";
289
+ }>>>;
290
+ scope: z.ZodOptional<z.ZodEnum<{
291
+ self: "self";
292
+ subtree: "subtree";
293
+ }>>;
294
+ mode: z.ZodOptional<z.ZodEnum<{
295
+ all: "all";
296
+ last: "last";
297
+ sum: "sum";
298
+ }>>;
299
+ transform: z.ZodOptional<z.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
300
+ }, z.core.$strip>>>;
301
+ }, z.core.$strip>;
302
+ /** Trace display configuration authored by users in config or eval files; inferred from `traceDisplayInputConfigSchema`. */
303
+ type TraceDisplayInputConfig = z.infer<typeof traceDisplayInputConfigSchema>;
304
+ /** Schema for a persisted trace span captured during case execution. `parentId` and `endedAt` are nullable; `attributes` and `error` are optional. */
305
+ declare const traceSpanSchema: z.ZodObject<{
306
+ id: z.ZodString;
307
+ parentId: z.ZodNullable<z.ZodString>; // NOTE(review): presumably null for a root span — confirm
308
+ caseId: z.ZodString;
309
+ kind: z.ZodEnum<{ // same members as `traceSpanKindSchema`
310
+ eval: "eval";
311
+ agent: "agent";
312
+ llm: "llm";
313
+ tool: "tool";
314
+ retrieval: "retrieval";
315
+ scorer: "scorer";
316
+ checkpoint: "checkpoint";
317
+ custom: "custom";
318
+ }>;
319
+ name: z.ZodString;
320
+ startedAt: z.ZodString; // timestamp as a string; the exact format is not visible here
321
+ endedAt: z.ZodNullable<z.ZodString>; // NOTE(review): presumably null while the span is still running — confirm
322
+ status: z.ZodEnum<{
323
+ error: "error";
324
+ running: "running";
325
+ ok: "ok";
326
+ cancelled: "cancelled";
327
+ }>;
328
+ attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
329
+ error: z.ZodOptional<z.ZodObject<{
330
+ name: z.ZodOptional<z.ZodString>;
331
+ message: z.ZodString;
332
+ stack: z.ZodOptional<z.ZodString>;
333
+ }, z.core.$strip>>;
334
+ }, z.core.$strip>;
335
+ /** Persisted trace span shape stored for each eval case run; inferred from `traceSpanSchema`. */
336
+ type EvalTraceSpan = z.infer<typeof traceSpanSchema>;
337
+ //#endregion
338
+ //#region ../shared/src/schemas/eval.d.ts
339
+ /** Schema for the freshness signal derived from the latest relevant run plus git state: `fresh`, `stale`, or `outdated`. */
340
+ declare const evalFreshnessStatusSchema: z.ZodEnum<{
341
+ fresh: "fresh";
342
+ stale: "stale";
343
+ outdated: "outdated";
344
+ }>;
345
+ /** Freshness signal derived from the latest relevant run plus git state; inferred from `evalFreshnessStatusSchema`. */
346
+ type EvalFreshnessStatus = z.infer<typeof evalFreshnessStatusSchema>;
347
+ /** Schema for the reducer used to collapse a column's per-case values into a single stat. NOTE(review): uses `last`, whereas `evalChartAggregateSchema` uses `latest`. */
348
+ declare const evalStatAggregateSchema: z.ZodEnum<{
349
+ avg: "avg";
350
+ min: "min";
351
+ max: "max";
352
+ sum: "sum";
353
+ last: "last";
354
+ }>;
355
+ /** Reducer used to collapse a column's per-case values into a single stat; inferred from `evalStatAggregateSchema`. */
356
+ type EvalStatAggregate = z.infer<typeof evalStatAggregateSchema>;
357
+ /**
358
+ * One entry in the EvalCard stats row, discriminated on `kind`. Built-in kinds use latest run totals;
359
+ * `column` aggregates a score or numeric output column across the latest run.
360
+ */
361
+ declare const evalStatItemSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
362
+ kind: z.ZodLiteral<"cases">; // built-in stat with no options
363
+ }, z.core.$strip>, z.ZodObject<{
364
+ kind: z.ZodLiteral<"passRate">; // built-in stat with an optional `accent` flag
365
+ accent: z.ZodOptional<z.ZodBoolean>;
366
+ }, z.core.$strip>, z.ZodObject<{
367
+ kind: z.ZodLiteral<"duration">; // built-in stat with no options
368
+ }, z.core.$strip>, z.ZodObject<{
369
+ kind: z.ZodLiteral<"column">; // column stat: `key` and `aggregate` are required
370
+ key: z.ZodString;
371
+ label: z.ZodOptional<z.ZodString>;
372
+ aggregate: z.ZodEnum<{ // same members as `evalStatAggregateSchema`
373
+ avg: "avg";
374
+ min: "min";
375
+ max: "max";
376
+ sum: "sum";
377
+ last: "last";
378
+ }>;
379
+ format: z.ZodOptional<z.ZodEnum<{ // same members as `columnFormatSchema`
380
+ number: "number";
381
+ boolean: "boolean";
382
+ file: "file";
383
+ duration: "duration";
384
+ markdown: "markdown";
385
+ json: "json";
386
+ image: "image";
387
+ audio: "audio";
388
+ video: "video";
389
+ percent: "percent";
390
+ passFail: "passFail";
391
+ stars: "stars";
392
+ }>>;
393
+ accent: z.ZodOptional<z.ZodBoolean>;
394
+ }, z.core.$strip>], "kind">;
395
+ /** Single stat rendered in the EvalCard stats row; inferred from `evalStatItemSchema`. */
396
+ type EvalStatItem = z.infer<typeof evalStatItemSchema>;
397
+ /** Schema for the ordered list of stats rendered in the EvalCard stats row; each element has the same variants as `evalStatItemSchema`. */
398
+ declare const evalStatsConfigSchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
399
+ kind: z.ZodLiteral<"cases">;
400
+ }, z.core.$strip>, z.ZodObject<{
401
+ kind: z.ZodLiteral<"passRate">;
402
+ accent: z.ZodOptional<z.ZodBoolean>;
403
+ }, z.core.$strip>, z.ZodObject<{
404
+ kind: z.ZodLiteral<"duration">;
405
+ }, z.core.$strip>, z.ZodObject<{
406
+ kind: z.ZodLiteral<"column">;
407
+ key: z.ZodString;
408
+ label: z.ZodOptional<z.ZodString>;
409
+ aggregate: z.ZodEnum<{
410
+ avg: "avg";
411
+ min: "min";
412
+ max: "max";
413
+ sum: "sum";
414
+ last: "last";
415
+ }>;
416
+ format: z.ZodOptional<z.ZodEnum<{
417
+ number: "number";
418
+ boolean: "boolean";
419
+ file: "file";
420
+ duration: "duration";
421
+ markdown: "markdown";
422
+ json: "json";
423
+ image: "image";
424
+ audio: "audio";
425
+ video: "video";
426
+ percent: "percent";
427
+ passFail: "passFail";
428
+ stars: "stars";
429
+ }>>;
430
+ accent: z.ZodOptional<z.ZodBoolean>;
431
+ }, z.core.$strip>], "kind">>;
432
+ /** Ordered list of stats rendered in the EvalCard stats row; inferred from `evalStatsConfigSchema`. */
433
+ type EvalStatsConfig = z.infer<typeof evalStatsConfigSchema>;
434
+ /** Schema summarizing a discovered eval for list and overview screens. `title`, `stats`, and `charts` are optional; the remaining fields are required (several are nullable). */
435
+ declare const evalSummarySchema: z.ZodObject<{
436
+ id: z.ZodString;
437
+ title: z.ZodOptional<z.ZodString>;
438
+ filePath: z.ZodString;
439
+ stale: z.ZodBoolean; // NOTE(review): appears to overlap with `freshnessStatus` — confirm which one is authoritative
440
+ outdated: z.ZodBoolean; // NOTE(review): appears to overlap with `freshnessStatus` — confirm which one is authoritative
441
+ freshnessStatus: z.ZodEnum<{ // same members as `evalFreshnessStatusSchema`
442
+ fresh: "fresh";
443
+ stale: "stale";
444
+ outdated: "outdated";
445
+ }>;
446
+ latestRunAt: z.ZodNullable<z.ZodString>;
447
+ latestRunCommitSha: z.ZodNullable<z.ZodString>;
448
+ currentCommitSha: z.ZodNullable<z.ZodString>;
449
+ columnDefs: z.ZodArray<z.ZodObject<{ // element fields match `columnDefSchema`
450
+ key: z.ZodString;
451
+ label: z.ZodString;
452
+ kind: z.ZodEnum<{
453
+ string: "string";
454
+ number: "number";
455
+ boolean: "boolean";
456
+ }>;
457
+ format: z.ZodOptional<z.ZodEnum<{
458
+ number: "number";
459
+ boolean: "boolean";
460
+ file: "file";
461
+ duration: "duration";
462
+ markdown: "markdown";
463
+ json: "json";
464
+ image: "image";
465
+ audio: "audio";
466
+ video: "video";
467
+ percent: "percent";
468
+ passFail: "passFail";
469
+ stars: "stars";
470
+ }>>;
471
+ numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
472
+ isScore: z.ZodOptional<z.ZodBoolean>;
473
+ isManualScore: z.ZodOptional<z.ZodBoolean>;
474
+ passThreshold: z.ZodOptional<z.ZodNumber>;
475
+ maxStars: z.ZodOptional<z.ZodNumber>;
476
+ hideInTable: z.ZodOptional<z.ZodBoolean>;
477
+ sortable: z.ZodOptional<z.ZodBoolean>;
478
+ align: z.ZodOptional<z.ZodEnum<{
479
+ left: "left";
480
+ center: "center";
481
+ right: "right";
482
+ }>>;
483
+ }, z.core.$strip>>;
484
+ caseCount: z.ZodNullable<z.ZodNumber>;
485
+ lastRunStatus: z.ZodNullable<z.ZodEnum<{ // run-level status; includes `unscored`, which the per-case status enum lacks
486
+ error: "error";
487
+ pass: "pass";
488
+ fail: "fail";
489
+ running: "running";
490
+ cancelled: "cancelled";
491
+ unscored: "unscored";
492
+ }>>;
493
+ stats: z.ZodOptional<z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{ // element variants match `evalStatItemSchema`
494
+ kind: z.ZodLiteral<"cases">;
495
+ }, z.core.$strip>, z.ZodObject<{
496
+ kind: z.ZodLiteral<"passRate">;
497
+ accent: z.ZodOptional<z.ZodBoolean>;
498
+ }, z.core.$strip>, z.ZodObject<{
499
+ kind: z.ZodLiteral<"duration">;
500
+ }, z.core.$strip>, z.ZodObject<{
501
+ kind: z.ZodLiteral<"column">;
502
+ key: z.ZodString;
503
+ label: z.ZodOptional<z.ZodString>;
504
+ aggregate: z.ZodEnum<{
505
+ avg: "avg";
506
+ min: "min";
507
+ max: "max";
508
+ sum: "sum";
509
+ last: "last";
510
+ }>;
511
+ format: z.ZodOptional<z.ZodEnum<{
512
+ number: "number";
513
+ boolean: "boolean";
514
+ file: "file";
515
+ duration: "duration";
516
+ markdown: "markdown";
517
+ json: "json";
518
+ image: "image";
519
+ audio: "audio";
520
+ video: "video";
521
+ percent: "percent";
522
+ passFail: "passFail";
523
+ stars: "stars";
524
+ }>>;
525
+ accent: z.ZodOptional<z.ZodBoolean>;
526
+ }, z.core.$strip>], "kind">>>;
527
+ charts: z.ZodOptional<z.ZodArray<z.ZodObject<{ // per chart: `type` and `metrics` are required; the other fields are optional
528
+ heading: z.ZodOptional<z.ZodString>;
529
+ type: z.ZodEnum<{ // same members as `evalChartTypeSchema`
530
+ area: "area";
531
+ line: "line";
532
+ bar: "bar";
533
+ }>;
534
+ metrics: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{ // plotted series, discriminated on `source`
535
+ source: z.ZodLiteral<"builtin">;
536
+ metric: z.ZodEnum<{
537
+ passRate: "passRate";
538
+ durationMs: "durationMs";
539
+ }>;
540
+ label: z.ZodOptional<z.ZodString>;
541
+ color: z.ZodOptional<z.ZodEnum<{
542
+ success: "success";
543
+ accent: "accent";
544
+ error: "error";
545
+ accentDim: "accentDim";
546
+ warning: "warning";
547
+ textMuted: "textMuted";
548
+ }>>;
549
+ axis: z.ZodOptional<z.ZodEnum<{
550
+ left: "left";
551
+ right: "right";
552
+ }>>;
553
+ }, z.core.$strip>, z.ZodObject<{
554
+ source: z.ZodLiteral<"column">;
555
+ key: z.ZodString;
556
+ aggregate: z.ZodEnum<{ // same members as `evalChartAggregateSchema` (`latest`, not `last`)
557
+ avg: "avg";
558
+ min: "min";
559
+ max: "max";
560
+ sum: "sum";
561
+ latest: "latest";
562
+ passThresholdRate: "passThresholdRate";
563
+ }>;
564
+ label: z.ZodOptional<z.ZodString>;
565
+ color: z.ZodOptional<z.ZodEnum<{
566
+ success: "success";
567
+ accent: "accent";
568
+ error: "error";
569
+ accentDim: "accentDim";
570
+ warning: "warning";
571
+ textMuted: "textMuted";
572
+ }>>;
573
+ axis: z.ZodOptional<z.ZodEnum<{
574
+ left: "left";
575
+ right: "right";
576
+ }>>;
577
+ }, z.core.$strip>], "source">>;
578
+ yDomain: z.ZodOptional<z.ZodObject<{ // optional min/max bounds per axis
579
+ left: z.ZodOptional<z.ZodObject<{
580
+ min: z.ZodOptional<z.ZodNumber>;
581
+ max: z.ZodOptional<z.ZodNumber>;
582
+ }, z.core.$strip>>;
583
+ right: z.ZodOptional<z.ZodObject<{
584
+ min: z.ZodOptional<z.ZodNumber>;
585
+ max: z.ZodOptional<z.ZodNumber>;
586
+ }, z.core.$strip>>;
587
+ }, z.core.$strip>>;
588
+ tooltipExtras: z.ZodOptional<z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{ // like `metrics` entries but without `color` or `axis`
589
+ source: z.ZodLiteral<"builtin">;
590
+ metric: z.ZodEnum<{
591
+ passRate: "passRate";
592
+ durationMs: "durationMs";
593
+ }>;
594
+ label: z.ZodOptional<z.ZodString>;
595
+ }, z.core.$strip>, z.ZodObject<{
596
+ source: z.ZodLiteral<"column">;
597
+ key: z.ZodString;
598
+ aggregate: z.ZodEnum<{
599
+ avg: "avg";
600
+ min: "min";
601
+ max: "max";
602
+ sum: "sum";
603
+ latest: "latest";
604
+ passThresholdRate: "passThresholdRate";
605
+ }>;
606
+ label: z.ZodOptional<z.ZodString>;
607
+ }, z.core.$strip>], "source">>>;
608
+ }, z.core.$strip>>>;
609
+ }, z.core.$strip>;
610
+ /** Metadata shown for one discovered eval in the explorer UI; inferred from `evalSummarySchema`. */
611
+ type EvalSummary = z.infer<typeof evalSummarySchema>;
612
+ /** Schema for one case row in an eval run result table. Only `costUsd` is optional; `latencyMs` is required but nullable. */
613
+ declare const caseRowSchema: z.ZodObject<{
614
+ caseId: z.ZodString;
615
+ evalId: z.ZodString;
616
+ status: z.ZodEnum<{ // per-case status; includes `pending`
617
+ error: "error";
618
+ pass: "pass";
619
+ fail: "fail";
620
+ running: "running";
621
+ cancelled: "cancelled";
622
+ pending: "pending";
623
+ }>;
624
+ latencyMs: z.ZodNullable<z.ZodNumber>;
625
+ costUsd: z.ZodOptional<z.ZodNullable<z.ZodNumber>>; // may be omitted or null
626
+ columns: z.ZodRecord<z.ZodString, z.ZodUnion<readonly [z.ZodType<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown, z.core.$ZodTypeInternals<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown>>, z.ZodUnion<readonly [z.ZodObject<{
627
+ source: z.ZodLiteral<"repo">; // record values have the same shape as `cellValueSchema`
628
+ path: z.ZodString;
629
+ mimeType: z.ZodOptional<z.ZodString>;
630
+ }, z.core.$strip>, z.ZodObject<{
631
+ source: z.ZodLiteral<"run">;
632
+ artifactId: z.ZodString;
633
+ mimeType: z.ZodString;
634
+ fileName: z.ZodOptional<z.ZodString>;
635
+ }, z.core.$strip>]>]>>;
636
+ trial: z.ZodNumber; // NOTE(review): presumably the trial index of this case run — confirm whether 0- or 1-based
637
+ }, z.core.$strip>;
638
+ /** Flattened per-case row rendered in run tables and streamed updates; inferred from `caseRowSchema`. */
639
+ type CaseRow = z.infer<typeof caseRowSchema>;
640
+ /** Schema for structured assertion failure metadata captured for one case run: required `message`, optional `stack`. */
641
+ declare const assertionFailureSchema: z.ZodObject<{
642
+ message: z.ZodString;
643
+ stack: z.ZodOptional<z.ZodString>;
644
+ }, z.core.$strip>;
645
+ /** Assertion failure metadata captured for one case run; inferred from `assertionFailureSchema`. */
646
+ type AssertionFailure = z.infer<typeof assertionFailureSchema>;
647
+ /** Schema for the trace payload captured while computing one score for a case: a required `trace` span array plus a required `traceDisplay` config. */
648
+ declare const scoreTraceSchema: z.ZodObject<{
649
+ trace: z.ZodArray<z.ZodObject<{ // element fields match `traceSpanSchema`
650
+ id: z.ZodString;
651
+ parentId: z.ZodNullable<z.ZodString>;
652
+ caseId: z.ZodString;
653
+ kind: z.ZodEnum<{
654
+ custom: "custom";
655
+ eval: "eval";
656
+ agent: "agent";
657
+ llm: "llm";
658
+ tool: "tool";
659
+ retrieval: "retrieval";
660
+ scorer: "scorer";
661
+ checkpoint: "checkpoint";
662
+ }>;
663
+ name: z.ZodString;
664
+ startedAt: z.ZodString;
665
+ endedAt: z.ZodNullable<z.ZodString>;
666
+ status: z.ZodEnum<{
667
+ error: "error";
668
+ running: "running";
669
+ cancelled: "cancelled";
670
+ ok: "ok";
671
+ }>;
672
+ attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
673
+ error: z.ZodOptional<z.ZodObject<{
674
+ name: z.ZodOptional<z.ZodString>;
675
+ message: z.ZodString;
676
+ stack: z.ZodOptional<z.ZodString>;
677
+ }, z.core.$strip>>;
678
+ }, z.core.$strip>>;
679
+ traceDisplay: z.ZodObject<{ // same fields as `traceDisplayConfigSchema`
680
+ attributes: z.ZodOptional<z.ZodArray<z.ZodObject<{
681
+ key: z.ZodOptional<z.ZodString>;
682
+ path: z.ZodString;
683
+ label: z.ZodOptional<z.ZodString>;
684
+ format: z.ZodOptional<z.ZodEnum<{
685
+ string: "string";
686
+ number: "number";
687
+ duration: "duration";
688
+ json: "json";
689
+ }>>;
690
+ numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
691
+ placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
692
+ tree: "tree";
693
+ detail: "detail";
694
+ section: "section";
695
+ }>>>;
696
+ scope: z.ZodOptional<z.ZodEnum<{
697
+ self: "self";
698
+ subtree: "subtree";
699
+ }>>;
700
+ mode: z.ZodOptional<z.ZodEnum<{
701
+ sum: "sum";
702
+ last: "last";
703
+ all: "all";
704
+ }>>;
705
+ }, z.core.$strip>>>;
706
+ }, z.core.$strip>;
707
+ }, z.core.$strip>;
708
+ /** Trace payload captured while computing one score for a case; inferred from `scoreTraceSchema`. */
709
+ type ScoreTrace = z.infer<typeof scoreTraceSchema>;
710
+ /** Schema for the detailed payload shown when opening a specific case. Only `scoringTraces` is optional; `error` is required but nullable. */
711
+ declare const caseDetailSchema: z.ZodObject<{
712
+ caseId: z.ZodString;
713
+ evalId: z.ZodString;
714
+ status: z.ZodEnum<{ // same members as the `status` enum in `caseRowSchema`
715
+ error: "error";
716
+ pass: "pass";
717
+ fail: "fail";
718
+ running: "running";
719
+ cancelled: "cancelled";
720
+ pending: "pending";
721
+ }>;
722
+ input: z.ZodUnknown; // case input is not validated beyond `unknown`
723
+ trace: z.ZodArray<z.ZodObject<{ // element fields match `traceSpanSchema`
724
+ id: z.ZodString;
725
+ parentId: z.ZodNullable<z.ZodString>;
726
+ caseId: z.ZodString;
727
+ kind: z.ZodEnum<{
728
+ custom: "custom";
729
+ eval: "eval";
730
+ agent: "agent";
731
+ llm: "llm";
732
+ tool: "tool";
733
+ retrieval: "retrieval";
734
+ scorer: "scorer";
735
+ checkpoint: "checkpoint";
736
+ }>;
737
+ name: z.ZodString;
738
+ startedAt: z.ZodString;
739
+ endedAt: z.ZodNullable<z.ZodString>;
740
+ status: z.ZodEnum<{
741
+ error: "error";
742
+ running: "running";
743
+ cancelled: "cancelled";
744
+ ok: "ok";
745
+ }>;
746
+ attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
747
+ error: z.ZodOptional<z.ZodObject<{
748
+ name: z.ZodOptional<z.ZodString>;
749
+ message: z.ZodString;
750
+ stack: z.ZodOptional<z.ZodString>;
751
+ }, z.core.$strip>>;
752
+ }, z.core.$strip>>;
753
+ traceDisplay: z.ZodObject<{ // same fields as `traceDisplayConfigSchema`
754
+ attributes: z.ZodOptional<z.ZodArray<z.ZodObject<{
755
+ key: z.ZodOptional<z.ZodString>;
756
+ path: z.ZodString;
757
+ label: z.ZodOptional<z.ZodString>;
758
+ format: z.ZodOptional<z.ZodEnum<{
759
+ string: "string";
760
+ number: "number";
761
+ duration: "duration";
762
+ json: "json";
763
+ }>>;
764
+ numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
765
+ placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
766
+ tree: "tree";
767
+ detail: "detail";
768
+ section: "section";
769
+ }>>>;
770
+ scope: z.ZodOptional<z.ZodEnum<{
771
+ self: "self";
772
+ subtree: "subtree";
773
+ }>>;
774
+ mode: z.ZodOptional<z.ZodEnum<{
775
+ sum: "sum";
776
+ last: "last";
777
+ all: "all";
778
+ }>>;
779
+ }, z.core.$strip>>>;
780
+ }, z.core.$strip>;
781
+ scoringTraces: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodObject<{ // record values have the same fields as `scoreTraceSchema`
782
+ trace: z.ZodArray<z.ZodObject<{
783
+ id: z.ZodString;
784
+ parentId: z.ZodNullable<z.ZodString>;
785
+ caseId: z.ZodString;
786
+ kind: z.ZodEnum<{
787
+ custom: "custom";
788
+ eval: "eval";
789
+ agent: "agent";
790
+ llm: "llm";
791
+ tool: "tool";
792
+ retrieval: "retrieval";
793
+ scorer: "scorer";
794
+ checkpoint: "checkpoint";
795
+ }>;
796
+ name: z.ZodString;
797
+ startedAt: z.ZodString;
798
+ endedAt: z.ZodNullable<z.ZodString>;
799
+ status: z.ZodEnum<{
800
+ error: "error";
801
+ running: "running";
802
+ cancelled: "cancelled";
803
+ ok: "ok";
804
+ }>;
805
+ attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
806
+ error: z.ZodOptional<z.ZodObject<{
807
+ name: z.ZodOptional<z.ZodString>;
808
+ message: z.ZodString;
809
+ stack: z.ZodOptional<z.ZodString>;
810
+ }, z.core.$strip>>;
811
+ }, z.core.$strip>>;
812
+ traceDisplay: z.ZodObject<{
813
+ attributes: z.ZodOptional<z.ZodArray<z.ZodObject<{
814
+ key: z.ZodOptional<z.ZodString>;
815
+ path: z.ZodString;
816
+ label: z.ZodOptional<z.ZodString>;
817
+ format: z.ZodOptional<z.ZodEnum<{
818
+ string: "string";
819
+ number: "number";
820
+ duration: "duration";
821
+ json: "json";
822
+ }>>;
823
+ numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
824
+ placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
825
+ tree: "tree";
826
+ detail: "detail";
827
+ section: "section";
828
+ }>>>;
829
+ scope: z.ZodOptional<z.ZodEnum<{
830
+ self: "self";
831
+ subtree: "subtree";
832
+ }>>;
833
+ mode: z.ZodOptional<z.ZodEnum<{
834
+ sum: "sum";
835
+ last: "last";
836
+ all: "all";
837
+ }>>;
838
+ }, z.core.$strip>>>;
839
+ }, z.core.$strip>;
840
+ }, z.core.$strip>>>;
841
+ columns: z.ZodRecord<z.ZodString, z.ZodUnion<readonly [z.ZodType<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown, z.core.$ZodTypeInternals<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown>>, z.ZodUnion<readonly [z.ZodObject<{
842
+ source: z.ZodLiteral<"repo">; // record values have the same shape as `cellValueSchema`
843
+ path: z.ZodString;
844
+ mimeType: z.ZodOptional<z.ZodString>;
845
+ }, z.core.$strip>, z.ZodObject<{
846
+ source: z.ZodLiteral<"run">;
847
+ artifactId: z.ZodString;
848
+ mimeType: z.ZodString;
849
+ fileName: z.ZodOptional<z.ZodString>;
850
+ }, z.core.$strip>]>]>>;
851
+ assertionFailures: z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{ // each entry is either a structured failure object...
852
+ message: z.ZodString;
853
+ stack: z.ZodOptional<z.ZodString>;
854
+ }, z.core.$strip>, z.ZodPipe<z.ZodString, z.ZodTransform<{ // ...or a plain string transformed into `{ message, stack? }`
855
+ message: string;
856
+ stack?: string | undefined;
857
+ }, string>>]>>;
858
+ error: z.ZodNullable<z.ZodObject<{
859
+ name: z.ZodOptional<z.ZodString>;
860
+ message: z.ZodString;
861
+ stack: z.ZodOptional<z.ZodString>;
862
+ }, z.core.$strip>>;
863
+ trial: z.ZodNumber;
864
+ }, z.core.$strip>;
865
+ /** Full case payload including inputs, trace, outputs, and failures; inferred from `caseDetailSchema`. */
866
+ type CaseDetail = z.infer<typeof caseDetailSchema>;
867
+ //#endregion
868
+ //#region ../shared/src/schemas/chart.d.ts
869
+ /** Schema for the chart type rendered for a single eval history chart: `area`, `line`, or `bar`. */
870
+ declare const evalChartTypeSchema: z.ZodEnum<{
871
+ area: "area";
872
+ line: "line";
873
+ bar: "bar";
874
+ }>;
875
+ /** Chart type rendered for a single eval history chart; inferred from `evalChartTypeSchema`. */
876
+ type EvalChartType = z.infer<typeof evalChartTypeSchema>;
877
+ /**
878
+ * Schema for a run-level metric sourced from the aggregated `RunSummary` for a run, rather
879
+ * than from a per-case column. Accepts `passRate` or `durationMs`.
880
+ */
881
+ declare const evalChartBuiltinMetricSchema: z.ZodEnum<{
882
+ passRate: "passRate";
883
+ durationMs: "durationMs";
884
+ }>;
885
+ /**
886
+ * Run-level metric sourced from the aggregated `RunSummary` for a run, rather
887
+ * than from a per-case column. Inferred from `evalChartBuiltinMetricSchema`.
888
+ */
889
+ type EvalChartBuiltinMetric = z.infer<typeof evalChartBuiltinMetricSchema>;
890
+ /** Reducer applied to a numeric column across all cases of a single run. */
891
+ declare const evalChartAggregateSchema: z.ZodEnum<{
892
+ avg: "avg";
893
+ sum: "sum";
894
+ min: "min";
895
+ max: "max";
896
+ latest: "latest";
897
+ passThresholdRate: "passThresholdRate";
898
+ }>;
899
+ /** Reducer applied to a numeric column across all cases of a single run. */
900
+ type EvalChartAggregate = z.infer<typeof evalChartAggregateSchema>;
901
+ /**
902
+ * Semantic color token resolved to a theme color by the web UI. The SDK does
903
+ * not emit raw hex so authored evals stay decoupled from the web theme.
904
+ */
905
+ declare const evalChartColorSchema: z.ZodEnum<{
906
+ accent: "accent";
907
+ accentDim: "accentDim";
908
+ success: "success";
909
+ error: "error";
910
+ warning: "warning";
911
+ textMuted: "textMuted";
912
+ }>;
913
+ /** Semantic color token resolved to a theme color by the web UI. */
914
+ type EvalChartColor = z.infer<typeof evalChartColorSchema>;
915
+ /** Y-axis placement for a plotted series on a dual-axis chart. */
916
+ declare const evalChartAxisSchema: z.ZodEnum<{
917
+ left: "left";
918
+ right: "right";
919
+ }>;
920
+ /** Y-axis placement for a plotted series on a dual-axis chart. */
921
+ type EvalChartAxis = z.infer<typeof evalChartAxisSchema>;
922
+ /**
923
+ * One plotted series on an eval history chart. `builtin` metrics come from the
924
+ * per-run `RunSummary`; `column` metrics aggregate a per-case score or
925
+ * `setEvalOutput` column across the run using `aggregate`.
926
+ */
927
+ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
928
+ source: z.ZodLiteral<"builtin">;
929
+ metric: z.ZodEnum<{
930
+ passRate: "passRate";
931
+ durationMs: "durationMs";
932
+ }>;
933
+ label: z.ZodOptional<z.ZodString>;
934
+ color: z.ZodOptional<z.ZodEnum<{
935
+ accent: "accent";
936
+ accentDim: "accentDim";
937
+ success: "success";
938
+ error: "error";
939
+ warning: "warning";
940
+ textMuted: "textMuted";
941
+ }>>;
942
+ axis: z.ZodOptional<z.ZodEnum<{
943
+ left: "left";
944
+ right: "right";
945
+ }>>;
946
+ }, z.core.$strip>, z.ZodObject<{
947
+ source: z.ZodLiteral<"column">;
948
+ key: z.ZodString;
949
+ aggregate: z.ZodEnum<{
950
+ avg: "avg";
951
+ sum: "sum";
952
+ min: "min";
953
+ max: "max";
954
+ latest: "latest";
955
+ passThresholdRate: "passThresholdRate";
956
+ }>;
957
+ label: z.ZodOptional<z.ZodString>;
958
+ color: z.ZodOptional<z.ZodEnum<{
959
+ accent: "accent";
960
+ accentDim: "accentDim";
961
+ success: "success";
962
+ error: "error";
963
+ warning: "warning";
964
+ textMuted: "textMuted";
965
+ }>>;
966
+ axis: z.ZodOptional<z.ZodEnum<{
967
+ left: "left";
968
+ right: "right";
969
+ }>>;
970
+ }, z.core.$strip>], "source">;
971
+ /** One plotted series on an eval history chart. */
972
+ type EvalChartMetric = z.infer<typeof evalChartMetricSchema>;
973
+ /** Extra field rendered only in the tooltip, not plotted as a series. */
974
+ declare const evalChartTooltipExtraSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
975
+ source: z.ZodLiteral<"builtin">;
976
+ metric: z.ZodEnum<{
977
+ passRate: "passRate";
978
+ durationMs: "durationMs";
979
+ }>;
980
+ label: z.ZodOptional<z.ZodString>;
981
+ }, z.core.$strip>, z.ZodObject<{
982
+ source: z.ZodLiteral<"column">;
983
+ key: z.ZodString;
984
+ aggregate: z.ZodEnum<{
985
+ avg: "avg";
986
+ sum: "sum";
987
+ min: "min";
988
+ max: "max";
989
+ latest: "latest";
990
+ passThresholdRate: "passThresholdRate";
991
+ }>;
992
+ label: z.ZodOptional<z.ZodString>;
993
+ }, z.core.$strip>], "source">;
994
+ /** Extra field rendered only in the tooltip, not plotted as a series. */
995
+ type EvalChartTooltipExtra = z.infer<typeof evalChartTooltipExtraSchema>;
996
+ /**
997
+ * Authored configuration for one eval history chart rendered in `EvalCard`.
998
+ * Authors declare a list of these via `EvalDefinition.charts` — the UI renders
999
+ * each entry as its own chart frame, stacked in authoring order.
1000
+ */
1001
+ declare const evalChartConfigSchema: z.ZodObject<{
1002
+ heading: z.ZodOptional<z.ZodString>;
1003
+ type: z.ZodEnum<{
1004
+ area: "area";
1005
+ line: "line";
1006
+ bar: "bar";
1007
+ }>;
1008
+ metrics: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
1009
+ source: z.ZodLiteral<"builtin">;
1010
+ metric: z.ZodEnum<{
1011
+ passRate: "passRate";
1012
+ durationMs: "durationMs";
1013
+ }>;
1014
+ label: z.ZodOptional<z.ZodString>;
1015
+ color: z.ZodOptional<z.ZodEnum<{
1016
+ accent: "accent";
1017
+ accentDim: "accentDim";
1018
+ success: "success";
1019
+ error: "error";
1020
+ warning: "warning";
1021
+ textMuted: "textMuted";
1022
+ }>>;
1023
+ axis: z.ZodOptional<z.ZodEnum<{
1024
+ left: "left";
1025
+ right: "right";
1026
+ }>>;
1027
+ }, z.core.$strip>, z.ZodObject<{
1028
+ source: z.ZodLiteral<"column">;
1029
+ key: z.ZodString;
1030
+ aggregate: z.ZodEnum<{
1031
+ avg: "avg";
1032
+ sum: "sum";
1033
+ min: "min";
1034
+ max: "max";
1035
+ latest: "latest";
1036
+ passThresholdRate: "passThresholdRate";
1037
+ }>;
1038
+ label: z.ZodOptional<z.ZodString>;
1039
+ color: z.ZodOptional<z.ZodEnum<{
1040
+ accent: "accent";
1041
+ accentDim: "accentDim";
1042
+ success: "success";
1043
+ error: "error";
1044
+ warning: "warning";
1045
+ textMuted: "textMuted";
1046
+ }>>;
1047
+ axis: z.ZodOptional<z.ZodEnum<{
1048
+ left: "left";
1049
+ right: "right";
1050
+ }>>;
1051
+ }, z.core.$strip>], "source">>;
1052
+ yDomain: z.ZodOptional<z.ZodObject<{
1053
+ left: z.ZodOptional<z.ZodObject<{
1054
+ min: z.ZodOptional<z.ZodNumber>;
1055
+ max: z.ZodOptional<z.ZodNumber>;
1056
+ }, z.core.$strip>>;
1057
+ right: z.ZodOptional<z.ZodObject<{
1058
+ min: z.ZodOptional<z.ZodNumber>;
1059
+ max: z.ZodOptional<z.ZodNumber>;
1060
+ }, z.core.$strip>>;
1061
+ }, z.core.$strip>>;
1062
+ tooltipExtras: z.ZodOptional<z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
1063
+ source: z.ZodLiteral<"builtin">;
1064
+ metric: z.ZodEnum<{
1065
+ passRate: "passRate";
1066
+ durationMs: "durationMs";
1067
+ }>;
1068
+ label: z.ZodOptional<z.ZodString>;
1069
+ }, z.core.$strip>, z.ZodObject<{
1070
+ source: z.ZodLiteral<"column">;
1071
+ key: z.ZodString;
1072
+ aggregate: z.ZodEnum<{
1073
+ avg: "avg";
1074
+ sum: "sum";
1075
+ min: "min";
1076
+ max: "max";
1077
+ latest: "latest";
1078
+ passThresholdRate: "passThresholdRate";
1079
+ }>;
1080
+ label: z.ZodOptional<z.ZodString>;
1081
+ }, z.core.$strip>], "source">>>;
1082
+ }, z.core.$strip>;
1083
+ /** Authored configuration for one eval history chart. */
1084
+ type EvalChartConfig = z.infer<typeof evalChartConfigSchema>;
1085
+ /**
1086
+ * Ordered list of history charts rendered for an eval. Opt-in: when omitted or
1087
+ * empty, the UI renders no history chart at all.
1088
+ */
1089
+ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
1090
+ heading: z.ZodOptional<z.ZodString>;
1091
+ type: z.ZodEnum<{
1092
+ area: "area";
1093
+ line: "line";
1094
+ bar: "bar";
1095
+ }>;
1096
+ metrics: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
1097
+ source: z.ZodLiteral<"builtin">;
1098
+ metric: z.ZodEnum<{
1099
+ passRate: "passRate";
1100
+ durationMs: "durationMs";
1101
+ }>;
1102
+ label: z.ZodOptional<z.ZodString>;
1103
+ color: z.ZodOptional<z.ZodEnum<{
1104
+ accent: "accent";
1105
+ accentDim: "accentDim";
1106
+ success: "success";
1107
+ error: "error";
1108
+ warning: "warning";
1109
+ textMuted: "textMuted";
1110
+ }>>;
1111
+ axis: z.ZodOptional<z.ZodEnum<{
1112
+ left: "left";
1113
+ right: "right";
1114
+ }>>;
1115
+ }, z.core.$strip>, z.ZodObject<{
1116
+ source: z.ZodLiteral<"column">;
1117
+ key: z.ZodString;
1118
+ aggregate: z.ZodEnum<{
1119
+ avg: "avg";
1120
+ sum: "sum";
1121
+ min: "min";
1122
+ max: "max";
1123
+ latest: "latest";
1124
+ passThresholdRate: "passThresholdRate";
1125
+ }>;
1126
+ label: z.ZodOptional<z.ZodString>;
1127
+ color: z.ZodOptional<z.ZodEnum<{
1128
+ accent: "accent";
1129
+ accentDim: "accentDim";
1130
+ success: "success";
1131
+ error: "error";
1132
+ warning: "warning";
1133
+ textMuted: "textMuted";
1134
+ }>>;
1135
+ axis: z.ZodOptional<z.ZodEnum<{
1136
+ left: "left";
1137
+ right: "right";
1138
+ }>>;
1139
+ }, z.core.$strip>], "source">>;
1140
+ yDomain: z.ZodOptional<z.ZodObject<{
1141
+ left: z.ZodOptional<z.ZodObject<{
1142
+ min: z.ZodOptional<z.ZodNumber>;
1143
+ max: z.ZodOptional<z.ZodNumber>;
1144
+ }, z.core.$strip>>;
1145
+ right: z.ZodOptional<z.ZodObject<{
1146
+ min: z.ZodOptional<z.ZodNumber>;
1147
+ max: z.ZodOptional<z.ZodNumber>;
1148
+ }, z.core.$strip>>;
1149
+ }, z.core.$strip>>;
1150
+ tooltipExtras: z.ZodOptional<z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
1151
+ source: z.ZodLiteral<"builtin">;
1152
+ metric: z.ZodEnum<{
1153
+ passRate: "passRate";
1154
+ durationMs: "durationMs";
1155
+ }>;
1156
+ label: z.ZodOptional<z.ZodString>;
1157
+ }, z.core.$strip>, z.ZodObject<{
1158
+ source: z.ZodLiteral<"column">;
1159
+ key: z.ZodString;
1160
+ aggregate: z.ZodEnum<{
1161
+ avg: "avg";
1162
+ sum: "sum";
1163
+ min: "min";
1164
+ max: "max";
1165
+ latest: "latest";
1166
+ passThresholdRate: "passThresholdRate";
1167
+ }>;
1168
+ label: z.ZodOptional<z.ZodString>;
1169
+ }, z.core.$strip>], "source">>>;
1170
+ }, z.core.$strip>>;
1171
+ /** Ordered list of history charts rendered for an eval. */
1172
+ type EvalChartsConfig = z.infer<typeof evalChartsConfigSchema>;
1173
+ //#endregion
1174
+ //#region ../shared/src/schemas/run.d.ts
1175
+ /** Schema for persisted metadata about a single run invocation. */
1176
+ declare const runManifestSchema: z.ZodObject<{
1177
+ id: z.ZodString;
1178
+ shortId: z.ZodString;
1179
+ status: z.ZodEnum<{
1180
+ pending: "pending";
1181
+ running: "running";
1182
+ completed: "completed";
1183
+ cancelled: "cancelled";
1184
+ error: "error";
1185
+ }>;
1186
+ startedAt: z.ZodString;
1187
+ endedAt: z.ZodNullable<z.ZodString>;
1188
+ commitSha: z.ZodDefault<z.ZodOptional<z.ZodNullable<z.ZodString>>>;
1189
+ evalSourceFingerprints: z.ZodDefault<z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>>;
1190
+ target: z.ZodObject<{
1191
+ mode: z.ZodEnum<{
1192
+ all: "all";
1193
+ evalIds: "evalIds";
1194
+ caseIds: "caseIds";
1195
+ }>;
1196
+ evalIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
1197
+ caseIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
1198
+ }, z.core.$strip>;
1199
+ trials: z.ZodNumber;
1200
+ trialSelection: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
1201
+ lowestScore: "lowestScore";
1202
+ median: "median";
1203
+ }>>>;
1204
+ cacheMode: z.ZodOptional<z.ZodEnum<{
1205
+ use: "use";
1206
+ bypass: "bypass";
1207
+ refresh: "refresh";
1208
+ }>>;
1209
+ }, z.core.$strip>;
1210
+ /** Persisted lifecycle metadata for a single eval run. */
1211
+ type RunManifest = z.infer<typeof runManifestSchema>;
1212
+ /** Schema for aggregate metrics computed over a completed or active run. */
1213
+ declare const runSummarySchema: z.ZodObject<{
1214
+ runId: z.ZodString;
1215
+ status: z.ZodEnum<{
1216
+ pending: "pending";
1217
+ running: "running";
1218
+ completed: "completed";
1219
+ cancelled: "cancelled";
1220
+ error: "error";
1221
+ }>;
1222
+ totalCases: z.ZodNumber;
1223
+ passedCases: z.ZodNumber;
1224
+ failedCases: z.ZodNumber;
1225
+ errorCases: z.ZodNumber;
1226
+ cancelledCases: z.ZodNumber;
1227
+ totalDurationMs: z.ZodNullable<z.ZodNumber>;
1228
+ errorMessage: z.ZodDefault<z.ZodNullable<z.ZodString>>;
1229
+ }, z.core.$strip>;
1230
+ /** Roll-up statistics for one run. */
1231
+ type RunSummary = z.infer<typeof runSummarySchema>;
1232
+ //#endregion
1233
+ //#region ../shared/src/status.d.ts
1234
+ /**
1235
+ * Canonical derived result status used for aggregated displays and propagation
1236
+ * across case, eval, file, folder, and run result views.
1237
+ */
1238
+ type DerivedStatus = 'pending' | 'running' | 'pass' | 'fail' | 'error' | 'cancelled';
1239
+ /**
1240
+ * Aggregate summary derived from a scoped set of case rows.
1241
+ *
1242
+ * This is intentionally separate from `RunSummary`: it represents a summary
1243
+ * over any slice of case rows, such as a single eval within a run.
1244
+ */
1245
+ type ScopedCaseSummary = {
1246
+ status: DerivedStatus;
1247
+ totalCases: number;
1248
+ passedCases: number;
1249
+ failedCases: number;
1250
+ errorCases: number;
1251
+ cancelledCases: number;
1252
+ pendingCases: number;
1253
+ runningCases: number;
1254
+ totalDurationMs: number | null;
1255
+ };
1256
+ type RunLifecycleStatus = RunManifest['status'] | null | undefined;
1257
+ /**
1258
+ * Derive an aggregate status from child statuses. When `lifecycleStatus` is
1259
+ * given, the raw run lifecycle status may override the child-derived result
1260
+ * for active or terminal states such as `running`, `cancelled`, and `error`.
1261
+ */
1262
+ declare function deriveStatusFromChildStatuses(params: {
1263
+ statuses: Iterable<DerivedStatus | null | undefined>;
1264
+ lifecycleStatus?: RunLifecycleStatus;
1265
+ }): DerivedStatus;
1266
+ /**
1267
+ * Derive an aggregate status from a scoped set of case rows.
1268
+ *
1269
+ * Pass `lifecycleStatus` only when the parent scope's raw run lifecycle should
1270
+ * override the derived child result, such as for a whole-run display.
1271
+ */
1272
+ declare function deriveStatusFromCaseRows(params: {
1273
+ caseRows: Iterable<Pick<CaseRow, 'status'>>;
1274
+ lifecycleStatus?: RunLifecycleStatus;
1275
+ }): DerivedStatus;
1276
+ /**
1277
+ * Derive counts, aggregate metrics, and display status from a scoped set of
1278
+ * case rows.
1279
+ */
1280
+ declare function deriveScopedSummaryFromCases(params: {
1281
+ caseRows: Iterable<CaseRow>;
1282
+ lifecycleStatus?: RunLifecycleStatus;
1283
+ }): ScopedCaseSummary;
1284
+ //#endregion
1285
+ //#region ../shared/src/evalStatus.d.ts
1286
+ /** Display status used for eval, file, and folder UI surfaces. */
1287
+ type EvalDisplayStatus = DerivedStatus | 'stale' | 'outdated' | 'unscored';
1288
+ /**
1289
+ * Derive the user-facing eval status from the raw latest run result plus
1290
+ * freshness state.
1291
+ */
1292
+ declare function getEvalDisplayStatus(params: {
1293
+ freshnessStatus: EvalFreshnessStatus;
1294
+ stale: boolean;
1295
+ outdated: boolean;
1296
+ lastRunStatus: 'pass' | 'fail' | 'error' | 'running' | 'cancelled' | 'unscored' | null;
1297
+ isRunning?: boolean;
1298
+ }): EvalDisplayStatus;
1299
+ //#endregion
1300
+ //#region ../shared/src/evalTitle.d.ts
1301
+ type EvalTitleLike = {
1302
+ id: string;
1303
+ title?: string;
1304
+ };
1305
+ /**
1306
+ * Resolve the display title for an eval.
1307
+ *
1308
+ * Returns the authored `title` when present; otherwise derives a human-readable
1309
+ * label from the stable eval `id` so display surfaces can avoid repeating both
1310
+ * fields in common cases.
1311
+ */
1312
+ declare function getEvalTitle(evalLike: EvalTitleLike): string;
1313
+ //#endregion
1314
+ //#region ../shared/src/schemas/sse.d.ts
1315
+ declare const sseEventTypeSchema: z.ZodEnum<{
1316
+ "discovery.updated": "discovery.updated";
1317
+ "run.started": "run.started";
1318
+ "run.summary": "run.summary";
1319
+ "case.started": "case.started";
1320
+ "case.updated": "case.updated";
1321
+ "case.finished": "case.finished";
1322
+ "trace.span": "trace.span";
1323
+ "run.finished": "run.finished";
1324
+ "run.cancelled": "run.cancelled";
1325
+ "run.error": "run.error";
1326
+ }>;
1327
+ /** Server-sent event name emitted by the runner or backend. */
1328
+ type SseEventType = z.infer<typeof sseEventTypeSchema>;
1329
+ /** Schema for the SSE envelope used to stream run updates to clients. Note: `type` is typed as a plain string here (not `sseEventTypeSchema`) and `payload` is `unknown`, so consumers must narrow both themselves. */
1330
+ declare const sseEnvelopeSchema: z.ZodObject<{
1331
+ type: z.ZodString;
1332
+ runId: z.ZodOptional<z.ZodString>;
1333
+ timestamp: z.ZodString;
1334
+ payload: z.ZodUnknown;
1335
+ }, z.core.$strip>;
1336
+ /** Wire format for a streamed event emitted during eval execution. */
1337
+ type SseEnvelope = z.infer<typeof sseEnvelopeSchema>;
1338
+ //#endregion
1339
+ //#region ../shared/src/schemas/api.d.ts
1340
+ /** Schema for the API request that starts a new eval run. */
1341
+ declare const createRunRequestSchema: z.ZodObject<{
1342
+ target: z.ZodObject<{
1343
+ mode: z.ZodEnum<{
1344
+ all: "all";
1345
+ evalIds: "evalIds";
1346
+ caseIds: "caseIds";
1347
+ }>;
1348
+ evalIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
1349
+ caseIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
1350
+ }, z.core.$strip>;
1351
+ trials: z.ZodNumber;
1352
+ cache: z.ZodOptional<z.ZodObject<{
1353
+ mode: z.ZodDefault<z.ZodEnum<{
1354
+ use: "use";
1355
+ bypass: "bypass";
1356
+ refresh: "refresh";
1357
+ }>>;
1358
+ }, z.core.$strip>>;
1359
+ }, z.core.$strip>;
1360
+ /** Request payload accepted by the run creation endpoint. */
1361
+ type CreateRunRequest = z.infer<typeof createRunRequestSchema>;
1362
+ /** Schema for updating a UI-authored manual score on one persisted case. */
1363
+ declare const updateManualScoreRequestSchema: z.ZodObject<{
1364
+ value: z.ZodNullable<z.ZodNumber>;
1365
+ }, z.core.$strip>;
1366
+ /** Request payload accepted by the manual score update endpoint. */
1367
+ type UpdateManualScoreRequest = z.infer<typeof updateManualScoreRequestSchema>;
1368
+ //#endregion
1369
+ //#region ../shared/src/schemas/config.d.ts
1370
+ /** Strategy used to collapse repeated trials into one stored case result. */
1371
+ declare const trialSelectionModeSchema: z.ZodEnum<{
1372
+ lowestScore: "lowestScore";
1373
+ median: "median";
1374
+ }>;
1375
+ /** Strategy used to collapse repeated trials into one stored case result. */
1376
+ type TrialSelectionMode = z.infer<typeof trialSelectionModeSchema>;
1377
+ /** Top-level config authored in `agent-evals.config.ts`. */
1378
+ type AgentEvalsConfig = {
1379
+ /** Root directory used to resolve all relative paths. Defaults to `process.cwd()`. */workspaceRoot?: string; /** Glob patterns (relative to `workspaceRoot`) used to discover eval files. */
1380
+ include: string[]; /** Number of trials per case when none is specified. Defaults to `1`. */
1381
+ defaultTrials?: number;
1382
+ /**
1383
+ * Strategy used to pick the single persisted result when `trials > 1`.
1384
+ *
1385
+ * `lowestScore` is the default. `median` uses the lower median when the
1386
+ * number of trials is even.
1387
+ */
1388
+ trialSelection?: TrialSelectionMode;
1389
+ /**
1390
+ * Maximum number of case executions that may run in parallel across one run,
1391
+ * including trial fan-out. Defaults to `2`.
1392
+ */
1393
+ concurrency?: number;
1394
+ /**
1395
+ * Age threshold, in days, before the latest run from a different commit is
1396
+ * considered outdated. Defaults to `14`.
1397
+ */
1398
+ staleAfterDays?: number;
1399
+ /**
1400
+ * Global trace attribute display config for the UI.
1401
+ *
1402
+ * These rules are merged with per-eval `traceDisplay` rules, with the eval
1403
+ * definition taking precedence for matching `key` or `path` entries.
1404
+ */
1405
+ traceDisplay?: TraceDisplayInputConfig;
1406
+ /**
1407
+ * Optional controls for the operation cache. When omitted, the cache is
1408
+ * enabled and stored under `<workspaceRoot>/.agent-evals/cache`.
1409
+ */
1410
+ cache?: {
1411
+ /** Set to `false` to disable the cache entirely; spans with `cache` options then execute as if uncached. */enabled?: boolean; /** Override the directory used to persist cache entries. */
1412
+ dir?: string;
1413
+ };
1414
+ };
1415
+ /** Zod schema for validating `agent-evals.config.ts` input. */
1416
+ declare const agentEvalsConfigSchema: z.ZodObject<{
1417
+ workspaceRoot: z.ZodOptional<z.ZodString>;
1418
+ include: z.ZodArray<z.ZodString>;
1419
+ defaultTrials: z.ZodOptional<z.ZodNumber>;
1420
+ trialSelection: z.ZodOptional<z.ZodEnum<{
1421
+ lowestScore: "lowestScore";
1422
+ median: "median";
1423
+ }>>;
1424
+ concurrency: z.ZodOptional<z.ZodNumber>;
1425
+ staleAfterDays: z.ZodOptional<z.ZodNumber>;
1426
+ traceDisplay: z.ZodOptional<z.ZodObject<{
1427
+ attributes: z.ZodOptional<z.ZodArray<z.ZodObject<{
1428
+ key: z.ZodOptional<z.ZodString>;
1429
+ path: z.ZodString;
1430
+ label: z.ZodOptional<z.ZodString>;
1431
+ format: z.ZodOptional<z.ZodEnum<{
1432
+ string: "string";
1433
+ number: "number";
1434
+ duration: "duration";
1435
+ json: "json";
1436
+ }>>;
1437
+ numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
1438
+ placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
1439
+ tree: "tree";
1440
+ detail: "detail";
1441
+ section: "section";
1442
+ }>>>;
1443
+ scope: z.ZodOptional<z.ZodEnum<{
1444
+ self: "self";
1445
+ subtree: "subtree";
1446
+ }>>;
1447
+ mode: z.ZodOptional<z.ZodEnum<{
1448
+ all: "all";
1449
+ last: "last";
1450
+ sum: "sum";
1451
+ }>>;
1452
+ transform: z.ZodOptional<z.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
1453
+ }, z.core.$strip>>>;
1454
+ }, z.core.$strip>>;
1455
+ cache: z.ZodOptional<z.ZodObject<{
1456
+ enabled: z.ZodOptional<z.ZodBoolean>;
1457
+ dir: z.ZodOptional<z.ZodString>;
1458
+ }, z.core.$strip>>;
1459
+ }, z.core.$strip>;
1460
+ //#endregion
1461
+ //#region ../shared/src/schemas/cache.d.ts
1462
+ /**
1463
+ * Mode that controls how the cache is consulted for a given run.
1464
+ *
1465
+ * - `use`: read cache on hit, write on miss. Default.
1466
+ * - `bypass`: never read, never write.
1467
+ * - `refresh`: never read, always write (forces re-execution and overwrites).
1468
+ */
1469
+ declare const cacheModeSchema: z.ZodEnum<{
1470
+ use: "use";
1471
+ bypass: "bypass";
1472
+ refresh: "refresh";
1473
+ }>;
1474
+ /** Mode controlling how cached spans behave during a run. */
1475
+ type CacheMode = z.infer<typeof cacheModeSchema>;
1476
+ /** Options accepted by an `evalTracer.span` call to opt the span into caching. */
1477
+ declare const spanCacheOptionsSchema: z.ZodObject<{
1478
+ key: z.ZodUnknown;
1479
+ namespace: z.ZodOptional<z.ZodString>;
1480
+ }, z.core.$strip>;
1481
+ /** Options accepted by an `evalTracer.span` call to opt the span into caching. */
1482
+ type SpanCacheOptions = z.infer<typeof spanCacheOptionsSchema>;
1483
+ /** Summary of a single persisted cache entry, used by list/delete endpoints. */
1484
+ declare const cacheListItemSchema: z.ZodObject<{
1485
+ key: z.ZodString;
1486
+ namespace: z.ZodString;
1487
+ spanName: z.ZodString;
1488
+ spanKind: z.ZodEnum<{
1489
+ eval: "eval";
1490
+ agent: "agent";
1491
+ llm: "llm";
1492
+ tool: "tool";
1493
+ retrieval: "retrieval";
1494
+ scorer: "scorer";
1495
+ checkpoint: "checkpoint";
1496
+ custom: "custom";
1497
+ }>;
1498
+ storedAt: z.ZodString;
1499
+ codeFingerprint: z.ZodString;
1500
+ sizeBytes: z.ZodNumber;
1501
+ }, z.core.$strip>;
1502
+ /** Summary row for a single cache entry. */
1503
+ type CacheListItem = z.infer<typeof cacheListItemSchema>;
1504
+ /** Serialized nested span captured while recording a cached operation. */
1505
+ type SerializedCacheSpan = {
1506
+ kind: TraceSpanKind;
1507
+ name: string;
1508
+ attributes?: Record<string, unknown>;
1509
+ status: 'running' | 'ok' | 'error' | 'cancelled';
1510
+ error?: {
1511
+ name?: string;
1512
+ message: string;
1513
+ stack?: string;
1514
+ };
1515
+ children: SerializedCacheSpan[];
1516
+ };
1517
+ /** Zod schema for `SerializedCacheSpan`, defined lazily for recursion. */
1518
+ declare const serializedCacheSpanSchema: z.ZodType<SerializedCacheSpan>;
1519
+ /**
1520
+ * One captured operation performed while a cached span's body executed.
1521
+ *
1522
+ * Operations are replayed in order against a fresh scope on cache hit to
1523
+ * reproduce the observable effects of the original run.
1524
+ */
1525
+ declare const cacheRecordingOpSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
1526
+ kind: z.ZodLiteral<"setOutput">;
1527
+ key: z.ZodString;
1528
+ value: z.ZodUnknown;
1529
+ }, z.core.$strip>, z.ZodObject<{
1530
+ kind: z.ZodLiteral<"incrementOutput">;
1531
+ key: z.ZodString;
1532
+ delta: z.ZodNumber;
1533
+ }, z.core.$strip>, z.ZodObject<{
1534
+ kind: z.ZodLiteral<"checkpoint">;
1535
+ name: z.ZodString;
1536
+ data: z.ZodUnknown;
1537
+ }, z.core.$strip>, z.ZodObject<{
1538
+ kind: z.ZodLiteral<"subSpan">;
1539
+ span: z.ZodType<SerializedCacheSpan, unknown, z.core.$ZodTypeInternals<SerializedCacheSpan, unknown>>;
1540
+ }, z.core.$strip>], "kind">;
1541
+ /** Single effect captured by a cache recording. */
1542
+ type CacheRecordingOp = z.infer<typeof cacheRecordingOpSchema>;
1543
+ /** Captured observable effects + return value of a cached span body. */
1544
+ declare const cacheRecordingSchema: z.ZodObject<{
1545
+ returnValue: z.ZodUnknown;
1546
+ finalAttributes: z.ZodRecord<z.ZodString, z.ZodUnknown>;
1547
+ ops: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
1548
+ kind: z.ZodLiteral<"setOutput">;
1549
+ key: z.ZodString;
1550
+ value: z.ZodUnknown;
1551
+ }, z.core.$strip>, z.ZodObject<{
1552
+ kind: z.ZodLiteral<"incrementOutput">;
1553
+ key: z.ZodString;
1554
+ delta: z.ZodNumber;
1555
+ }, z.core.$strip>, z.ZodObject<{
1556
+ kind: z.ZodLiteral<"checkpoint">;
1557
+ name: z.ZodString;
1558
+ data: z.ZodUnknown;
1559
+ }, z.core.$strip>, z.ZodObject<{
1560
+ kind: z.ZodLiteral<"subSpan">;
1561
+ span: z.ZodType<SerializedCacheSpan, unknown, z.core.$ZodTypeInternals<SerializedCacheSpan, unknown>>;
1562
+ }, z.core.$strip>], "kind">>;
1563
+ }, z.core.$strip>;
1564
+ /** Captured observable effects + return value of a cached span body. */
1565
+ type CacheRecording = z.infer<typeof cacheRecordingSchema>;
1566
+ /** Persisted cache file containing metadata and a recording. */
1567
+ declare const cacheEntrySchema: z.ZodObject<{
1568
+ version: z.ZodLiteral<1>;
1569
+ key: z.ZodString;
1570
+ namespace: z.ZodString;
1571
+ spanName: z.ZodString;
1572
+ spanKind: z.ZodEnum<{
1573
+ eval: "eval";
1574
+ agent: "agent";
1575
+ llm: "llm";
1576
+ tool: "tool";
1577
+ retrieval: "retrieval";
1578
+ scorer: "scorer";
1579
+ checkpoint: "checkpoint";
1580
+ custom: "custom";
1581
+ }>;
1582
+ storedAt: z.ZodString;
1583
+ codeFingerprint: z.ZodString;
1584
+ recording: z.ZodObject<{
1585
+ returnValue: z.ZodUnknown;
1586
+ finalAttributes: z.ZodRecord<z.ZodString, z.ZodUnknown>;
1587
+ ops: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
1588
+ kind: z.ZodLiteral<"setOutput">;
1589
+ key: z.ZodString;
1590
+ value: z.ZodUnknown;
1591
+ }, z.core.$strip>, z.ZodObject<{
1592
+ kind: z.ZodLiteral<"incrementOutput">;
1593
+ key: z.ZodString;
1594
+ delta: z.ZodNumber;
1595
+ }, z.core.$strip>, z.ZodObject<{
1596
+ kind: z.ZodLiteral<"checkpoint">;
1597
+ name: z.ZodString;
1598
+ data: z.ZodUnknown;
1599
+ }, z.core.$strip>, z.ZodObject<{
1600
+ kind: z.ZodLiteral<"subSpan">;
1601
+ span: z.ZodType<SerializedCacheSpan, unknown, z.core.$ZodTypeInternals<SerializedCacheSpan, unknown>>;
1602
+ }, z.core.$strip>], "kind">>;
1603
+ }, z.core.$strip>;
1604
+ }, z.core.$strip>;
1605
+ /** Persisted cache file contents. */
1606
+ type CacheEntry = z.infer<typeof cacheEntrySchema>;
1607
+ //#endregion
1608
+ //#region ../sdk/src/types.d.ts
1609
+ /** Single authored eval case with its stable identifier and input payload. */
1610
+ type EvalCase<TInput> = {
1611
+ id: string;
1612
+ input: TInput;
1613
+ tags?: string[];
1614
+ };
1615
+ /** UI overrides for a derived or scored column emitted by an eval. */
1616
+ type EvalColumnOverride = {
1617
+ /** Display label shown for the column in tables and detail views. */label?: string;
1618
+ /**
1619
+ * Presentation preset for the value.
1620
+ *
1621
+ * Use this to control how the UI renders the cell and infers table behavior,
1622
+ * for example `number`, `boolean`, `duration`, `markdown`, `json`, or
1623
+ * file/media previews.
1624
+ */
1625
+ format?: ColumnFormat;
1626
+ /**
1627
+ * Extra options for `format: 'number'`.
1628
+ *
1629
+ * Use this to add a prefix or suffix, force a fixed number of decimal
1630
+ * places, or switch to compact notation such as `1.2K`.
1631
+ */
1632
+ numberFormat?: NumberDisplayOptions;
1633
+ /**
1634
+ * Hides the column from the runs table while keeping it available in detail
1635
+ * views and raw output data.
1636
+ */
1637
+ hideInTable?: boolean; /** Whether the UI should allow sorting rows by this column. */
1638
+ sortable?: boolean; /** Horizontal alignment used when rendering the column cells. */
1639
+ align?: 'left' | 'center' | 'right';
1640
+ /**
1641
+ * Maximum number of stars used when `format: 'stars'`.
1642
+ *
1643
+ * Values are still stored as normalized `0..1` numbers; the UI maps the
1644
+ * selected star count evenly across that range.
1645
+ */
1646
+ maxStars?: number;
1647
+ };
1648
+ /** Column override map keyed by output or score field name. */
1649
+ type EvalColumns = Record<string, EvalColumnOverride>;
1650
+ /** Query helpers built from the flattened trace recorded for one eval case. */
1651
+ type EvalTraceTree = {
1652
+ spans: EvalTraceSpan[];
1653
+ rootSpans: EvalTraceSpan[];
1654
+ findSpan: (name: string) => EvalTraceSpan | undefined;
1655
+ findSpansByKind: (kind: EvalTraceSpan['kind']) => EvalTraceSpan[];
1656
+ flattenDfs: () => EvalTraceSpan[];
1657
+ checkpoints: Map<string, unknown>;
1658
+ };
1659
+ /** Context passed to an eval's `execute` function for a single case run. */
1660
+ type EvalExecuteContext<TInput> = {
1661
+ input: TInput;
1662
+ signal: AbortSignal;
1663
+ };
1664
+ /** Context passed to `deriveFromTracing` after execution has completed. */
1665
+ type EvalDeriveContext<TInput> = {
1666
+ trace: EvalTraceTree;
1667
+ input: TInput;
1668
+ case: EvalCase<TInput>;
1669
+ };
1670
+ /** Context passed to score functions after outputs have been collected. */
1671
+ type EvalScoreContext<TInput> = {
1672
+ input: TInput;
1673
+ outputs: Record<string, unknown>;
1674
+ case: EvalCase<TInput>;
1675
+ };
1676
+ /** Score callback that computes a numeric result for one case. */
1677
+ type EvalScoreFn<TInput> = (ctx: EvalScoreContext<TInput>) => number | Promise<number>;
1678
+ /**
1679
+ * Score definition accepted by `defineEval`, with optional UI metadata.
1680
+ *
1681
+ * When `passThreshold` is provided, this score gates the case pass/fail:
1682
+ * a case fails if its computed value is strictly below the threshold. A
1683
+ * score without a `passThreshold` is informational only and never causes
1684
+ * a case to fail on its own.
1685
+ */
1686
+ type EvalScoreDef<TInput> = EvalScoreFn<TInput> | ({
1687
+ compute: EvalScoreFn<TInput>;
1688
+ passThreshold?: number;
1689
+ } & EvalColumnOverride);
1690
+ /**
1691
+ * Manual score definition accepted by `defineEval`.
1692
+ *
1693
+ * Manual scores are emitted as score columns with pending values during CLI
1694
+ * execution. The web UI is responsible for setting their normalized `0..1`
1695
+ * values after a run completes.
1696
+ */
1697
+ type EvalManualScoreDef = EvalColumnOverride & {
1698
+ /**
1699
+ * Optional pass/fail gate applied after a value is filled. Pending manual
1700
+ * values keep the eval in an `unscored` state instead of failing the case.
1701
+ */
1702
+ passThreshold?: number;
1703
+ };
1704
+ /** Complete authored eval definition consumed by `defineEval`; `TInput` parameterizes the case, execute, derive, and score types and defaults to `unknown`. */
1705
+ type EvalDefinition<TInput = unknown> = {
1706
+ id: string;
1707
+ title?: string;
1708
+ /**
1709
+ * Authored cases for this eval, given as an array or as an async loader function.
1710
+ *
1711
+ * When omitted or resolved to an empty array, the runner still executes the
1712
+ * eval once using a synthetic case with an empty object as its input.
1713
+ */
1714
+ cases?: EvalCase<TInput>[] | (() => Promise<EvalCase<TInput>[]>);
1715
+ columns?: EvalColumns;
1716
+ /**
1717
+ * Per-eval trace attribute display rules for the UI.
1718
+ *
1719
+ * These are merged with the global `AgentEvalsConfig.traceDisplay` rules.
1720
+ * Matching entries override the global rule by `key`, or by `path` when no
1721
+ * `key` is provided.
1722
+ */
1723
+ traceDisplay?: TraceDisplayInputConfig; /** Callback executed for a case; it may be synchronous or async and returns no value. */
1724
+ execute: (ctx: EvalExecuteContext<TInput>) => Promise<void> | void;
1725
+ deriveFromTracing?: (ctx: EvalDeriveContext<TInput>) => Record<string, unknown> | Promise<Record<string, unknown>>; /** Computed score definitions keyed by string; see `EvalScoreDef`. */
1726
+ scores?: Record<string, EvalScoreDef<TInput>>;
1727
+ /**
1728
+ * Score columns whose values are entered in the web UI after a run.
1729
+ *
1730
+ * Keys become persisted score columns, initialized as pending (`null`) for
1731
+ * every case. Once filled, values are normalized numbers in the `0..1`
1732
+ * range and participate in summaries, stats, charts, and pass thresholds
1733
+ * like computed scores.
1734
+ */
1735
+ manualScores?: Record<string, EvalManualScoreDef>;
1736
+ /**
1737
+ * Optional stats row configuration for the EvalCard in the web UI.
1738
+ *
1739
+ * Opt-in: when omitted (or empty) the EvalCard renders no stats row at all.
1740
+ * When provided, the stats render in order, left to right.
1741
+ *
1742
+ * Built-in kinds (`cases`, `passRate`, `duration`, `cost`) read from the
1743
+ * latest run summary. `kind: 'column'` aggregates a score or numeric output
1744
+ * column across the latest run's cases — `key` must match one of the eval's
1745
+ * score or column keys, and only finite numeric values participate in the
1746
+ * reduction. When no case has a numeric value for the key the stat renders
1747
+ * an em dash. `label` and `format` default to the matching `ColumnDef`.
1748
+ */
1749
+ stats?: EvalStatsConfig;
1750
+ /**
1751
+ * Optional history chart configuration for the EvalCard in the web UI.
1752
+ *
1753
+ * Opt-in: when omitted (or empty) the EvalCard renders no history chart at
1754
+ * all. Each entry in the list renders as its own chart frame, stacked in
1755
+ * authoring order.
1756
+ *
1757
+ * Each chart declares its `type` (`area | line | bar`) and one or more
1758
+ * `metrics`. Built-in metrics (`passRate`, `durationMs`) aggregate
1759
+ * the run summary. Column metrics aggregate a score or numeric `setEvalOutput`
1760
+ * column across the run using an `aggregate` reducer (`avg`, `sum`, `min`,
1761
+ * `max`, `latest`, `passThresholdRate`). `passThresholdRate` requires a
1762
+ * score column with `passThreshold`.
1763
+ */
1764
+ charts?: EvalChartsConfig;
1765
+ };
1766
+ //#endregion
1767
+ //#region ../sdk/src/defineEval.d.ts
1768
+ /**
1769
+ * Registered eval metadata tracked by the SDK during module loading.
1770
+ *
1771
+ * Consumers usually access these entries through `getEvalRegistry()`.
1772
+ */
1773
+ type EvalRegistryEntry = {
1774
+ id: string;
1775
+ title?: string; /** Takes a callback generic over `TInput` and returns that callback's result; presumably the callback receives the registered definition (confirm). */
1776
+ use: <R>(fn: <TInput>(def: EvalDefinition<TInput>) => R) => R;
1777
+ };
1778
+ /** Return the in-memory registry of evals defined in the current process, as a `Map` with string keys. */
1779
+ declare function getEvalRegistry(): Map<string, EvalRegistryEntry>;
1780
+ /**
1781
+ * Register an eval definition with the SDK so the runner can discover it
1782
+ * after importing the eval module. Returns nothing to the caller.
1783
+ */
1784
+ declare function defineEval<TInput>(definition: EvalDefinition<TInput>): void;
1785
+ //#endregion
1786
+ //#region ../sdk/src/repoFile.d.ts
1787
+ /**
1787
+ * Create a file reference that can be emitted via `setEvalOutput(...)` and rendered
1788
+ * by a column configured with `format: 'image' | 'audio' | 'video' | 'file'`.
1789
+ *
1790
+ * @param path Relative or absolute path to the repository file.
1791
+ * @param mimeType Optional MIME type hint for UI rendering.
1792
+ * @returns A repo-backed file reference (`source: "repo"`) suitable for file/media columns.
1793
+ */
1794
+ declare function repoFile(path: string, mimeType?: string): RepoFileRef;
1796
+ //#endregion
1797
+ //#region ../sdk/src/runtime.d.ts
1798
+ /**
1799
+ * Adapter used by the SDK to read and write cache entries for cached spans.
1800
+ *
1801
+ * Implementations are typically injected by the runner before the eval case
1802
+ * starts executing. Both methods are asynchronous.
1803
+ */
1804
+ type CacheAdapter = {
1805
+ /** Resolve with the stored entry for `keyHash` under `namespace`, or `null`. */lookup(namespace: string, keyHash: string): Promise<CacheEntry | null>; /** Persist a cache entry. Must be safe under concurrent calls. */
1806
+ write(entry: CacheEntry): Promise<void>;
1807
+ };
1808
+ /** Runner-supplied cache context attached to an eval case scope (adapter, mode, eval id, and code fingerprint). */
1809
+ type CacheScopeContext = {
1810
+ adapter: CacheAdapter;
1811
+ mode: CacheMode;
1812
+ evalId: string; /** Hash of the eval source file; used to invalidate on code changes. */
1813
+ codeFingerprint: string;
1814
+ };
1815
+ /** Active recording frame captured while a cached span body executes; frames are kept on `EvalCaseScope.recordingStack`. */
1816
+ type CacheRecordingFrame = {
1817
+ /** Length of `scope.spans` immediately before the cached body started. */baseSpanIndex: number; /** Id of the cached span that owns this recording. */
1818
+ cachedSpanId: string; /** Ordered observable effects recorded during the cached body. */
1819
+ ops: CacheRecordingOp[];
1820
+ };
1821
+ /** Mutable per-case runtime state stored in async local storage; this is the type returned by `getCurrentScope()`. */
1822
+ type EvalCaseScope = {
1823
+ caseId: string; /** Output values recorded for the case, keyed by output key (see `setEvalOutput`). */
1824
+ outputs: Record<string, unknown>; /** Structured assertion failures recorded for the current case. */
1825
+ assertionFailures: AssertionFailure[];
1826
+ spans: EvalTraceSpan[];
1827
+ checkpoints: Map<string, unknown>;
1828
+ spanStack: string[];
1829
+ activeSpanStack: EvalTraceSpan[];
1830
+ /**
1831
+ * Stack of active cache recorders. Ops are written to the top-most frame
1832
+ * when it exists and `replayingDepth === 0`.
1833
+ */
1834
+ recordingStack: CacheRecordingFrame[];
1835
+ /**
1836
+ * Incremented while replaying a cached span, so nested SDK calls do not
1837
+ * accidentally double-record ops into outer recorders.
1838
+ */
1839
+ replayingDepth: number; /** Runner-provided cache adapter + mode; absent when caching is disabled. */
1840
+ cacheContext: CacheScopeContext | undefined;
1841
+ };
1842
+ /** Error thrown when an eval assertion fails during case execution; constructed from a message string. */
1843
+ declare class EvalAssertionError extends Error {
1844
+ constructor(message: string);
1845
+ }
1846
+ /** Return the current eval scope for the active async context, or `undefined` when there is none. */
1847
+ declare function getCurrentScope(): EvalCaseScope | undefined;
1848
+ /**
1849
+ * Return whether the current async execution is inside an active eval case.
1850
+ *
1851
+ * This is useful for shared workflow code that wants to branch on eval-only
1852
+ * behavior without importing or inspecting the full eval scope (see `getCurrentScope()` for the scope itself).
1853
+ */
1854
+ declare function isInEvalScope(): boolean;
1855
+ /**
1856
+ * Attach cache context (adapter, mode, eval id, fingerprint) to a scope.
1857
+ *
1858
+ * Runner-internal helper called immediately before the user's `execute`
1859
+ * function runs inside `runInEvalScope`. `RunInEvalScopeOptions.cacheContext` also attaches a context before `fn` runs.
1860
+ */
1861
+ declare function setScopeCacheContext(scope: EvalCaseScope, context: CacheScopeContext): void;
1862
+ /** Optional inputs accepted by `runInEvalScope` when starting a new eval case scope. */
1863
+ type RunInEvalScopeOptions = {
1864
+ /** Cache adapter + mode attached to the scope before `fn` runs. */cacheContext?: CacheScopeContext;
1865
+ };
1866
+ /**
1867
+ * Execute a callback inside a fresh eval case scope and capture its outputs,
1868
+ * trace data, and terminal error state. Resolves with `{ result, scope, error }`, where `result` and `error` may each be `undefined`.
1869
+ */
1870
+ declare function runInEvalScope<T>(caseId: string, fn: () => Promise<T> | T, options?: RunInEvalScopeOptions): Promise<{
1871
+ result: T | undefined;
1872
+ scope: EvalCaseScope;
1873
+ error: Error | undefined;
1874
+ }>;
1875
+ /**
1876
+ * Record or replace the output value stored under `key` for the current case scope.
1877
+ *
1878
+ * Supported values include scalars, JSON-safe objects/arrays, explicit file
1879
+ * refs, and native `Blob`/`File` instances for media or file columns.
1880
+ */
1881
+ declare function setEvalOutput(key: string, value: unknown): void;
1882
+ /**
1883
+ * Add a numeric delta to the output value stored under `key` in the current case scope.
1884
+ *
1885
+ * If the existing value is non-numeric, the operation is recorded as an
1886
+ * assertion failure instead of mutating the output.
1887
+ */
1888
+ declare function incrementEvalOutput(key: string, delta: number): void;
1889
+ /**
1890
+ * Assert a condition for the current eval case and throw on failure.
1891
+ *
1892
+ * Calls made outside `runInEvalScope(...)` are ignored so shared workflow code
1893
+ * can safely reuse `evalAssert(...)` when it also runs outside an eval. The declared return type is plain `void`, not an `asserts` predicate, so the call does not narrow `condition`.
1894
+ */
1895
+ declare function evalAssert(condition: boolean, message: string): void;
1896
+ //#endregion
1897
+ //#region ../sdk/src/tracer.d.ts
1898
+ /**
1899
+ * Mutable handle for the current span, exposing `setName`, `setAttribute`, and `setAttributes`.
1900
+ *
1901
+ * Prefer the ambient `evalSpan` export for most code so helpers deeper in the call
1902
+ * stack can annotate the active span without receiving an injected argument.
1903
+ */
1904
+ type TraceActiveSpan = {
1905
+ /** Rename the active span after it has been created. */setName(value: string): void; /** Set a single attribute on the active span. Later writes replace the same key. */
1906
+ setAttribute(key: string, value: unknown): void; /** Merge multiple attributes into the active span. */
1907
+ setAttributes(value: Record<string, unknown>): void;
1908
+ };
1909
+ /**
1910
+ * Ambient `TraceActiveSpan` handle for the active span in the current async context.
1911
+ *
1912
+ * Calls are no-ops when executed outside of `evalTracer.span(...)`.
1913
+ */
1914
+ declare const evalSpan: TraceActiveSpan;
1915
+ type TraceSpanInfoBase = {
1916
+ /** Span kind, typed as `EvalTraceSpan['kind']`. */kind: EvalTraceSpan['kind']; /** Name given to the span. */
1917
+ name: string; /** Optional attributes supplied with the span info. */
1918
+ attributes?: Record<string, unknown>;
1919
+ };
1920
+ /** Info accepted by `evalTracer.span(info, fn)` when creating an uncached span; `cache` must be absent or `undefined`. */
1921
+ type TraceSpanInfoUncached = TraceSpanInfoBase & {
1922
+ cache?: undefined;
1923
+ };
1924
+ /**
1925
+ * Info accepted by `evalTracer.span(info, fn)` when opting in to caching; `cache` is required here.
1926
+ *
1927
+ * Cached spans return `Promise<unknown>` because the replayed value comes from
1928
+ * a JSON round-trip on cache hit. Narrow the value yourself when you need a
1929
+ * typed return.
1930
+ */
1931
+ type TraceSpanInfoCached = TraceSpanInfoBase & {
1932
+ cache: SpanCacheOptions;
1933
+ };
1934
+ /** Info accepted by `evalTracer.span(info, fn)`: either the uncached or the cached variant, told apart by `cache`. */
1935
+ type TraceSpanInfo = TraceSpanInfoUncached | TraceSpanInfoCached;
1936
+ declare function traceSpan<T>(info: TraceSpanInfoUncached, fn: () => Promise<T> | T): Promise<T>; // uncached span, callback takes no arguments; resolves with the callback's T
1937
+ declare function traceSpan<T>(info: TraceSpanInfoUncached, fn: (span: TraceActiveSpan) => Promise<T> | T): Promise<T>; // uncached span, callback receives the active span handle
1938
+ declare function traceSpan(info: TraceSpanInfoCached, fn: () => unknown): Promise<unknown>; // cached span, callback takes no arguments; resolved value is typed unknown
1939
+ declare function traceSpan(info: TraceSpanInfoCached, fn: (span: TraceActiveSpan) => unknown): Promise<unknown>; // cached span, callback receives the active span handle
1940
+ /**
1941
+ * Trace builder used to create hierarchical spans and checkpoints during eval
1942
+ * execution. `span` is typed as the full `traceSpan` overload set.
1943
+ */
1944
+ declare const evalTracer: {
1945
+ /** Run a callback inside a new trace span and record its lifecycle. */span: typeof traceSpan; /** Record a named point-in-time value alongside the trace. */
1946
+ checkpoint(name: string, data: unknown): void;
1947
+ };
1948
+ /** Build a queryable trace tree helper (`EvalTraceTree`) from a flat span list and a checkpoint map. */
1949
+ declare function buildTraceTree(spans: EvalTraceSpan[], checkpoints: Map<string, unknown>): EvalTraceTree;
1950
+ /** Hash the components of a cache key (namespace, code fingerprint, and key value) into a deterministic hex digest. */
1951
+ declare function hashCacheKey(input: {
1952
+ namespace: string;
1953
+ codeFingerprint: string;
1954
+ key: unknown;
1955
+ }): string;
1956
+ //#endregion
1957
+ //#region ../runner/src/cacheStore.d.ts
1958
+ /** Filter accepted by `FsCacheStore.clear` (and by `EvalRunner.clearCache`) to narrow the set of entries removed; both fields are optional. */
1959
+ type CacheClearFilter = {
1960
+ namespace?: string;
1961
+ key?: string;
1962
+ };
1963
+ //#endregion
1964
+ //#region ../runner/src/runner.d.ts
1965
+ /** Imperative runner interface used by the server and CLI. */
1966
+ type EvalRunner = {
1967
+ /** Load workspace config, discover evals, and start file watching when enabled. */init(): Promise<void>; /** Return the currently discovered eval summaries for the active workspace. */
1968
+ getEvals(): EvalSummary[]; /** Look up one discovered eval by id. */
1969
+ getEval(id: string): EvalSummary | undefined; /** Re-scan configured eval files and emit a discovery update to listeners. */
1970
+ refreshDiscovery(): Promise<void>; /** Start a run from `request`; the promise resolves with the run's manifest, summary, and case rows. */
1971
+ startRun(request: CreateRunRequest): Promise<{
1972
+ manifest: RunManifest;
1973
+ summary: RunSummary;
1974
+ cases: CaseRow[];
1975
+ }>; /** Return run manifests tracked in memory, including persisted runs loaded during init. */
1976
+ getRuns(): RunManifest[]; /** Return one run with its summary and case rows when available in memory. */
1977
+ getRun(id: string): {
1978
+ manifest: RunManifest;
1979
+ summary: RunSummary;
1980
+ cases: CaseRow[];
1981
+ } | undefined; /** Request cancellation for an in-flight run. */
1982
+ cancelRun(id: string): void; /** Return full details for a single case in a run, when available. */
1983
+ getCaseDetail(runId: string, caseId: string): CaseDetail | undefined; /** Subscribe to streamed events for a specific run. */
1984
+ subscribe(runId: string, listener: (event: SseEnvelope) => void): () => void; /** Subscribe to discovery updates triggered by file changes or manual refresh. */
1985
+ subscribeDiscovery(listener: (event: SseEnvelope) => void): () => void; /** Resolve the workspace root backing this runner instance. */
1986
+ getWorkspaceRoot(): string; /** Resolve a persisted artifact path when artifact storage is supported. */
1987
+ getArtifactPath(artifactId: string): string | undefined; /** Return summaries for every persisted cache entry in the workspace. */
1988
+ listCache(): Promise<CacheListItem[]>;
1989
+ /**
1990
+ * Remove cache entries matching `filter`, or all entries when no filter is
1991
+ * supplied.
1992
+ */
1993
+ clearCache(filter?: CacheClearFilter): Promise<void>; /** Recompute persisted case and run statuses for terminal runs touching one eval. */
1994
+ recomputeStatusesForEval(evalId: string): Promise<{
1995
+ updatedRuns: number;
1996
+ }>; /** Delete terminal persisted runs that touch one eval from in-memory history and disk. */
1997
+ cleanRunsForEval(evalId: string): Promise<{
1998
+ deletedRuns: number;
1999
+ }>; /** Persist a UI-authored manual score for one case and recompute affected summaries. */
2000
+ updateManualScore(params: {
2001
+ runId: string;
2002
+ caseId: string;
2003
+ scoreKey: string;
2004
+ value: number | null;
2005
+ }): Promise<{
2006
+ updated: true;
2007
+ run: {
2008
+ manifest: RunManifest;
2009
+ summary: RunSummary;
2010
+ cases: CaseRow[];
2011
+ };
2012
+ caseDetail: CaseDetail;
2013
+ } | {
2014
+ updated: false;
2015
+ reason: string;
2016
+ }>;
2017
+ /**
2018
+ * Delete one persisted run from in-memory history and disk.
2019
+ *
2020
+ * Ignored for in-flight runs — cancel first, then delete.
2021
+ * Returns `deleted: false` when the run is missing or still running.
2022
+ */
2023
+ deleteRun(runId: string): Promise<{
2024
+ deleted: boolean;
2025
+ }>;
2026
+ };
2027
+ type CreateRunnerOptions = {
2028
+ /** Presumably controls the file watching that `EvalRunner.init()` starts "when enabled" (confirm the default). */watchForChanges?: boolean;
2029
+ };
2030
+ /** Create an in-memory eval runner bound to the current workspace config; the options argument may be omitted. */
2031
+ declare function createRunner({
2032
+ watchForChanges
2033
+ }?: CreateRunnerOptions): EvalRunner;
2034
+ //#endregion
2035
+ //#region src/cli.d.ts
2036
+ /**
2037
+ * Run the Agent Evals CLI against the current workspace; the returned promise resolves with no value.
2038
+ *
2039
+ * @param argv Raw command-line arguments excluding the executable name.
2040
+ */
2041
+ declare function runCli(argv: string[]): Promise<void>;
2042
+ //#endregion
2043
+ export { type AgentEvalsConfig, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheListItem, type CacheMode, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalRunner, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceTree, type FileRef, type JsonCell, type NumberDisplayOptions, type RepoFileRef, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TraceSpanKind, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheListItemSchema, cacheModeSchema, cacheRecordingOpSchema, cacheRecordingSchema, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, 
createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, fileRefSchema, getCurrentScope, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, hashCacheKey, incrementEvalOutput, isInEvalScope, jsonCellSchema, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanKindSchema, traceSpanSchema, trialSelectionModeSchema, updateManualScoreRequestSchema };