@jean.gnc/harness-kit 0.12.7 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/README.md +43 -0
  2. package/dist/cli.js +13 -1
  3. package/dist/cli.js.map +1 -1
  4. package/dist/eval/capture.d.ts +23 -0
  5. package/dist/eval/capture.d.ts.map +1 -0
  6. package/dist/eval/capture.js +79 -0
  7. package/dist/eval/capture.js.map +1 -0
  8. package/dist/eval/cases.d.ts +10 -2
  9. package/dist/eval/cases.d.ts.map +1 -1
  10. package/dist/eval/cases.js +9 -3
  11. package/dist/eval/cases.js.map +1 -1
  12. package/dist/eval/detect.d.ts +1 -0
  13. package/dist/eval/detect.d.ts.map +1 -1
  14. package/dist/eval/detect.js +1 -1
  15. package/dist/eval/detect.js.map +1 -1
  16. package/dist/eval/grade-deterministic.d.ts +9 -0
  17. package/dist/eval/grade-deterministic.d.ts.map +1 -0
  18. package/dist/eval/grade-deterministic.js +87 -0
  19. package/dist/eval/grade-deterministic.js.map +1 -0
  20. package/dist/eval/grade-judge.d.ts +12 -0
  21. package/dist/eval/grade-judge.d.ts.map +1 -0
  22. package/dist/eval/grade-judge.js +14 -0
  23. package/dist/eval/grade-judge.js.map +1 -0
  24. package/dist/eval/grade.d.ts +5 -0
  25. package/dist/eval/grade.d.ts.map +1 -0
  26. package/dist/eval/grade.js +25 -0
  27. package/dist/eval/grade.js.map +1 -0
  28. package/dist/eval/index.d.ts +4 -0
  29. package/dist/eval/index.d.ts.map +1 -1
  30. package/dist/eval/index.js +27 -5
  31. package/dist/eval/index.js.map +1 -1
  32. package/dist/eval/judge.d.ts +26 -0
  33. package/dist/eval/judge.d.ts.map +1 -0
  34. package/dist/eval/judge.js +55 -0
  35. package/dist/eval/judge.js.map +1 -0
  36. package/dist/eval/report.d.ts +5 -1
  37. package/dist/eval/report.d.ts.map +1 -1
  38. package/dist/eval/report.js +66 -13
  39. package/dist/eval/report.js.map +1 -1
  40. package/dist/eval/runner.d.ts +13 -5
  41. package/dist/eval/runner.d.ts.map +1 -1
  42. package/dist/eval/runner.js +105 -31
  43. package/dist/eval/runner.js.map +1 -1
  44. package/dist/eval/schema.d.ts +644 -29
  45. package/dist/eval/schema.d.ts.map +1 -1
  46. package/dist/eval/schema.js +57 -6
  47. package/dist/eval/schema.js.map +1 -1
  48. package/dist/eval/score.d.ts +8 -0
  49. package/dist/eval/score.d.ts.map +1 -1
  50. package/dist/eval/score.js +17 -0
  51. package/dist/eval/score.js.map +1 -1
  52. package/dist/skill/includes.d.ts +4 -0
  53. package/dist/skill/includes.d.ts.map +1 -1
  54. package/dist/skill/includes.js +38 -32
  55. package/dist/skill/includes.js.map +1 -1
  56. package/package.json +2 -1
@@ -33,9 +33,131 @@ declare const Expectation: z.ZodUnion<[z.ZodObject<{
33
33
  }, {
34
34
  noSkill: true;
35
35
  }>]>;
36
- declare const Case: z.ZodObject<{
37
- id: z.ZodString;
38
- prompt: z.ZodString;
36
+ declare const Assertion: z.ZodDiscriminatedUnion<"kind", [z.ZodObject<{
37
+ kind: z.ZodLiteral<"outputMatches">;
38
+ pattern: z.ZodString;
39
+ regex: z.ZodDefault<z.ZodBoolean>;
40
+ }, "strip", z.ZodTypeAny, {
41
+ regex: boolean;
42
+ kind: "outputMatches";
43
+ pattern: string;
44
+ }, {
45
+ kind: "outputMatches";
46
+ pattern: string;
47
+ regex?: boolean | undefined;
48
+ }>, z.ZodObject<{
49
+ kind: z.ZodLiteral<"outputExcludes">;
50
+ pattern: z.ZodString;
51
+ regex: z.ZodDefault<z.ZodBoolean>;
52
+ }, "strip", z.ZodTypeAny, {
53
+ regex: boolean;
54
+ kind: "outputExcludes";
55
+ pattern: string;
56
+ }, {
57
+ kind: "outputExcludes";
58
+ pattern: string;
59
+ regex?: boolean | undefined;
60
+ }>, z.ZodObject<{
61
+ kind: z.ZodLiteral<"usedTool">;
62
+ tool: z.ZodString;
63
+ }, "strip", z.ZodTypeAny, {
64
+ kind: "usedTool";
65
+ tool: string;
66
+ }, {
67
+ kind: "usedTool";
68
+ tool: string;
69
+ }>, z.ZodObject<{
70
+ kind: z.ZodLiteral<"didNotUseTool">;
71
+ tool: z.ZodString;
72
+ }, "strip", z.ZodTypeAny, {
73
+ kind: "didNotUseTool";
74
+ tool: string;
75
+ }, {
76
+ kind: "didNotUseTool";
77
+ tool: string;
78
+ }>, z.ZodObject<{
79
+ kind: z.ZodLiteral<"wroteFile">;
80
+ path: z.ZodString;
81
+ contentMatches: z.ZodOptional<z.ZodString>;
82
+ regex: z.ZodDefault<z.ZodBoolean>;
83
+ }, "strip", z.ZodTypeAny, {
84
+ path: string;
85
+ regex: boolean;
86
+ kind: "wroteFile";
87
+ contentMatches?: string | undefined;
88
+ }, {
89
+ path: string;
90
+ kind: "wroteFile";
91
+ regex?: boolean | undefined;
92
+ contentMatches?: string | undefined;
93
+ }>]>;
94
+ declare const CombineRule: z.ZodDiscriminatedUnion<"combine", [z.ZodObject<{
95
+ combine: z.ZodLiteral<"all">;
96
+ }, "strip", z.ZodTypeAny, {
97
+ combine: "all";
98
+ }, {
99
+ combine: "all";
100
+ }>, z.ZodObject<{
101
+ combine: z.ZodLiteral<"fraction">;
102
+ threshold: z.ZodNumber;
103
+ }, "strip", z.ZodTypeAny, {
104
+ combine: "fraction";
105
+ threshold: number;
106
+ }, {
107
+ combine: "fraction";
108
+ threshold: number;
109
+ }>]>;
110
+ declare const Rubric: z.ZodObject<{
111
+ dimensions: z.ZodArray<z.ZodObject<{
112
+ dimension: z.ZodString;
113
+ criterion: z.ZodString;
114
+ }, "strip", z.ZodTypeAny, {
115
+ dimension: string;
116
+ criterion: string;
117
+ }, {
118
+ dimension: string;
119
+ criterion: string;
120
+ }>, "many">;
121
+ combine: z.ZodDefault<z.ZodDiscriminatedUnion<"combine", [z.ZodObject<{
122
+ combine: z.ZodLiteral<"all">;
123
+ }, "strip", z.ZodTypeAny, {
124
+ combine: "all";
125
+ }, {
126
+ combine: "all";
127
+ }>, z.ZodObject<{
128
+ combine: z.ZodLiteral<"fraction">;
129
+ threshold: z.ZodNumber;
130
+ }, "strip", z.ZodTypeAny, {
131
+ combine: "fraction";
132
+ threshold: number;
133
+ }, {
134
+ combine: "fraction";
135
+ threshold: number;
136
+ }>]>>;
137
+ }, "strip", z.ZodTypeAny, {
138
+ combine: {
139
+ combine: "all";
140
+ } | {
141
+ combine: "fraction";
142
+ threshold: number;
143
+ };
144
+ dimensions: {
145
+ dimension: string;
146
+ criterion: string;
147
+ }[];
148
+ }, {
149
+ dimensions: {
150
+ dimension: string;
151
+ criterion: string;
152
+ }[];
153
+ combine?: {
154
+ combine: "all";
155
+ } | {
156
+ combine: "fraction";
157
+ threshold: number;
158
+ } | undefined;
159
+ }>;
160
+ declare const RoutingCase: z.ZodObject<{
39
161
  expect: z.ZodUnion<[z.ZodObject<{
40
162
  first: z.ZodString;
41
163
  not: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
@@ -70,13 +192,13 @@ declare const Case: z.ZodObject<{
70
192
  }, {
71
193
  noSkill: true;
72
194
  }>]>;
195
+ id: z.ZodString;
196
+ prompt: z.ZodString;
73
197
  cwd: z.ZodOptional<z.ZodString>;
74
198
  runs: z.ZodOptional<z.ZodNumber>;
75
199
  threshold: z.ZodOptional<z.ZodNumber>;
76
200
  note: z.ZodOptional<z.ZodString>;
77
- }, "strip", z.ZodTypeAny, {
78
- id: string;
79
- prompt: string;
201
+ }, "strict", z.ZodTypeAny, {
80
202
  expect: {
81
203
  first: string;
82
204
  not?: string[] | undefined;
@@ -89,13 +211,13 @@ declare const Case: z.ZodObject<{
89
211
  } | {
90
212
  noSkill: true;
91
213
  };
214
+ id: string;
215
+ prompt: string;
216
+ threshold?: number | undefined;
92
217
  cwd?: string | undefined;
93
218
  runs?: number | undefined;
94
- threshold?: number | undefined;
95
219
  note?: string | undefined;
96
220
  }, {
97
- id: string;
98
- prompt: string;
99
221
  expect: {
100
222
  first: string;
101
223
  not?: string[] | undefined;
@@ -108,18 +230,215 @@ declare const Case: z.ZodObject<{
108
230
  } | {
109
231
  noSkill: true;
110
232
  };
233
+ id: string;
234
+ prompt: string;
235
+ threshold?: number | undefined;
111
236
  cwd?: string | undefined;
112
237
  runs?: number | undefined;
238
+ note?: string | undefined;
239
+ }>;
240
+ declare const SolvingCase: z.ZodObject<{
241
+ expectSkill: z.ZodOptional<z.ZodString>;
242
+ assert: z.ZodDefault<z.ZodArray<z.ZodDiscriminatedUnion<"kind", [z.ZodObject<{
243
+ kind: z.ZodLiteral<"outputMatches">;
244
+ pattern: z.ZodString;
245
+ regex: z.ZodDefault<z.ZodBoolean>;
246
+ }, "strip", z.ZodTypeAny, {
247
+ regex: boolean;
248
+ kind: "outputMatches";
249
+ pattern: string;
250
+ }, {
251
+ kind: "outputMatches";
252
+ pattern: string;
253
+ regex?: boolean | undefined;
254
+ }>, z.ZodObject<{
255
+ kind: z.ZodLiteral<"outputExcludes">;
256
+ pattern: z.ZodString;
257
+ regex: z.ZodDefault<z.ZodBoolean>;
258
+ }, "strip", z.ZodTypeAny, {
259
+ regex: boolean;
260
+ kind: "outputExcludes";
261
+ pattern: string;
262
+ }, {
263
+ kind: "outputExcludes";
264
+ pattern: string;
265
+ regex?: boolean | undefined;
266
+ }>, z.ZodObject<{
267
+ kind: z.ZodLiteral<"usedTool">;
268
+ tool: z.ZodString;
269
+ }, "strip", z.ZodTypeAny, {
270
+ kind: "usedTool";
271
+ tool: string;
272
+ }, {
273
+ kind: "usedTool";
274
+ tool: string;
275
+ }>, z.ZodObject<{
276
+ kind: z.ZodLiteral<"didNotUseTool">;
277
+ tool: z.ZodString;
278
+ }, "strip", z.ZodTypeAny, {
279
+ kind: "didNotUseTool";
280
+ tool: string;
281
+ }, {
282
+ kind: "didNotUseTool";
283
+ tool: string;
284
+ }>, z.ZodObject<{
285
+ kind: z.ZodLiteral<"wroteFile">;
286
+ path: z.ZodString;
287
+ contentMatches: z.ZodOptional<z.ZodString>;
288
+ regex: z.ZodDefault<z.ZodBoolean>;
289
+ }, "strip", z.ZodTypeAny, {
290
+ path: string;
291
+ regex: boolean;
292
+ kind: "wroteFile";
293
+ contentMatches?: string | undefined;
294
+ }, {
295
+ path: string;
296
+ kind: "wroteFile";
297
+ regex?: boolean | undefined;
298
+ contentMatches?: string | undefined;
299
+ }>]>, "many">>;
300
+ rubric: z.ZodOptional<z.ZodObject<{
301
+ dimensions: z.ZodArray<z.ZodObject<{
302
+ dimension: z.ZodString;
303
+ criterion: z.ZodString;
304
+ }, "strip", z.ZodTypeAny, {
305
+ dimension: string;
306
+ criterion: string;
307
+ }, {
308
+ dimension: string;
309
+ criterion: string;
310
+ }>, "many">;
311
+ combine: z.ZodDefault<z.ZodDiscriminatedUnion<"combine", [z.ZodObject<{
312
+ combine: z.ZodLiteral<"all">;
313
+ }, "strip", z.ZodTypeAny, {
314
+ combine: "all";
315
+ }, {
316
+ combine: "all";
317
+ }>, z.ZodObject<{
318
+ combine: z.ZodLiteral<"fraction">;
319
+ threshold: z.ZodNumber;
320
+ }, "strip", z.ZodTypeAny, {
321
+ combine: "fraction";
322
+ threshold: number;
323
+ }, {
324
+ combine: "fraction";
325
+ threshold: number;
326
+ }>]>>;
327
+ }, "strip", z.ZodTypeAny, {
328
+ combine: {
329
+ combine: "all";
330
+ } | {
331
+ combine: "fraction";
332
+ threshold: number;
333
+ };
334
+ dimensions: {
335
+ dimension: string;
336
+ criterion: string;
337
+ }[];
338
+ }, {
339
+ dimensions: {
340
+ dimension: string;
341
+ criterion: string;
342
+ }[];
343
+ combine?: {
344
+ combine: "all";
345
+ } | {
346
+ combine: "fraction";
347
+ threshold: number;
348
+ } | undefined;
349
+ }>>;
350
+ id: z.ZodString;
351
+ prompt: z.ZodString;
352
+ cwd: z.ZodOptional<z.ZodString>;
353
+ runs: z.ZodOptional<z.ZodNumber>;
354
+ threshold: z.ZodOptional<z.ZodNumber>;
355
+ note: z.ZodOptional<z.ZodString>;
356
+ }, "strict", z.ZodTypeAny, {
357
+ id: string;
358
+ prompt: string;
359
+ assert: ({
360
+ regex: boolean;
361
+ kind: "outputMatches";
362
+ pattern: string;
363
+ } | {
364
+ regex: boolean;
365
+ kind: "outputExcludes";
366
+ pattern: string;
367
+ } | {
368
+ kind: "usedTool";
369
+ tool: string;
370
+ } | {
371
+ kind: "didNotUseTool";
372
+ tool: string;
373
+ } | {
374
+ path: string;
375
+ regex: boolean;
376
+ kind: "wroteFile";
377
+ contentMatches?: string | undefined;
378
+ })[];
379
+ threshold?: number | undefined;
380
+ cwd?: string | undefined;
381
+ runs?: number | undefined;
382
+ note?: string | undefined;
383
+ expectSkill?: string | undefined;
384
+ rubric?: {
385
+ combine: {
386
+ combine: "all";
387
+ } | {
388
+ combine: "fraction";
389
+ threshold: number;
390
+ };
391
+ dimensions: {
392
+ dimension: string;
393
+ criterion: string;
394
+ }[];
395
+ } | undefined;
396
+ }, {
397
+ id: string;
398
+ prompt: string;
113
399
  threshold?: number | undefined;
400
+ cwd?: string | undefined;
401
+ runs?: number | undefined;
114
402
  note?: string | undefined;
403
+ expectSkill?: string | undefined;
404
+ assert?: ({
405
+ kind: "outputMatches";
406
+ pattern: string;
407
+ regex?: boolean | undefined;
408
+ } | {
409
+ kind: "outputExcludes";
410
+ pattern: string;
411
+ regex?: boolean | undefined;
412
+ } | {
413
+ kind: "usedTool";
414
+ tool: string;
415
+ } | {
416
+ kind: "didNotUseTool";
417
+ tool: string;
418
+ } | {
419
+ path: string;
420
+ kind: "wroteFile";
421
+ regex?: boolean | undefined;
422
+ contentMatches?: string | undefined;
423
+ })[] | undefined;
424
+ rubric?: {
425
+ dimensions: {
426
+ dimension: string;
427
+ criterion: string;
428
+ }[];
429
+ combine?: {
430
+ combine: "all";
431
+ } | {
432
+ combine: "fraction";
433
+ threshold: number;
434
+ } | undefined;
435
+ } | undefined;
115
436
  }>;
116
437
  export declare const TIERS: readonly ["routing", "solving"];
117
- export declare const CaseFileSchema: z.ZodObject<{
438
+ export declare const CaseFileSchema: z.ZodDiscriminatedUnion<"tier", [z.ZodObject<{
118
439
  suite: z.ZodString;
119
- tier: z.ZodEnum<["routing", "solving"]>;
440
+ tier: z.ZodLiteral<"routing">;
120
441
  cases: z.ZodArray<z.ZodObject<{
121
- id: z.ZodString;
122
- prompt: z.ZodString;
123
442
  expect: z.ZodUnion<[z.ZodObject<{
124
443
  first: z.ZodString;
125
444
  not: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
@@ -154,13 +473,13 @@ export declare const CaseFileSchema: z.ZodObject<{
154
473
  }, {
155
474
  noSkill: true;
156
475
  }>]>;
476
+ id: z.ZodString;
477
+ prompt: z.ZodString;
157
478
  cwd: z.ZodOptional<z.ZodString>;
158
479
  runs: z.ZodOptional<z.ZodNumber>;
159
480
  threshold: z.ZodOptional<z.ZodNumber>;
160
481
  note: z.ZodOptional<z.ZodString>;
161
- }, "strip", z.ZodTypeAny, {
162
- id: string;
163
- prompt: string;
482
+ }, "strict", z.ZodTypeAny, {
164
483
  expect: {
165
484
  first: string;
166
485
  not?: string[] | undefined;
@@ -173,13 +492,13 @@ export declare const CaseFileSchema: z.ZodObject<{
173
492
  } | {
174
493
  noSkill: true;
175
494
  };
495
+ id: string;
496
+ prompt: string;
497
+ threshold?: number | undefined;
176
498
  cwd?: string | undefined;
177
499
  runs?: number | undefined;
178
- threshold?: number | undefined;
179
500
  note?: string | undefined;
180
501
  }, {
181
- id: string;
182
- prompt: string;
183
502
  expect: {
184
503
  first: string;
185
504
  not?: string[] | undefined;
@@ -192,17 +511,17 @@ export declare const CaseFileSchema: z.ZodObject<{
192
511
  } | {
193
512
  noSkill: true;
194
513
  };
514
+ id: string;
515
+ prompt: string;
516
+ threshold?: number | undefined;
195
517
  cwd?: string | undefined;
196
518
  runs?: number | undefined;
197
- threshold?: number | undefined;
198
519
  note?: string | undefined;
199
520
  }>, "many">;
200
521
  }, "strip", z.ZodTypeAny, {
201
522
  suite: string;
202
- tier: "routing" | "solving";
523
+ tier: "routing";
203
524
  cases: {
204
- id: string;
205
- prompt: string;
206
525
  expect: {
207
526
  first: string;
208
527
  not?: string[] | undefined;
@@ -215,17 +534,17 @@ export declare const CaseFileSchema: z.ZodObject<{
215
534
  } | {
216
535
  noSkill: true;
217
536
  };
537
+ id: string;
538
+ prompt: string;
539
+ threshold?: number | undefined;
218
540
  cwd?: string | undefined;
219
541
  runs?: number | undefined;
220
- threshold?: number | undefined;
221
542
  note?: string | undefined;
222
543
  }[];
223
544
  }, {
224
545
  suite: string;
225
- tier: "routing" | "solving";
546
+ tier: "routing";
226
547
  cases: {
227
- id: string;
228
- prompt: string;
229
548
  expect: {
230
549
  first: string;
231
550
  not?: string[] | undefined;
@@ -238,16 +557,312 @@ export declare const CaseFileSchema: z.ZodObject<{
238
557
  } | {
239
558
  noSkill: true;
240
559
  };
560
+ id: string;
561
+ prompt: string;
562
+ threshold?: number | undefined;
563
+ cwd?: string | undefined;
564
+ runs?: number | undefined;
565
+ note?: string | undefined;
566
+ }[];
567
+ }>, z.ZodObject<{
568
+ suite: z.ZodString;
569
+ tier: z.ZodLiteral<"solving">;
570
+ cases: z.ZodArray<z.ZodObject<{
571
+ expectSkill: z.ZodOptional<z.ZodString>;
572
+ assert: z.ZodDefault<z.ZodArray<z.ZodDiscriminatedUnion<"kind", [z.ZodObject<{
573
+ kind: z.ZodLiteral<"outputMatches">;
574
+ pattern: z.ZodString;
575
+ regex: z.ZodDefault<z.ZodBoolean>;
576
+ }, "strip", z.ZodTypeAny, {
577
+ regex: boolean;
578
+ kind: "outputMatches";
579
+ pattern: string;
580
+ }, {
581
+ kind: "outputMatches";
582
+ pattern: string;
583
+ regex?: boolean | undefined;
584
+ }>, z.ZodObject<{
585
+ kind: z.ZodLiteral<"outputExcludes">;
586
+ pattern: z.ZodString;
587
+ regex: z.ZodDefault<z.ZodBoolean>;
588
+ }, "strip", z.ZodTypeAny, {
589
+ regex: boolean;
590
+ kind: "outputExcludes";
591
+ pattern: string;
592
+ }, {
593
+ kind: "outputExcludes";
594
+ pattern: string;
595
+ regex?: boolean | undefined;
596
+ }>, z.ZodObject<{
597
+ kind: z.ZodLiteral<"usedTool">;
598
+ tool: z.ZodString;
599
+ }, "strip", z.ZodTypeAny, {
600
+ kind: "usedTool";
601
+ tool: string;
602
+ }, {
603
+ kind: "usedTool";
604
+ tool: string;
605
+ }>, z.ZodObject<{
606
+ kind: z.ZodLiteral<"didNotUseTool">;
607
+ tool: z.ZodString;
608
+ }, "strip", z.ZodTypeAny, {
609
+ kind: "didNotUseTool";
610
+ tool: string;
611
+ }, {
612
+ kind: "didNotUseTool";
613
+ tool: string;
614
+ }>, z.ZodObject<{
615
+ kind: z.ZodLiteral<"wroteFile">;
616
+ path: z.ZodString;
617
+ contentMatches: z.ZodOptional<z.ZodString>;
618
+ regex: z.ZodDefault<z.ZodBoolean>;
619
+ }, "strip", z.ZodTypeAny, {
620
+ path: string;
621
+ regex: boolean;
622
+ kind: "wroteFile";
623
+ contentMatches?: string | undefined;
624
+ }, {
625
+ path: string;
626
+ kind: "wroteFile";
627
+ regex?: boolean | undefined;
628
+ contentMatches?: string | undefined;
629
+ }>]>, "many">>;
630
+ rubric: z.ZodOptional<z.ZodObject<{
631
+ dimensions: z.ZodArray<z.ZodObject<{
632
+ dimension: z.ZodString;
633
+ criterion: z.ZodString;
634
+ }, "strip", z.ZodTypeAny, {
635
+ dimension: string;
636
+ criterion: string;
637
+ }, {
638
+ dimension: string;
639
+ criterion: string;
640
+ }>, "many">;
641
+ combine: z.ZodDefault<z.ZodDiscriminatedUnion<"combine", [z.ZodObject<{
642
+ combine: z.ZodLiteral<"all">;
643
+ }, "strip", z.ZodTypeAny, {
644
+ combine: "all";
645
+ }, {
646
+ combine: "all";
647
+ }>, z.ZodObject<{
648
+ combine: z.ZodLiteral<"fraction">;
649
+ threshold: z.ZodNumber;
650
+ }, "strip", z.ZodTypeAny, {
651
+ combine: "fraction";
652
+ threshold: number;
653
+ }, {
654
+ combine: "fraction";
655
+ threshold: number;
656
+ }>]>>;
657
+ }, "strip", z.ZodTypeAny, {
658
+ combine: {
659
+ combine: "all";
660
+ } | {
661
+ combine: "fraction";
662
+ threshold: number;
663
+ };
664
+ dimensions: {
665
+ dimension: string;
666
+ criterion: string;
667
+ }[];
668
+ }, {
669
+ dimensions: {
670
+ dimension: string;
671
+ criterion: string;
672
+ }[];
673
+ combine?: {
674
+ combine: "all";
675
+ } | {
676
+ combine: "fraction";
677
+ threshold: number;
678
+ } | undefined;
679
+ }>>;
680
+ id: z.ZodString;
681
+ prompt: z.ZodString;
682
+ cwd: z.ZodOptional<z.ZodString>;
683
+ runs: z.ZodOptional<z.ZodNumber>;
684
+ threshold: z.ZodOptional<z.ZodNumber>;
685
+ note: z.ZodOptional<z.ZodString>;
686
+ }, "strict", z.ZodTypeAny, {
687
+ id: string;
688
+ prompt: string;
689
+ assert: ({
690
+ regex: boolean;
691
+ kind: "outputMatches";
692
+ pattern: string;
693
+ } | {
694
+ regex: boolean;
695
+ kind: "outputExcludes";
696
+ pattern: string;
697
+ } | {
698
+ kind: "usedTool";
699
+ tool: string;
700
+ } | {
701
+ kind: "didNotUseTool";
702
+ tool: string;
703
+ } | {
704
+ path: string;
705
+ regex: boolean;
706
+ kind: "wroteFile";
707
+ contentMatches?: string | undefined;
708
+ })[];
709
+ threshold?: number | undefined;
241
710
  cwd?: string | undefined;
242
711
  runs?: number | undefined;
712
+ note?: string | undefined;
713
+ expectSkill?: string | undefined;
714
+ rubric?: {
715
+ combine: {
716
+ combine: "all";
717
+ } | {
718
+ combine: "fraction";
719
+ threshold: number;
720
+ };
721
+ dimensions: {
722
+ dimension: string;
723
+ criterion: string;
724
+ }[];
725
+ } | undefined;
726
+ }, {
727
+ id: string;
728
+ prompt: string;
243
729
  threshold?: number | undefined;
730
+ cwd?: string | undefined;
731
+ runs?: number | undefined;
244
732
  note?: string | undefined;
733
+ expectSkill?: string | undefined;
734
+ assert?: ({
735
+ kind: "outputMatches";
736
+ pattern: string;
737
+ regex?: boolean | undefined;
738
+ } | {
739
+ kind: "outputExcludes";
740
+ pattern: string;
741
+ regex?: boolean | undefined;
742
+ } | {
743
+ kind: "usedTool";
744
+ tool: string;
745
+ } | {
746
+ kind: "didNotUseTool";
747
+ tool: string;
748
+ } | {
749
+ path: string;
750
+ kind: "wroteFile";
751
+ regex?: boolean | undefined;
752
+ contentMatches?: string | undefined;
753
+ })[] | undefined;
754
+ rubric?: {
755
+ dimensions: {
756
+ dimension: string;
757
+ criterion: string;
758
+ }[];
759
+ combine?: {
760
+ combine: "all";
761
+ } | {
762
+ combine: "fraction";
763
+ threshold: number;
764
+ } | undefined;
765
+ } | undefined;
766
+ }>, "many">;
767
+ }, "strip", z.ZodTypeAny, {
768
+ suite: string;
769
+ tier: "solving";
770
+ cases: {
771
+ id: string;
772
+ prompt: string;
773
+ assert: ({
774
+ regex: boolean;
775
+ kind: "outputMatches";
776
+ pattern: string;
777
+ } | {
778
+ regex: boolean;
779
+ kind: "outputExcludes";
780
+ pattern: string;
781
+ } | {
782
+ kind: "usedTool";
783
+ tool: string;
784
+ } | {
785
+ kind: "didNotUseTool";
786
+ tool: string;
787
+ } | {
788
+ path: string;
789
+ regex: boolean;
790
+ kind: "wroteFile";
791
+ contentMatches?: string | undefined;
792
+ })[];
793
+ threshold?: number | undefined;
794
+ cwd?: string | undefined;
795
+ runs?: number | undefined;
796
+ note?: string | undefined;
797
+ expectSkill?: string | undefined;
798
+ rubric?: {
799
+ combine: {
800
+ combine: "all";
801
+ } | {
802
+ combine: "fraction";
803
+ threshold: number;
804
+ };
805
+ dimensions: {
806
+ dimension: string;
807
+ criterion: string;
808
+ }[];
809
+ } | undefined;
245
810
  }[];
246
- }>;
811
+ }, {
812
+ suite: string;
813
+ tier: "solving";
814
+ cases: {
815
+ id: string;
816
+ prompt: string;
817
+ threshold?: number | undefined;
818
+ cwd?: string | undefined;
819
+ runs?: number | undefined;
820
+ note?: string | undefined;
821
+ expectSkill?: string | undefined;
822
+ assert?: ({
823
+ kind: "outputMatches";
824
+ pattern: string;
825
+ regex?: boolean | undefined;
826
+ } | {
827
+ kind: "outputExcludes";
828
+ pattern: string;
829
+ regex?: boolean | undefined;
830
+ } | {
831
+ kind: "usedTool";
832
+ tool: string;
833
+ } | {
834
+ kind: "didNotUseTool";
835
+ tool: string;
836
+ } | {
837
+ path: string;
838
+ kind: "wroteFile";
839
+ regex?: boolean | undefined;
840
+ contentMatches?: string | undefined;
841
+ })[] | undefined;
842
+ rubric?: {
843
+ dimensions: {
844
+ dimension: string;
845
+ criterion: string;
846
+ }[];
847
+ combine?: {
848
+ combine: "all";
849
+ } | {
850
+ combine: "fraction";
851
+ threshold: number;
852
+ } | undefined;
853
+ } | undefined;
854
+ }[];
855
+ }>]>;
247
856
  export type Tier = (typeof TIERS)[number];
248
857
  export type Expectation = z.infer<typeof Expectation>;
249
- export type EvalCase = z.infer<typeof Case>;
858
+ export type Assertion = z.infer<typeof Assertion>;
859
+ export type Rubric = z.infer<typeof Rubric>;
860
+ export type CombineRule = z.infer<typeof CombineRule>;
861
+ export type RoutingCase = z.infer<typeof RoutingCase>;
862
+ export type SolvingCase = z.infer<typeof SolvingCase>;
863
+ export type EvalCase = RoutingCase | SolvingCase;
250
864
  export type CaseFile = z.infer<typeof CaseFileSchema>;
251
865
  export declare function expectedSkills(expectation: Expectation): readonly string[];
866
+ export declare function caseExpectedSkills(evalCase: EvalCase): readonly string[];
252
867
  export {};
253
868
  //# sourceMappingURL=schema.d.ts.map