@sanity/ailf 4.5.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. package/canonical/grader-references/agent-harness-tools.yaml +42 -0
  2. package/canonical/grader-references/knowledge-probe-recall.yaml +36 -0
  3. package/canonical/grader-references/mcp-server-spec.yaml +51 -0
  4. package/canonical/grader-references/portable-text.yaml +48 -0
  5. package/config/rubrics.ts +38 -2
  6. package/dist/_vendor/ailf-core/artifact-registry.d.ts +197 -2
  7. package/dist/_vendor/ailf-core/artifact-registry.js +419 -5
  8. package/dist/_vendor/ailf-core/examples/index.d.ts +125 -26
  9. package/dist/_vendor/ailf-core/examples/index.js +146 -47
  10. package/dist/_vendor/ailf-core/ports/context.d.ts +26 -0
  11. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
  12. package/dist/_vendor/ailf-core/ports/index.js +1 -0
  13. package/dist/_vendor/ailf-core/ports/llm-client.d.ts +112 -0
  14. package/dist/_vendor/ailf-core/ports/llm-client.js +68 -0
  15. package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +15 -0
  16. package/dist/_vendor/ailf-core/schemas/branded-string.d.ts +40 -0
  17. package/dist/_vendor/ailf-core/schemas/branded-string.js +45 -0
  18. package/dist/_vendor/ailf-core/schemas/confidence-schema.d.ts +36 -0
  19. package/dist/_vendor/ailf-core/schemas/confidence-schema.js +32 -0
  20. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
  21. package/dist/_vendor/ailf-core/schemas/eval-config.js +8 -4
  22. package/dist/_vendor/ailf-core/schemas/index.d.ts +2 -0
  23. package/dist/_vendor/ailf-core/schemas/index.js +9 -0
  24. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
  25. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -0
  26. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +34 -8
  27. package/dist/_vendor/ailf-core/schemas/pipeline.js +23 -1
  28. package/dist/_vendor/ailf-core/services/diagnosis/registry.d.ts +40 -0
  29. package/dist/_vendor/ailf-core/services/diagnosis/registry.js +25 -0
  30. package/dist/_vendor/ailf-core/services/diagnosis-runner.d.ts +19 -0
  31. package/dist/_vendor/ailf-core/services/diagnosis-runner.js +19 -0
  32. package/dist/_vendor/ailf-core/services/index.d.ts +2 -0
  33. package/dist/_vendor/ailf-core/services/index.js +5 -0
  34. package/dist/_vendor/ailf-core/services/report-to-markdown.js +3 -2
  35. package/dist/_vendor/ailf-core/types/attribution.d.ts +82 -0
  36. package/dist/_vendor/ailf-core/types/attribution.js +18 -0
  37. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +26 -1
  38. package/dist/_vendor/ailf-core/types/branded-ids.js +80 -4
  39. package/dist/_vendor/ailf-core/types/confidence.d.ts +68 -0
  40. package/dist/_vendor/ailf-core/types/confidence.js +56 -0
  41. package/dist/_vendor/ailf-core/types/diagnosis.d.ts +169 -0
  42. package/dist/_vendor/ailf-core/types/diagnosis.js +17 -0
  43. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +16 -1
  44. package/dist/_vendor/ailf-core/types/grader-judgment.d.ts +125 -0
  45. package/dist/_vendor/ailf-core/types/grader-judgment.js +30 -0
  46. package/dist/_vendor/ailf-core/types/index.d.ts +82 -29
  47. package/dist/_vendor/ailf-core/types/index.js +16 -1
  48. package/dist/_vendor/ailf-core/types/legacy-grader-judgment.d.ts +55 -0
  49. package/dist/_vendor/ailf-core/types/legacy-grader-judgment.js +30 -0
  50. package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +1 -0
  51. package/dist/_vendor/ailf-core/types/repo-config.d.ts +8 -0
  52. package/dist/_vendor/ailf-shared/document-ref.d.ts +1 -1
  53. package/dist/adapters/api-client/build-request.d.ts +1 -0
  54. package/dist/adapters/api-client/build-request.js +3 -0
  55. package/dist/adapters/attribution/attribution-meta-writer.d.ts +35 -0
  56. package/dist/adapters/attribution/attribution-meta-writer.js +34 -0
  57. package/dist/adapters/attribution/index.d.ts +9 -0
  58. package/dist/adapters/attribution/index.js +8 -0
  59. package/dist/adapters/attribution/per-entry-attribution-writer.d.ts +56 -0
  60. package/dist/adapters/attribution/per-entry-attribution-writer.js +49 -0
  61. package/dist/adapters/config-sources/file-config-adapter.js +1 -0
  62. package/dist/adapters/grader-outputs/index.d.ts +10 -0
  63. package/dist/adapters/grader-outputs/index.js +8 -0
  64. package/dist/adapters/grader-outputs/legacy/index.d.ts +11 -0
  65. package/dist/adapters/grader-outputs/legacy/index.js +10 -0
  66. package/dist/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.d.ts +49 -0
  67. package/dist/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.js +48 -0
  68. package/dist/adapters/grader-outputs/promptfoo-grader-output.d.ts +102 -0
  69. package/dist/adapters/grader-outputs/promptfoo-grader-output.js +93 -0
  70. package/dist/adapters/index.d.ts +3 -0
  71. package/dist/adapters/index.js +4 -0
  72. package/dist/adapters/llm/anthropic-llm-client.d.ts +48 -0
  73. package/dist/adapters/llm/anthropic-llm-client.js +205 -0
  74. package/dist/adapters/llm/fake-llm-client.d.ts +49 -0
  75. package/dist/adapters/llm/fake-llm-client.js +63 -0
  76. package/dist/adapters/llm/index.d.ts +9 -0
  77. package/dist/adapters/llm/index.js +4 -0
  78. package/dist/adapters/llm/openai-llm-client.d.ts +44 -0
  79. package/dist/adapters/llm/openai-llm-client.js +168 -0
  80. package/dist/adapters/llm/pricing.d.ts +12 -0
  81. package/dist/adapters/llm/pricing.js +8 -0
  82. package/dist/adapters/llm/retry.d.ts +56 -0
  83. package/dist/adapters/llm/retry.js +66 -0
  84. package/dist/adapters/task-sources/content-lake-task-source.d.ts +5 -1
  85. package/dist/adapters/task-sources/content-lake-task-source.js +28 -2
  86. package/dist/adapters/task-sources/repo-schemas.d.ts +90 -22
  87. package/dist/adapters/task-sources/repo-schemas.js +19 -2
  88. package/dist/artifact-capture/api-gateway-artifact-writer.js +2 -1
  89. package/dist/artifact-capture/batching-api-gateway-artifact-writer.js +2 -1
  90. package/dist/artifact-capture/gcs-artifact-writer.js +3 -1
  91. package/dist/artifact-capture/local-fs-artifact-writer.js +3 -1
  92. package/dist/commands/calculate-scores.js +1 -1
  93. package/dist/commands/explain-handler.js +1 -1
  94. package/dist/commands/lookup-doc.d.ts +1 -1
  95. package/dist/commands/lookup-doc.js +3 -3
  96. package/dist/commands/pipeline-action.d.ts +6 -0
  97. package/dist/commands/pipeline-action.js +2 -0
  98. package/dist/commands/remote-pipeline.js +1 -0
  99. package/dist/composition-root.d.ts +59 -1
  100. package/dist/composition-root.js +95 -0
  101. package/dist/config/rubrics.ts +38 -2
  102. package/dist/grader/agent-harness.d.ts +14 -0
  103. package/dist/grader/agent-harness.js +17 -0
  104. package/dist/grader/common.d.ts +17 -0
  105. package/dist/grader/common.js +21 -0
  106. package/dist/grader/index.d.ts +38 -0
  107. package/dist/grader/index.js +75 -0
  108. package/dist/grader/knowledge-probe.d.ts +14 -0
  109. package/dist/grader/knowledge-probe.js +18 -0
  110. package/dist/grader/literacy.d.ts +13 -0
  111. package/dist/grader/literacy.js +17 -0
  112. package/dist/grader/mcp.d.ts +14 -0
  113. package/dist/grader/mcp.js +18 -0
  114. package/dist/orchestration/build-app-context.js +1 -0
  115. package/dist/orchestration/build-step-sequence.js +5 -0
  116. package/dist/orchestration/steps/calculate-scores-step.js +23 -1
  117. package/dist/orchestration/steps/compute-attribution-step.d.ts +44 -0
  118. package/dist/orchestration/steps/compute-attribution-step.js +279 -0
  119. package/dist/orchestration/steps/gap-analysis-step.js +35 -7
  120. package/dist/orchestration/steps/index.d.ts +1 -0
  121. package/dist/orchestration/steps/index.js +1 -0
  122. package/dist/pipeline/attribution.d.ts +15 -0
  123. package/dist/pipeline/attribution.js +18 -9
  124. package/dist/pipeline/borderline-consensus-runner.d.ts +63 -0
  125. package/dist/pipeline/borderline-consensus-runner.js +124 -0
  126. package/dist/pipeline/borderline-detector.d.ts +24 -0
  127. package/dist/pipeline/borderline-detector.js +26 -0
  128. package/dist/pipeline/calculate-scores.d.ts +114 -3
  129. package/dist/pipeline/calculate-scores.js +426 -24
  130. package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
  131. package/dist/pipeline/compiler/literacy-bridge.js +35 -17
  132. package/dist/pipeline/compiler/rubric-resolution.d.ts +15 -0
  133. package/dist/pipeline/compiler/rubric-resolution.js +9 -1
  134. package/dist/pipeline/compute-attribution.d.ts +80 -0
  135. package/dist/pipeline/compute-attribution.js +196 -0
  136. package/dist/pipeline/failure-modes.d.ts +52 -17
  137. package/dist/pipeline/failure-modes.js +178 -117
  138. package/dist/pipeline/map-request-to-config.js +1 -0
  139. package/package.json +6 -4
@@ -32,6 +32,40 @@ export type CuratedAssertionType = (typeof CURATED_ASSERTION_TYPES)[number];
32
32
  */
33
33
  export declare const RUBRIC_TEMPLATE_NAMES: readonly ["task-completion", "code-correctness", "doc-coverage", "mcp-input-validation", "mcp-output-correctness", "mcp-error-handling", "mcp-security", "factual-correctness", "completeness", "currency", "process-quality", "agent-output", "agent-tool-usage"];
34
34
  export type RubricTemplateName = (typeof RUBRIC_TEMPLATE_NAMES)[number];
35
+ /**
36
+ * A single criterion within an llm-rubric assertion. Stable id-text pair.
37
+ */
38
+ export declare const CriterionRefSchema: z.ZodObject<{
39
+ id: z.ZodString;
40
+ text: z.ZodString;
41
+ }, z.core.$strip>;
42
+ /**
43
+ * A templated LLM-rubric assertion — uses one of the predefined rubric
44
+ * templates with author-supplied criteria.
45
+ */
46
+ export declare const TemplatedAssertionSchema: z.ZodObject<{
47
+ type: z.ZodLiteral<"llm-rubric">;
48
+ template: z.ZodEnum<{
49
+ "task-completion": "task-completion";
50
+ "code-correctness": "code-correctness";
51
+ "doc-coverage": "doc-coverage";
52
+ "mcp-input-validation": "mcp-input-validation";
53
+ "mcp-output-correctness": "mcp-output-correctness";
54
+ "mcp-error-handling": "mcp-error-handling";
55
+ "mcp-security": "mcp-security";
56
+ "factual-correctness": "factual-correctness";
57
+ completeness: "completeness";
58
+ currency: "currency";
59
+ "process-quality": "process-quality";
60
+ "agent-output": "agent-output";
61
+ "agent-tool-usage": "agent-tool-usage";
62
+ }>;
63
+ criteria: z.ZodArray<z.ZodObject<{
64
+ id: z.ZodString;
65
+ text: z.ZodString;
66
+ }, z.core.$strip>>;
67
+ weight: z.ZodOptional<z.ZodNumber>;
68
+ }, z.core.$strip>;
35
69
  /**
36
70
  * Zod schema for a single task definition — a mode-discriminated union
37
71
  * mirroring `GeneralizedTaskDefinition`.
@@ -73,10 +107,14 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
73
107
  "agent-output": "agent-output";
74
108
  "agent-tool-usage": "agent-tool-usage";
75
109
  }>;
76
- criteria: z.ZodArray<z.ZodString>;
110
+ criteria: z.ZodArray<z.ZodObject<{
111
+ id: z.ZodString;
112
+ text: z.ZodString;
113
+ }, z.core.$strip>>;
77
114
  weight: z.ZodOptional<z.ZodNumber>;
78
115
  }, z.core.$strip>, z.ZodObject<{
79
116
  type: z.ZodEnum<{
117
+ cost: "cost";
80
118
  "llm-rubric": "llm-rubric";
81
119
  contains: "contains";
82
120
  "contains-any": "contains-any";
@@ -87,7 +125,6 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
87
125
  regex: "regex";
88
126
  javascript: "javascript";
89
127
  similar: "similar";
90
- cost: "cost";
91
128
  latency: "latency";
92
129
  "file-exists": "file-exists";
93
130
  "file-contains": "file-contains";
@@ -187,10 +224,14 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
187
224
  "agent-output": "agent-output";
188
225
  "agent-tool-usage": "agent-tool-usage";
189
226
  }>;
190
- criteria: z.ZodArray<z.ZodString>;
227
+ criteria: z.ZodArray<z.ZodObject<{
228
+ id: z.ZodString;
229
+ text: z.ZodString;
230
+ }, z.core.$strip>>;
191
231
  weight: z.ZodOptional<z.ZodNumber>;
192
232
  }, z.core.$strip>, z.ZodObject<{
193
233
  type: z.ZodEnum<{
234
+ cost: "cost";
194
235
  "llm-rubric": "llm-rubric";
195
236
  contains: "contains";
196
237
  "contains-any": "contains-any";
@@ -201,7 +242,6 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
201
242
  regex: "regex";
202
243
  javascript: "javascript";
203
244
  similar: "similar";
204
- cost: "cost";
205
245
  latency: "latency";
206
246
  "file-exists": "file-exists";
207
247
  "file-contains": "file-contains";
@@ -341,10 +381,14 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
341
381
  "agent-output": "agent-output";
342
382
  "agent-tool-usage": "agent-tool-usage";
343
383
  }>;
344
- criteria: z.ZodArray<z.ZodString>;
384
+ criteria: z.ZodArray<z.ZodObject<{
385
+ id: z.ZodString;
386
+ text: z.ZodString;
387
+ }, z.core.$strip>>;
345
388
  weight: z.ZodOptional<z.ZodNumber>;
346
389
  }, z.core.$strip>, z.ZodObject<{
347
390
  type: z.ZodEnum<{
391
+ cost: "cost";
348
392
  "llm-rubric": "llm-rubric";
349
393
  contains: "contains";
350
394
  "contains-any": "contains-any";
@@ -355,7 +399,6 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
355
399
  regex: "regex";
356
400
  javascript: "javascript";
357
401
  similar: "similar";
358
- cost: "cost";
359
402
  latency: "latency";
360
403
  "file-exists": "file-exists";
361
404
  "file-contains": "file-contains";
@@ -472,10 +515,14 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
472
515
  "agent-output": "agent-output";
473
516
  "agent-tool-usage": "agent-tool-usage";
474
517
  }>;
475
- criteria: z.ZodArray<z.ZodString>;
518
+ criteria: z.ZodArray<z.ZodObject<{
519
+ id: z.ZodString;
520
+ text: z.ZodString;
521
+ }, z.core.$strip>>;
476
522
  weight: z.ZodOptional<z.ZodNumber>;
477
523
  }, z.core.$strip>, z.ZodObject<{
478
524
  type: z.ZodEnum<{
525
+ cost: "cost";
479
526
  "llm-rubric": "llm-rubric";
480
527
  contains: "contains";
481
528
  "contains-any": "contains-any";
@@ -486,7 +533,6 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
486
533
  regex: "regex";
487
534
  javascript: "javascript";
488
535
  similar: "similar";
489
- cost: "cost";
490
536
  latency: "latency";
491
537
  "file-exists": "file-exists";
492
538
  "file-contains": "file-contains";
@@ -591,10 +637,14 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
591
637
  "agent-output": "agent-output";
592
638
  "agent-tool-usage": "agent-tool-usage";
593
639
  }>;
594
- criteria: z.ZodArray<z.ZodString>;
640
+ criteria: z.ZodArray<z.ZodObject<{
641
+ id: z.ZodString;
642
+ text: z.ZodString;
643
+ }, z.core.$strip>>;
595
644
  weight: z.ZodOptional<z.ZodNumber>;
596
645
  }, z.core.$strip>, z.ZodObject<{
597
646
  type: z.ZodEnum<{
647
+ cost: "cost";
598
648
  "llm-rubric": "llm-rubric";
599
649
  contains: "contains";
600
650
  "contains-any": "contains-any";
@@ -605,7 +655,6 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
605
655
  regex: "regex";
606
656
  javascript: "javascript";
607
657
  similar: "similar";
608
- cost: "cost";
609
658
  latency: "latency";
610
659
  "file-exists": "file-exists";
611
660
  "file-contains": "file-contains";
@@ -699,10 +748,14 @@ export declare const ContentLakeAuthorableTaskSchema: z.ZodObject<{
699
748
  "agent-output": "agent-output";
700
749
  "agent-tool-usage": "agent-tool-usage";
701
750
  }>;
702
- criteria: z.ZodArray<z.ZodString>;
751
+ criteria: z.ZodArray<z.ZodObject<{
752
+ id: z.ZodString;
753
+ text: z.ZodString;
754
+ }, z.core.$strip>>;
703
755
  weight: z.ZodOptional<z.ZodNumber>;
704
756
  }, z.core.$strip>, z.ZodObject<{
705
757
  type: z.ZodEnum<{
758
+ cost: "cost";
706
759
  "llm-rubric": "llm-rubric";
707
760
  contains: "contains";
708
761
  "contains-any": "contains-any";
@@ -713,7 +766,6 @@ export declare const ContentLakeAuthorableTaskSchema: z.ZodObject<{
713
766
  regex: "regex";
714
767
  javascript: "javascript";
715
768
  similar: "similar";
716
- cost: "cost";
717
769
  latency: "latency";
718
770
  "file-exists": "file-exists";
719
771
  "file-contains": "file-contains";
@@ -819,10 +871,14 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
819
871
  "agent-output": "agent-output";
820
872
  "agent-tool-usage": "agent-tool-usage";
821
873
  }>;
822
- criteria: z.ZodArray<z.ZodString>;
874
+ criteria: z.ZodArray<z.ZodObject<{
875
+ id: z.ZodString;
876
+ text: z.ZodString;
877
+ }, z.core.$strip>>;
823
878
  weight: z.ZodOptional<z.ZodNumber>;
824
879
  }, z.core.$strip>, z.ZodObject<{
825
880
  type: z.ZodEnum<{
881
+ cost: "cost";
826
882
  "llm-rubric": "llm-rubric";
827
883
  contains: "contains";
828
884
  "contains-any": "contains-any";
@@ -833,7 +889,6 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
833
889
  regex: "regex";
834
890
  javascript: "javascript";
835
891
  similar: "similar";
836
- cost: "cost";
837
892
  latency: "latency";
838
893
  "file-exists": "file-exists";
839
894
  "file-contains": "file-contains";
@@ -933,10 +988,14 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
933
988
  "agent-output": "agent-output";
934
989
  "agent-tool-usage": "agent-tool-usage";
935
990
  }>;
936
- criteria: z.ZodArray<z.ZodString>;
991
+ criteria: z.ZodArray<z.ZodObject<{
992
+ id: z.ZodString;
993
+ text: z.ZodString;
994
+ }, z.core.$strip>>;
937
995
  weight: z.ZodOptional<z.ZodNumber>;
938
996
  }, z.core.$strip>, z.ZodObject<{
939
997
  type: z.ZodEnum<{
998
+ cost: "cost";
940
999
  "llm-rubric": "llm-rubric";
941
1000
  contains: "contains";
942
1001
  "contains-any": "contains-any";
@@ -947,7 +1006,6 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
947
1006
  regex: "regex";
948
1007
  javascript: "javascript";
949
1008
  similar: "similar";
950
- cost: "cost";
951
1009
  latency: "latency";
952
1010
  "file-exists": "file-exists";
953
1011
  "file-contains": "file-contains";
@@ -1087,10 +1145,14 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
1087
1145
  "agent-output": "agent-output";
1088
1146
  "agent-tool-usage": "agent-tool-usage";
1089
1147
  }>;
1090
- criteria: z.ZodArray<z.ZodString>;
1148
+ criteria: z.ZodArray<z.ZodObject<{
1149
+ id: z.ZodString;
1150
+ text: z.ZodString;
1151
+ }, z.core.$strip>>;
1091
1152
  weight: z.ZodOptional<z.ZodNumber>;
1092
1153
  }, z.core.$strip>, z.ZodObject<{
1093
1154
  type: z.ZodEnum<{
1155
+ cost: "cost";
1094
1156
  "llm-rubric": "llm-rubric";
1095
1157
  contains: "contains";
1096
1158
  "contains-any": "contains-any";
@@ -1101,7 +1163,6 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
1101
1163
  regex: "regex";
1102
1164
  javascript: "javascript";
1103
1165
  similar: "similar";
1104
- cost: "cost";
1105
1166
  latency: "latency";
1106
1167
  "file-exists": "file-exists";
1107
1168
  "file-contains": "file-contains";
@@ -1218,10 +1279,14 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
1218
1279
  "agent-output": "agent-output";
1219
1280
  "agent-tool-usage": "agent-tool-usage";
1220
1281
  }>;
1221
- criteria: z.ZodArray<z.ZodString>;
1282
+ criteria: z.ZodArray<z.ZodObject<{
1283
+ id: z.ZodString;
1284
+ text: z.ZodString;
1285
+ }, z.core.$strip>>;
1222
1286
  weight: z.ZodOptional<z.ZodNumber>;
1223
1287
  }, z.core.$strip>, z.ZodObject<{
1224
1288
  type: z.ZodEnum<{
1289
+ cost: "cost";
1225
1290
  "llm-rubric": "llm-rubric";
1226
1291
  contains: "contains";
1227
1292
  "contains-any": "contains-any";
@@ -1232,7 +1297,6 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
1232
1297
  regex: "regex";
1233
1298
  javascript: "javascript";
1234
1299
  similar: "similar";
1235
- cost: "cost";
1236
1300
  latency: "latency";
1237
1301
  "file-exists": "file-exists";
1238
1302
  "file-contains": "file-contains";
@@ -1337,10 +1401,14 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
1337
1401
  "agent-output": "agent-output";
1338
1402
  "agent-tool-usage": "agent-tool-usage";
1339
1403
  }>;
1340
- criteria: z.ZodArray<z.ZodString>;
1404
+ criteria: z.ZodArray<z.ZodObject<{
1405
+ id: z.ZodString;
1406
+ text: z.ZodString;
1407
+ }, z.core.$strip>>;
1341
1408
  weight: z.ZodOptional<z.ZodNumber>;
1342
1409
  }, z.core.$strip>, z.ZodObject<{
1343
1410
  type: z.ZodEnum<{
1411
+ cost: "cost";
1344
1412
  "llm-rubric": "llm-rubric";
1345
1413
  contains: "contains";
1346
1414
  "contains-any": "contains-any";
@@ -1351,7 +1419,6 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
1351
1419
  regex: "regex";
1352
1420
  javascript: "javascript";
1353
1421
  similar: "similar";
1354
- cost: "cost";
1355
1422
  latency: "latency";
1356
1423
  "file-exists": "file-exists";
1357
1424
  "file-contains": "file-contains";
@@ -1468,6 +1535,7 @@ export declare const RepoConfigSchema: z.ZodObject<{
1468
1535
  execution: z.ZodOptional<z.ZodObject<{
1469
1536
  concurrency: z.ZodOptional<z.ZodNumber>;
1470
1537
  graderReplications: z.ZodOptional<z.ZodNumber>;
1538
+ borderlineReplications: z.ZodOptional<z.ZodNumber>;
1471
1539
  gapAnalysis: z.ZodOptional<z.ZodBoolean>;
1472
1540
  apiUrl: z.ZodOptional<z.ZodString>;
1473
1541
  }, z.core.$strip>>;
@@ -111,14 +111,26 @@ const CanonicalDocRefSchema = z.union([
111
111
  // ---------------------------------------------------------------------------
112
112
  // Assertion schemas
113
113
  // ---------------------------------------------------------------------------
114
+ /**
115
+ * A single criterion within an llm-rubric assertion. Stable id-text pair.
116
+ */
117
+ export const CriterionRefSchema = z.object({
118
+ id: z
119
+ .string()
120
+ .min(1)
121
+ .regex(/^[a-z0-9][a-z0-9-]*$/, {
122
+ message: "criterion id must be lowercase alphanumeric with hyphens",
123
+ }),
124
+ text: z.string().min(1),
125
+ });
114
126
  /**
115
127
  * A templated LLM-rubric assertion — uses one of the predefined rubric
116
128
  * templates with author-supplied criteria.
117
129
  */
118
- const TemplatedAssertionSchema = z.object({
130
+ export const TemplatedAssertionSchema = z.object({
119
131
  type: z.literal("llm-rubric"),
120
132
  template: z.enum(RUBRIC_TEMPLATE_NAMES),
121
- criteria: z.array(z.string().min(1)).min(1),
133
+ criteria: z.array(CriterionRefSchema).min(1),
122
134
  weight: z.number().optional(),
123
135
  });
124
136
  /**
@@ -562,6 +574,11 @@ const ExecutionConfigSchema = z
562
574
  .object({
563
575
  concurrency: z.number().int().positive().optional(),
564
576
  graderReplications: z.number().int().positive().optional(),
577
+ /**
578
+ * Plan 03-04 GRAD-04 — replications per borderline judgment.
579
+ * Default 3 (composition-root). Positive integer.
580
+ */
581
+ borderlineReplications: z.number().int().positive().optional(),
565
582
  gapAnalysis: z.boolean().optional(),
566
583
  apiUrl: z.string().url().optional(),
567
584
  })
@@ -27,7 +27,7 @@
27
27
  * @see docs/decisions/D0032-run-anchored-artifact-store.md
28
28
  * @see docs/decisions/D0033-unified-run-anchored-artifact-capture.md
29
29
  */
30
- import { ARTIFACT_REGISTRY, NotImplementedError, } from "../_vendor/ailf-core/index.js";
30
+ import { ARTIFACT_REGISTRY, assertWritePolicyMatches, NotImplementedError, } from "../_vendor/ailf-core/index.js";
31
31
  import { prepareUploadBody } from "./prepare-upload-body.js";
32
32
  import { NO_OP_UPLOAD_METRICS, } from "./upload-metrics.js";
33
33
  export class ApiGatewayArtifactWriter {
@@ -40,6 +40,7 @@ export class ApiGatewayArtifactWriter {
40
40
  // ---- Canonical W0049 API ------------------------------------------------
41
41
  async emit(type, association, payload) {
42
42
  const descriptor = ARTIFACT_REGISTRY[type];
43
+ assertWritePolicyMatches("pipeline", descriptor);
43
44
  const runId = association.run;
44
45
  if (!runId) {
45
46
  console.warn(` ⚠️ emit("${type}"): association.run is required, skipping`);
@@ -25,7 +25,7 @@
25
25
  * does this writer. Traces flow through the GCS-direct writer when ADC
26
26
  * credentials are present.
27
27
  */
28
- import { ARTIFACT_REGISTRY, BULK_ENTRY_KEY, NotImplementedError, } from "../_vendor/ailf-core/index.js";
28
+ import { ARTIFACT_REGISTRY, assertWritePolicyMatches, BULK_ENTRY_KEY, NotImplementedError, } from "../_vendor/ailf-core/index.js";
29
29
  import { prepareUploadBody } from "./prepare-upload-body.js";
30
30
  import { NO_OP_UPLOAD_METRICS, } from "./upload-metrics.js";
31
31
  /**
@@ -64,6 +64,7 @@ export class BatchingApiGatewayArtifactWriter {
64
64
  // ---- ArtifactWriter surface --------------------------------------------
65
65
  async emit(type, association, payload) {
66
66
  const descriptor = ARTIFACT_REGISTRY[type];
67
+ assertWritePolicyMatches("pipeline", descriptor);
67
68
  const runId = association.run;
68
69
  if (!runId) {
69
70
  console.warn(` ⚠️ emit("${type}"): association.run is required, skipping`);
@@ -28,7 +28,7 @@
28
28
  * @see docs/decisions/D0033-unified-run-anchored-artifact-capture.md
29
29
  */
30
30
  import { Storage } from "@google-cloud/storage";
31
- import { ARTIFACT_REGISTRY, buildManifestPreview, } from "../_vendor/ailf-core/index.js";
31
+ import { ARTIFACT_REGISTRY, assertWritePolicyMatches, buildManifestPreview, } from "../_vendor/ailf-core/index.js";
32
32
  import { resolveUploadConcurrency } from "./parallel-emit.js";
33
33
  import { prepareUploadBody } from "./prepare-upload-body.js";
34
34
  import { redactArtifactData } from "./redact-artifact.js";
@@ -79,6 +79,7 @@ export class GcsArtifactWriter {
79
79
  // ---- Canonical W0049 API ------------------------------------------------
80
80
  async emit(type, association, payload) {
81
81
  const descriptor = ARTIFACT_REGISTRY[type];
82
+ assertWritePolicyMatches("pipeline", descriptor);
82
83
  const runId = association.run;
83
84
  if (!runId) {
84
85
  console.warn(` ⚠️ emit("${type}"): association.run is required, skipping`);
@@ -132,6 +133,7 @@ export class GcsArtifactWriter {
132
133
  }
133
134
  async appendNdjson(type, association, rows) {
134
135
  const descriptor = ARTIFACT_REGISTRY[type];
136
+ assertWritePolicyMatches("pipeline", descriptor);
135
137
  if (descriptor.mime !== "application/x-ndjson") {
136
138
  console.warn(` ⚠️ appendNdjson("${type}"): descriptor mime is ${descriptor.mime}, not application/x-ndjson — skipping`);
137
139
  return null;
@@ -38,7 +38,7 @@
38
38
  */
39
39
  import { promises as fs } from "node:fs";
40
40
  import path from "node:path";
41
- import { ARTIFACT_REGISTRY, buildManifestPreview, } from "../_vendor/ailf-core/index.js";
41
+ import { ARTIFACT_REGISTRY, assertWritePolicyMatches, buildManifestPreview, } from "../_vendor/ailf-core/index.js";
42
42
  import { redactArtifactData } from "./redact-artifact.js";
43
43
  // ---------------------------------------------------------------------------
44
44
  // Implementation
@@ -66,6 +66,7 @@ export class LocalFilesystemArtifactWriter {
66
66
  if (this.excludeSet.has(type))
67
67
  return null;
68
68
  const descriptor = ARTIFACT_REGISTRY[type];
69
+ assertWritePolicyMatches("pipeline", descriptor);
69
70
  const runId = association.run;
70
71
  if (!runId) {
71
72
  console.warn(` ⚠️ emit("${type}"): association.run is required, skipping`);
@@ -127,6 +128,7 @@ export class LocalFilesystemArtifactWriter {
127
128
  if (this.excludeSet.has(type))
128
129
  return null;
129
130
  const descriptor = ARTIFACT_REGISTRY[type];
131
+ assertWritePolicyMatches("pipeline", descriptor);
130
132
  if (descriptor.mime !== "application/x-ndjson") {
131
133
  console.warn(` ⚠️ appendNdjson("${type}"): descriptor mime is ${descriptor.mime}, not application/x-ndjson — skipping`);
132
134
  return null;
@@ -38,7 +38,7 @@ export function createCalculateScoresCommand() {
38
38
  remote: false,
39
39
  apiUrl: "https://ailf-api.sanity.build",
40
40
  });
41
- const result = calculateAndWriteScores({
41
+ const result = await calculateAndWriteScores({
42
42
  resultsPath,
43
43
  rootDir: ctx.config.rootDir,
44
44
  source: opts.source,
@@ -298,7 +298,7 @@ const EXPLAIN_REGISTRY = {
298
298
  ],
299
299
  },
300
300
  "lookup-doc": {
301
- description: "Search Sanity for documentation articles by keyword (find slugs for canonicalDocs)",
301
+ description: "Search Sanity for documentation articles by keyword (find slugs for contextDocs)",
302
302
  steps: [
303
303
  {
304
304
  cacheStatus: "miss",
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * lookup-doc command — search Sanity for documentation articles by keyword.
3
3
  *
4
- * Helps external contributors find the correct `slug` for canonicalDocs
4
+ * Helps external contributors find the correct `slug` for contextDocs
5
5
  * references without needing to browse the CMS or guess from URLs.
6
6
  *
7
7
  * Usage:
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * lookup-doc command — search Sanity for documentation articles by keyword.
3
3
  *
4
- * Helps external contributors find the correct `slug` for canonicalDocs
4
+ * Helps external contributors find the correct `slug` for contextDocs
5
5
  * references without needing to browse the CMS or guess from URLs.
6
6
  *
7
7
  * Usage:
@@ -14,7 +14,7 @@
14
14
  import { Command } from "commander";
15
15
  export function createLookupDocCommand() {
16
16
  return new Command("lookup-doc")
17
- .description("Search Sanity docs by keyword — find slugs for canonicalDocs references")
17
+ .description("Search Sanity docs by keyword — find slugs for contextDocs references")
18
18
  .argument("<keyword>", "Search keyword (matches title and slug)")
19
19
  .option("-l, --limit <n>", "Maximum results to show", parseInt, 10)
20
20
  .option("-s, --source <name>", "Documentation source (from sources.yaml)")
@@ -73,7 +73,7 @@ export function createLookupDocCommand() {
73
73
  console.log(` ${"".padEnd(maxSlugLen + 6)} │ Section: ${section}\n`);
74
74
  }
75
75
  console.log(" Usage in .ailf/tasks/*.yaml:\n");
76
- console.log(" canonicalDocs:");
76
+ console.log(" contextDocs:");
77
77
  console.log(` - slug: ${results[0].slug}`);
78
78
  console.log(` reason: "${results[0].title}"`);
79
79
  if (results[0].sectionSlug) {
@@ -27,6 +27,12 @@ export interface ResolvedOptions {
27
27
  dryRun: boolean;
28
28
  gapAnalysisEnabled: boolean;
29
29
  graderReplications?: number;
30
+ /**
31
+ * Replications per borderline judgment for the GRAD-04 intra-grader
32
+ * consensus pass. Sourced from `.ailf/config.yaml`'s
33
+ * `execution.borderlineReplications`.
34
+ */
35
+ borderlineReplications?: number;
30
36
  /** Grader context policy from `.ailf/config.yaml` `grader.context` */
31
37
  graderContext?: "rubric-only" | "with-docs";
32
38
  headerArgs: string[];
@@ -248,6 +248,7 @@ export function computeResolvedOptions(opts) {
248
248
  // env var (where one exists) > .ailf/config.yaml > built-in default
249
249
  const concurrency = repoConfig?.execution?.concurrency;
250
250
  const graderReplications = repoConfig?.execution?.graderReplications;
251
+ const borderlineReplications = repoConfig?.execution?.borderlineReplications;
251
252
  const gapAnalysisEnabled = repoConfig?.execution?.gapAnalysis ?? true;
252
253
  // Grader context policy. Cascade: env var > .ailf/config.yaml > unset
253
254
  // (defaults to rubric-only at the EvalConfig boundary). The env var is the
@@ -291,6 +292,7 @@ export function computeResolvedOptions(opts) {
291
292
  dryRun: opts.dryRun,
292
293
  gapAnalysisEnabled,
293
294
  graderReplications,
295
+ borderlineReplications,
294
296
  graderContext,
295
297
  headerArgs,
296
298
  impactSummary,
@@ -142,6 +142,7 @@ function toConfigSlice(opts) {
142
142
  perspectiveOverride: opts.perspectiveOverride,
143
143
  graderContext: opts.graderContext,
144
144
  graderReplications: opts.graderReplications,
145
+ borderlineReplications: opts.borderlineReplications,
145
146
  gapAnalysisEnabled: opts.gapAnalysisEnabled,
146
147
  noRemoteCache: opts.noRemoteCache,
147
148
  // D0037 / W0069 caller envelope overrides — flags override env vars
@@ -15,7 +15,8 @@
15
15
  * @see packages/core/src/ports/context.ts — AppContext interface
16
16
  * @see docs/archive/exec-plans/ports-and-adapters/phase-7-composition-root.md
17
17
  */
18
- import { type AppContext, type ArtifactWriter, type ArtifactWriterProgressOptions, type AssertionRegistration, type Logger, type ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
18
+ import { type AppContext, type ArtifactWriter, type ArtifactWriterProgressOptions, type AssertionRegistration, type LLMClient, type Logger, type ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
19
+ import { type BorderlineConsensusOptions, type BorderlineConsensusResult } from "./pipeline/borderline-consensus-runner.js";
19
20
  import { CompositeTaskSource, ContentLakeTaskSource, RepoTaskSource } from "./adapters/task-sources/index.js";
20
21
  /**
21
22
  * Create a fully wired AppContext from resolved configuration.
@@ -24,6 +25,28 @@ import { CompositeTaskSource, ContentLakeTaskSource, RepoTaskSource } from "./ad
24
25
  * Swapping an adapter is a one-line change in this function.
25
26
  */
26
27
  export declare function createAppContext(config: ResolvedConfig): AppContext;
28
+ /**
29
+ * Typed key bag passed to `createLLMClient`. The composition root reads
30
+ * env once and supplies values here; the factory stays pure so tests don't
31
+ * have to mutate `process.env`.
32
+ */
33
+ export interface LLMClientKeys {
34
+ anthropicApiKey?: string;
35
+ openaiApiKey?: string;
36
+ }
37
+ /**
38
+ * Select the LLMClient adapter based on `config.llmProvider` and the
39
+ * supplied API keys. Returns `undefined` when no usable credential is
40
+ * present — `AppContext.llmClient` stays unset and consumers handle that
41
+ * explicitly.
42
+ *
43
+ * Adapters never read `process.env` themselves (per
44
+ * `.claude/rules/typescript.md`); env mapping happens at the call site
45
+ * (typically `createAppContext`).
46
+ *
47
+ * Exported for unit-test access; not part of the public package API.
48
+ */
49
+ export declare function createLLMClient(config: ResolvedConfig, keys: LLMClientKeys, logger: Logger): LLMClient | undefined;
27
50
  /**
28
51
  * Selects the `ArtifactWriter` wiring per D0033 M4:
29
52
  *
@@ -61,3 +84,38 @@ export declare function createTaskSource(config: ResolvedConfig): CompositeTaskS
61
84
  * explicit mode whitelists.
62
85
  */
63
86
  export declare const FRAMEWORK_ASSERTIONS: AssertionRegistration[];
87
+ /**
88
+ * Severity boundaries from `packages/eval/config/thresholds.ts`
89
+ * (severity.critical/warning/info `composite-below` at L50/54/58 — 30, 50,
90
+ * 60). The borderline detector flags a judgment when its score is within
91
+ * ±5 of any of these. Composition-root reads them ONCE and threads the
92
+ * typed `readonly number[]` into `runBorderlineConsensus` rather than
93
+ * re-deriving them at each call site (Pitfall 5 — single source of truth
94
+ * for the scale).
95
+ */
96
+ export declare const BORDERLINE_SEVERITY_THRESHOLDS: readonly number[];
97
+ /**
98
+ * Default replications per borderline judgment when the caller's
99
+ * `RepoConfig.execution.borderlineReplications` is unset (locked answer
100
+ * #4 in plan 03-04). Three replications + the original score = four
101
+ * scores per consistency record, which is the minimum that produces a
102
+ * non-degenerate stdDev / median split.
103
+ */
104
+ export declare const DEFAULT_BORDERLINE_REPLICATIONS = 3;
105
+ /**
106
+ * Factory for the borderline-consensus runner. Returns a function that
107
+ * applies the severity-threshold and replication defaults from
108
+ * composition-root, leaving the live grader entry point (the `regrade`
109
+ * callback) and the candidate `judgments` array as runtime inputs.
110
+ *
111
+ * The pipeline-side caller (currently `pipeline/calculate-scores.ts`'s
112
+ * post-extraction junction) supplies the `regrade` callback that maps a
113
+ * `GraderJudgment` to a fresh score via the response/rubric text from
114
+ * the original Promptfoo result. See the runner's header for the
115
+ * rationale on injecting the regrader rather than calling `gradeOnce`
116
+ * inline (Pitfall 6 — preserve the runner's purity wrt the existing
117
+ * grader-comparison split).
118
+ */
119
+ export declare function createBorderlineConsensusRunner(opts: {
120
+ borderlineReplications?: number;
121
+ }): (args: Pick<BorderlineConsensusOptions, "judgments" | "logger" | "regrade">) => Promise<BorderlineConsensusResult>;