@sanity/ailf 4.5.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/canonical/grader-references/agent-harness-tools.yaml +42 -0
- package/canonical/grader-references/knowledge-probe-recall.yaml +36 -0
- package/canonical/grader-references/mcp-server-spec.yaml +51 -0
- package/canonical/grader-references/portable-text.yaml +48 -0
- package/config/rubrics.ts +38 -2
- package/dist/_vendor/ailf-core/artifact-registry.d.ts +197 -2
- package/dist/_vendor/ailf-core/artifact-registry.js +419 -5
- package/dist/_vendor/ailf-core/examples/index.d.ts +125 -26
- package/dist/_vendor/ailf-core/examples/index.js +146 -47
- package/dist/_vendor/ailf-core/ports/context.d.ts +26 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/ports/index.js +1 -0
- package/dist/_vendor/ailf-core/ports/llm-client.d.ts +112 -0
- package/dist/_vendor/ailf-core/ports/llm-client.js +68 -0
- package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +15 -0
- package/dist/_vendor/ailf-core/schemas/branded-string.d.ts +40 -0
- package/dist/_vendor/ailf-core/schemas/branded-string.js +45 -0
- package/dist/_vendor/ailf-core/schemas/confidence-schema.d.ts +36 -0
- package/dist/_vendor/ailf-core/schemas/confidence-schema.js +32 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +8 -4
- package/dist/_vendor/ailf-core/schemas/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/schemas/index.js +9 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +34 -8
- package/dist/_vendor/ailf-core/schemas/pipeline.js +23 -1
- package/dist/_vendor/ailf-core/services/diagnosis/registry.d.ts +40 -0
- package/dist/_vendor/ailf-core/services/diagnosis/registry.js +25 -0
- package/dist/_vendor/ailf-core/services/diagnosis-runner.d.ts +19 -0
- package/dist/_vendor/ailf-core/services/diagnosis-runner.js +19 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/services/index.js +5 -0
- package/dist/_vendor/ailf-core/services/report-to-markdown.js +3 -2
- package/dist/_vendor/ailf-core/types/attribution.d.ts +82 -0
- package/dist/_vendor/ailf-core/types/attribution.js +18 -0
- package/dist/_vendor/ailf-core/types/branded-ids.d.ts +26 -1
- package/dist/_vendor/ailf-core/types/branded-ids.js +80 -4
- package/dist/_vendor/ailf-core/types/confidence.d.ts +68 -0
- package/dist/_vendor/ailf-core/types/confidence.js +56 -0
- package/dist/_vendor/ailf-core/types/diagnosis.d.ts +169 -0
- package/dist/_vendor/ailf-core/types/diagnosis.js +17 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +16 -1
- package/dist/_vendor/ailf-core/types/grader-judgment.d.ts +125 -0
- package/dist/_vendor/ailf-core/types/grader-judgment.js +30 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +82 -29
- package/dist/_vendor/ailf-core/types/index.js +16 -1
- package/dist/_vendor/ailf-core/types/legacy-grader-judgment.d.ts +55 -0
- package/dist/_vendor/ailf-core/types/legacy-grader-judgment.js +30 -0
- package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +1 -0
- package/dist/_vendor/ailf-core/types/repo-config.d.ts +8 -0
- package/dist/_vendor/ailf-shared/document-ref.d.ts +1 -1
- package/dist/adapters/api-client/build-request.d.ts +1 -0
- package/dist/adapters/api-client/build-request.js +3 -0
- package/dist/adapters/attribution/attribution-meta-writer.d.ts +35 -0
- package/dist/adapters/attribution/attribution-meta-writer.js +34 -0
- package/dist/adapters/attribution/index.d.ts +9 -0
- package/dist/adapters/attribution/index.js +8 -0
- package/dist/adapters/attribution/per-entry-attribution-writer.d.ts +56 -0
- package/dist/adapters/attribution/per-entry-attribution-writer.js +49 -0
- package/dist/adapters/config-sources/file-config-adapter.js +1 -0
- package/dist/adapters/grader-outputs/index.d.ts +10 -0
- package/dist/adapters/grader-outputs/index.js +8 -0
- package/dist/adapters/grader-outputs/legacy/index.d.ts +11 -0
- package/dist/adapters/grader-outputs/legacy/index.js +10 -0
- package/dist/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.d.ts +49 -0
- package/dist/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.js +48 -0
- package/dist/adapters/grader-outputs/promptfoo-grader-output.d.ts +102 -0
- package/dist/adapters/grader-outputs/promptfoo-grader-output.js +93 -0
- package/dist/adapters/index.d.ts +3 -0
- package/dist/adapters/index.js +4 -0
- package/dist/adapters/llm/anthropic-llm-client.d.ts +48 -0
- package/dist/adapters/llm/anthropic-llm-client.js +205 -0
- package/dist/adapters/llm/fake-llm-client.d.ts +49 -0
- package/dist/adapters/llm/fake-llm-client.js +63 -0
- package/dist/adapters/llm/index.d.ts +9 -0
- package/dist/adapters/llm/index.js +4 -0
- package/dist/adapters/llm/openai-llm-client.d.ts +44 -0
- package/dist/adapters/llm/openai-llm-client.js +168 -0
- package/dist/adapters/llm/pricing.d.ts +12 -0
- package/dist/adapters/llm/pricing.js +8 -0
- package/dist/adapters/llm/retry.d.ts +56 -0
- package/dist/adapters/llm/retry.js +66 -0
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +5 -1
- package/dist/adapters/task-sources/content-lake-task-source.js +28 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +90 -22
- package/dist/adapters/task-sources/repo-schemas.js +19 -2
- package/dist/artifact-capture/api-gateway-artifact-writer.js +2 -1
- package/dist/artifact-capture/batching-api-gateway-artifact-writer.js +2 -1
- package/dist/artifact-capture/gcs-artifact-writer.js +3 -1
- package/dist/artifact-capture/local-fs-artifact-writer.js +3 -1
- package/dist/commands/calculate-scores.js +1 -1
- package/dist/commands/explain-handler.js +1 -1
- package/dist/commands/lookup-doc.d.ts +1 -1
- package/dist/commands/lookup-doc.js +3 -3
- package/dist/commands/pipeline-action.d.ts +6 -0
- package/dist/commands/pipeline-action.js +2 -0
- package/dist/commands/remote-pipeline.js +1 -0
- package/dist/composition-root.d.ts +59 -1
- package/dist/composition-root.js +95 -0
- package/dist/config/rubrics.ts +38 -2
- package/dist/grader/agent-harness.d.ts +14 -0
- package/dist/grader/agent-harness.js +17 -0
- package/dist/grader/common.d.ts +17 -0
- package/dist/grader/common.js +21 -0
- package/dist/grader/index.d.ts +38 -0
- package/dist/grader/index.js +75 -0
- package/dist/grader/knowledge-probe.d.ts +14 -0
- package/dist/grader/knowledge-probe.js +18 -0
- package/dist/grader/literacy.d.ts +13 -0
- package/dist/grader/literacy.js +17 -0
- package/dist/grader/mcp.d.ts +14 -0
- package/dist/grader/mcp.js +18 -0
- package/dist/orchestration/build-app-context.js +1 -0
- package/dist/orchestration/build-step-sequence.js +5 -0
- package/dist/orchestration/steps/calculate-scores-step.js +23 -1
- package/dist/orchestration/steps/compute-attribution-step.d.ts +44 -0
- package/dist/orchestration/steps/compute-attribution-step.js +279 -0
- package/dist/orchestration/steps/gap-analysis-step.js +35 -7
- package/dist/orchestration/steps/index.d.ts +1 -0
- package/dist/orchestration/steps/index.js +1 -0
- package/dist/pipeline/attribution.d.ts +15 -0
- package/dist/pipeline/attribution.js +18 -9
- package/dist/pipeline/borderline-consensus-runner.d.ts +63 -0
- package/dist/pipeline/borderline-consensus-runner.js +124 -0
- package/dist/pipeline/borderline-detector.d.ts +24 -0
- package/dist/pipeline/borderline-detector.js +26 -0
- package/dist/pipeline/calculate-scores.d.ts +114 -3
- package/dist/pipeline/calculate-scores.js +426 -24
- package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/literacy-bridge.js +35 -17
- package/dist/pipeline/compiler/rubric-resolution.d.ts +15 -0
- package/dist/pipeline/compiler/rubric-resolution.js +9 -1
- package/dist/pipeline/compute-attribution.d.ts +80 -0
- package/dist/pipeline/compute-attribution.js +196 -0
- package/dist/pipeline/failure-modes.d.ts +52 -17
- package/dist/pipeline/failure-modes.js +178 -117
- package/dist/pipeline/map-request-to-config.js +1 -0
- package/package.json +6 -4
|
@@ -32,6 +32,40 @@ export type CuratedAssertionType = (typeof CURATED_ASSERTION_TYPES)[number];
|
|
|
32
32
|
*/
|
|
33
33
|
export declare const RUBRIC_TEMPLATE_NAMES: readonly ["task-completion", "code-correctness", "doc-coverage", "mcp-input-validation", "mcp-output-correctness", "mcp-error-handling", "mcp-security", "factual-correctness", "completeness", "currency", "process-quality", "agent-output", "agent-tool-usage"];
|
|
34
34
|
export type RubricTemplateName = (typeof RUBRIC_TEMPLATE_NAMES)[number];
|
|
35
|
+
/**
|
|
36
|
+
* A single criterion within an llm-rubric assertion. Stable id-text pair.
|
|
37
|
+
*/
|
|
38
|
+
export declare const CriterionRefSchema: z.ZodObject<{
|
|
39
|
+
id: z.ZodString;
|
|
40
|
+
text: z.ZodString;
|
|
41
|
+
}, z.core.$strip>;
|
|
42
|
+
/**
|
|
43
|
+
* A templated LLM-rubric assertion — uses one of the predefined rubric
|
|
44
|
+
* templates with author-supplied criteria.
|
|
45
|
+
*/
|
|
46
|
+
export declare const TemplatedAssertionSchema: z.ZodObject<{
|
|
47
|
+
type: z.ZodLiteral<"llm-rubric">;
|
|
48
|
+
template: z.ZodEnum<{
|
|
49
|
+
"task-completion": "task-completion";
|
|
50
|
+
"code-correctness": "code-correctness";
|
|
51
|
+
"doc-coverage": "doc-coverage";
|
|
52
|
+
"mcp-input-validation": "mcp-input-validation";
|
|
53
|
+
"mcp-output-correctness": "mcp-output-correctness";
|
|
54
|
+
"mcp-error-handling": "mcp-error-handling";
|
|
55
|
+
"mcp-security": "mcp-security";
|
|
56
|
+
"factual-correctness": "factual-correctness";
|
|
57
|
+
completeness: "completeness";
|
|
58
|
+
currency: "currency";
|
|
59
|
+
"process-quality": "process-quality";
|
|
60
|
+
"agent-output": "agent-output";
|
|
61
|
+
"agent-tool-usage": "agent-tool-usage";
|
|
62
|
+
}>;
|
|
63
|
+
criteria: z.ZodArray<z.ZodObject<{
|
|
64
|
+
id: z.ZodString;
|
|
65
|
+
text: z.ZodString;
|
|
66
|
+
}, z.core.$strip>>;
|
|
67
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
68
|
+
}, z.core.$strip>;
|
|
35
69
|
/**
|
|
36
70
|
* Zod schema for a single task definition — a mode-discriminated union
|
|
37
71
|
* mirroring `GeneralizedTaskDefinition`.
|
|
@@ -73,10 +107,14 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
73
107
|
"agent-output": "agent-output";
|
|
74
108
|
"agent-tool-usage": "agent-tool-usage";
|
|
75
109
|
}>;
|
|
76
|
-
criteria: z.ZodArray<z.
|
|
110
|
+
criteria: z.ZodArray<z.ZodObject<{
|
|
111
|
+
id: z.ZodString;
|
|
112
|
+
text: z.ZodString;
|
|
113
|
+
}, z.core.$strip>>;
|
|
77
114
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
78
115
|
}, z.core.$strip>, z.ZodObject<{
|
|
79
116
|
type: z.ZodEnum<{
|
|
117
|
+
cost: "cost";
|
|
80
118
|
"llm-rubric": "llm-rubric";
|
|
81
119
|
contains: "contains";
|
|
82
120
|
"contains-any": "contains-any";
|
|
@@ -87,7 +125,6 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
87
125
|
regex: "regex";
|
|
88
126
|
javascript: "javascript";
|
|
89
127
|
similar: "similar";
|
|
90
|
-
cost: "cost";
|
|
91
128
|
latency: "latency";
|
|
92
129
|
"file-exists": "file-exists";
|
|
93
130
|
"file-contains": "file-contains";
|
|
@@ -187,10 +224,14 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
187
224
|
"agent-output": "agent-output";
|
|
188
225
|
"agent-tool-usage": "agent-tool-usage";
|
|
189
226
|
}>;
|
|
190
|
-
criteria: z.ZodArray<z.
|
|
227
|
+
criteria: z.ZodArray<z.ZodObject<{
|
|
228
|
+
id: z.ZodString;
|
|
229
|
+
text: z.ZodString;
|
|
230
|
+
}, z.core.$strip>>;
|
|
191
231
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
192
232
|
}, z.core.$strip>, z.ZodObject<{
|
|
193
233
|
type: z.ZodEnum<{
|
|
234
|
+
cost: "cost";
|
|
194
235
|
"llm-rubric": "llm-rubric";
|
|
195
236
|
contains: "contains";
|
|
196
237
|
"contains-any": "contains-any";
|
|
@@ -201,7 +242,6 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
201
242
|
regex: "regex";
|
|
202
243
|
javascript: "javascript";
|
|
203
244
|
similar: "similar";
|
|
204
|
-
cost: "cost";
|
|
205
245
|
latency: "latency";
|
|
206
246
|
"file-exists": "file-exists";
|
|
207
247
|
"file-contains": "file-contains";
|
|
@@ -341,10 +381,14 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
341
381
|
"agent-output": "agent-output";
|
|
342
382
|
"agent-tool-usage": "agent-tool-usage";
|
|
343
383
|
}>;
|
|
344
|
-
criteria: z.ZodArray<z.
|
|
384
|
+
criteria: z.ZodArray<z.ZodObject<{
|
|
385
|
+
id: z.ZodString;
|
|
386
|
+
text: z.ZodString;
|
|
387
|
+
}, z.core.$strip>>;
|
|
345
388
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
346
389
|
}, z.core.$strip>, z.ZodObject<{
|
|
347
390
|
type: z.ZodEnum<{
|
|
391
|
+
cost: "cost";
|
|
348
392
|
"llm-rubric": "llm-rubric";
|
|
349
393
|
contains: "contains";
|
|
350
394
|
"contains-any": "contains-any";
|
|
@@ -355,7 +399,6 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
355
399
|
regex: "regex";
|
|
356
400
|
javascript: "javascript";
|
|
357
401
|
similar: "similar";
|
|
358
|
-
cost: "cost";
|
|
359
402
|
latency: "latency";
|
|
360
403
|
"file-exists": "file-exists";
|
|
361
404
|
"file-contains": "file-contains";
|
|
@@ -472,10 +515,14 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
472
515
|
"agent-output": "agent-output";
|
|
473
516
|
"agent-tool-usage": "agent-tool-usage";
|
|
474
517
|
}>;
|
|
475
|
-
criteria: z.ZodArray<z.
|
|
518
|
+
criteria: z.ZodArray<z.ZodObject<{
|
|
519
|
+
id: z.ZodString;
|
|
520
|
+
text: z.ZodString;
|
|
521
|
+
}, z.core.$strip>>;
|
|
476
522
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
477
523
|
}, z.core.$strip>, z.ZodObject<{
|
|
478
524
|
type: z.ZodEnum<{
|
|
525
|
+
cost: "cost";
|
|
479
526
|
"llm-rubric": "llm-rubric";
|
|
480
527
|
contains: "contains";
|
|
481
528
|
"contains-any": "contains-any";
|
|
@@ -486,7 +533,6 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
486
533
|
regex: "regex";
|
|
487
534
|
javascript: "javascript";
|
|
488
535
|
similar: "similar";
|
|
489
|
-
cost: "cost";
|
|
490
536
|
latency: "latency";
|
|
491
537
|
"file-exists": "file-exists";
|
|
492
538
|
"file-contains": "file-contains";
|
|
@@ -591,10 +637,14 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
591
637
|
"agent-output": "agent-output";
|
|
592
638
|
"agent-tool-usage": "agent-tool-usage";
|
|
593
639
|
}>;
|
|
594
|
-
criteria: z.ZodArray<z.
|
|
640
|
+
criteria: z.ZodArray<z.ZodObject<{
|
|
641
|
+
id: z.ZodString;
|
|
642
|
+
text: z.ZodString;
|
|
643
|
+
}, z.core.$strip>>;
|
|
595
644
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
596
645
|
}, z.core.$strip>, z.ZodObject<{
|
|
597
646
|
type: z.ZodEnum<{
|
|
647
|
+
cost: "cost";
|
|
598
648
|
"llm-rubric": "llm-rubric";
|
|
599
649
|
contains: "contains";
|
|
600
650
|
"contains-any": "contains-any";
|
|
@@ -605,7 +655,6 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
605
655
|
regex: "regex";
|
|
606
656
|
javascript: "javascript";
|
|
607
657
|
similar: "similar";
|
|
608
|
-
cost: "cost";
|
|
609
658
|
latency: "latency";
|
|
610
659
|
"file-exists": "file-exists";
|
|
611
660
|
"file-contains": "file-contains";
|
|
@@ -699,10 +748,14 @@ export declare const ContentLakeAuthorableTaskSchema: z.ZodObject<{
|
|
|
699
748
|
"agent-output": "agent-output";
|
|
700
749
|
"agent-tool-usage": "agent-tool-usage";
|
|
701
750
|
}>;
|
|
702
|
-
criteria: z.ZodArray<z.
|
|
751
|
+
criteria: z.ZodArray<z.ZodObject<{
|
|
752
|
+
id: z.ZodString;
|
|
753
|
+
text: z.ZodString;
|
|
754
|
+
}, z.core.$strip>>;
|
|
703
755
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
704
756
|
}, z.core.$strip>, z.ZodObject<{
|
|
705
757
|
type: z.ZodEnum<{
|
|
758
|
+
cost: "cost";
|
|
706
759
|
"llm-rubric": "llm-rubric";
|
|
707
760
|
contains: "contains";
|
|
708
761
|
"contains-any": "contains-any";
|
|
@@ -713,7 +766,6 @@ export declare const ContentLakeAuthorableTaskSchema: z.ZodObject<{
|
|
|
713
766
|
regex: "regex";
|
|
714
767
|
javascript: "javascript";
|
|
715
768
|
similar: "similar";
|
|
716
|
-
cost: "cost";
|
|
717
769
|
latency: "latency";
|
|
718
770
|
"file-exists": "file-exists";
|
|
719
771
|
"file-contains": "file-contains";
|
|
@@ -819,10 +871,14 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
|
|
|
819
871
|
"agent-output": "agent-output";
|
|
820
872
|
"agent-tool-usage": "agent-tool-usage";
|
|
821
873
|
}>;
|
|
822
|
-
criteria: z.ZodArray<z.
|
|
874
|
+
criteria: z.ZodArray<z.ZodObject<{
|
|
875
|
+
id: z.ZodString;
|
|
876
|
+
text: z.ZodString;
|
|
877
|
+
}, z.core.$strip>>;
|
|
823
878
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
824
879
|
}, z.core.$strip>, z.ZodObject<{
|
|
825
880
|
type: z.ZodEnum<{
|
|
881
|
+
cost: "cost";
|
|
826
882
|
"llm-rubric": "llm-rubric";
|
|
827
883
|
contains: "contains";
|
|
828
884
|
"contains-any": "contains-any";
|
|
@@ -833,7 +889,6 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
|
|
|
833
889
|
regex: "regex";
|
|
834
890
|
javascript: "javascript";
|
|
835
891
|
similar: "similar";
|
|
836
|
-
cost: "cost";
|
|
837
892
|
latency: "latency";
|
|
838
893
|
"file-exists": "file-exists";
|
|
839
894
|
"file-contains": "file-contains";
|
|
@@ -933,10 +988,14 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
|
|
|
933
988
|
"agent-output": "agent-output";
|
|
934
989
|
"agent-tool-usage": "agent-tool-usage";
|
|
935
990
|
}>;
|
|
936
|
-
criteria: z.ZodArray<z.
|
|
991
|
+
criteria: z.ZodArray<z.ZodObject<{
|
|
992
|
+
id: z.ZodString;
|
|
993
|
+
text: z.ZodString;
|
|
994
|
+
}, z.core.$strip>>;
|
|
937
995
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
938
996
|
}, z.core.$strip>, z.ZodObject<{
|
|
939
997
|
type: z.ZodEnum<{
|
|
998
|
+
cost: "cost";
|
|
940
999
|
"llm-rubric": "llm-rubric";
|
|
941
1000
|
contains: "contains";
|
|
942
1001
|
"contains-any": "contains-any";
|
|
@@ -947,7 +1006,6 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
|
|
|
947
1006
|
regex: "regex";
|
|
948
1007
|
javascript: "javascript";
|
|
949
1008
|
similar: "similar";
|
|
950
|
-
cost: "cost";
|
|
951
1009
|
latency: "latency";
|
|
952
1010
|
"file-exists": "file-exists";
|
|
953
1011
|
"file-contains": "file-contains";
|
|
@@ -1087,10 +1145,14 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
|
|
|
1087
1145
|
"agent-output": "agent-output";
|
|
1088
1146
|
"agent-tool-usage": "agent-tool-usage";
|
|
1089
1147
|
}>;
|
|
1090
|
-
criteria: z.ZodArray<z.
|
|
1148
|
+
criteria: z.ZodArray<z.ZodObject<{
|
|
1149
|
+
id: z.ZodString;
|
|
1150
|
+
text: z.ZodString;
|
|
1151
|
+
}, z.core.$strip>>;
|
|
1091
1152
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
1092
1153
|
}, z.core.$strip>, z.ZodObject<{
|
|
1093
1154
|
type: z.ZodEnum<{
|
|
1155
|
+
cost: "cost";
|
|
1094
1156
|
"llm-rubric": "llm-rubric";
|
|
1095
1157
|
contains: "contains";
|
|
1096
1158
|
"contains-any": "contains-any";
|
|
@@ -1101,7 +1163,6 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
|
|
|
1101
1163
|
regex: "regex";
|
|
1102
1164
|
javascript: "javascript";
|
|
1103
1165
|
similar: "similar";
|
|
1104
|
-
cost: "cost";
|
|
1105
1166
|
latency: "latency";
|
|
1106
1167
|
"file-exists": "file-exists";
|
|
1107
1168
|
"file-contains": "file-contains";
|
|
@@ -1218,10 +1279,14 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
|
|
|
1218
1279
|
"agent-output": "agent-output";
|
|
1219
1280
|
"agent-tool-usage": "agent-tool-usage";
|
|
1220
1281
|
}>;
|
|
1221
|
-
criteria: z.ZodArray<z.
|
|
1282
|
+
criteria: z.ZodArray<z.ZodObject<{
|
|
1283
|
+
id: z.ZodString;
|
|
1284
|
+
text: z.ZodString;
|
|
1285
|
+
}, z.core.$strip>>;
|
|
1222
1286
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
1223
1287
|
}, z.core.$strip>, z.ZodObject<{
|
|
1224
1288
|
type: z.ZodEnum<{
|
|
1289
|
+
cost: "cost";
|
|
1225
1290
|
"llm-rubric": "llm-rubric";
|
|
1226
1291
|
contains: "contains";
|
|
1227
1292
|
"contains-any": "contains-any";
|
|
@@ -1232,7 +1297,6 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
|
|
|
1232
1297
|
regex: "regex";
|
|
1233
1298
|
javascript: "javascript";
|
|
1234
1299
|
similar: "similar";
|
|
1235
|
-
cost: "cost";
|
|
1236
1300
|
latency: "latency";
|
|
1237
1301
|
"file-exists": "file-exists";
|
|
1238
1302
|
"file-contains": "file-contains";
|
|
@@ -1337,10 +1401,14 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
|
|
|
1337
1401
|
"agent-output": "agent-output";
|
|
1338
1402
|
"agent-tool-usage": "agent-tool-usage";
|
|
1339
1403
|
}>;
|
|
1340
|
-
criteria: z.ZodArray<z.
|
|
1404
|
+
criteria: z.ZodArray<z.ZodObject<{
|
|
1405
|
+
id: z.ZodString;
|
|
1406
|
+
text: z.ZodString;
|
|
1407
|
+
}, z.core.$strip>>;
|
|
1341
1408
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
1342
1409
|
}, z.core.$strip>, z.ZodObject<{
|
|
1343
1410
|
type: z.ZodEnum<{
|
|
1411
|
+
cost: "cost";
|
|
1344
1412
|
"llm-rubric": "llm-rubric";
|
|
1345
1413
|
contains: "contains";
|
|
1346
1414
|
"contains-any": "contains-any";
|
|
@@ -1351,7 +1419,6 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
|
|
|
1351
1419
|
regex: "regex";
|
|
1352
1420
|
javascript: "javascript";
|
|
1353
1421
|
similar: "similar";
|
|
1354
|
-
cost: "cost";
|
|
1355
1422
|
latency: "latency";
|
|
1356
1423
|
"file-exists": "file-exists";
|
|
1357
1424
|
"file-contains": "file-contains";
|
|
@@ -1468,6 +1535,7 @@ export declare const RepoConfigSchema: z.ZodObject<{
|
|
|
1468
1535
|
execution: z.ZodOptional<z.ZodObject<{
|
|
1469
1536
|
concurrency: z.ZodOptional<z.ZodNumber>;
|
|
1470
1537
|
graderReplications: z.ZodOptional<z.ZodNumber>;
|
|
1538
|
+
borderlineReplications: z.ZodOptional<z.ZodNumber>;
|
|
1471
1539
|
gapAnalysis: z.ZodOptional<z.ZodBoolean>;
|
|
1472
1540
|
apiUrl: z.ZodOptional<z.ZodString>;
|
|
1473
1541
|
}, z.core.$strip>>;
|
|
@@ -111,14 +111,26 @@ const CanonicalDocRefSchema = z.union([
|
|
|
111
111
|
// ---------------------------------------------------------------------------
|
|
112
112
|
// Assertion schemas
|
|
113
113
|
// ---------------------------------------------------------------------------
|
|
114
|
+
/**
|
|
115
|
+
* A single criterion within an llm-rubric assertion. Stable id-text pair.
|
|
116
|
+
*/
|
|
117
|
+
export const CriterionRefSchema = z.object({
|
|
118
|
+
id: z
|
|
119
|
+
.string()
|
|
120
|
+
.min(1)
|
|
121
|
+
.regex(/^[a-z0-9][a-z0-9-]*$/, {
|
|
122
|
+
message: "criterion id must be lowercase alphanumeric with hyphens",
|
|
123
|
+
}),
|
|
124
|
+
text: z.string().min(1),
|
|
125
|
+
});
|
|
114
126
|
/**
|
|
115
127
|
* A templated LLM-rubric assertion — uses one of the predefined rubric
|
|
116
128
|
* templates with author-supplied criteria.
|
|
117
129
|
*/
|
|
118
|
-
const TemplatedAssertionSchema = z.object({
|
|
130
|
+
export const TemplatedAssertionSchema = z.object({
|
|
119
131
|
type: z.literal("llm-rubric"),
|
|
120
132
|
template: z.enum(RUBRIC_TEMPLATE_NAMES),
|
|
121
|
-
criteria: z.array(
|
|
133
|
+
criteria: z.array(CriterionRefSchema).min(1),
|
|
122
134
|
weight: z.number().optional(),
|
|
123
135
|
});
|
|
124
136
|
/**
|
|
@@ -562,6 +574,11 @@ const ExecutionConfigSchema = z
|
|
|
562
574
|
.object({
|
|
563
575
|
concurrency: z.number().int().positive().optional(),
|
|
564
576
|
graderReplications: z.number().int().positive().optional(),
|
|
577
|
+
/**
|
|
578
|
+
* Plan 03-04 GRAD-04 — replications per borderline judgment.
|
|
579
|
+
* Default 3 (composition-root). Positive integer.
|
|
580
|
+
*/
|
|
581
|
+
borderlineReplications: z.number().int().positive().optional(),
|
|
565
582
|
gapAnalysis: z.boolean().optional(),
|
|
566
583
|
apiUrl: z.string().url().optional(),
|
|
567
584
|
})
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
* @see docs/decisions/D0032-run-anchored-artifact-store.md
|
|
28
28
|
* @see docs/decisions/D0033-unified-run-anchored-artifact-capture.md
|
|
29
29
|
*/
|
|
30
|
-
import { ARTIFACT_REGISTRY, NotImplementedError, } from "../_vendor/ailf-core/index.js";
|
|
30
|
+
import { ARTIFACT_REGISTRY, assertWritePolicyMatches, NotImplementedError, } from "../_vendor/ailf-core/index.js";
|
|
31
31
|
import { prepareUploadBody } from "./prepare-upload-body.js";
|
|
32
32
|
import { NO_OP_UPLOAD_METRICS, } from "./upload-metrics.js";
|
|
33
33
|
export class ApiGatewayArtifactWriter {
|
|
@@ -40,6 +40,7 @@ export class ApiGatewayArtifactWriter {
|
|
|
40
40
|
// ---- Canonical W0049 API ------------------------------------------------
|
|
41
41
|
async emit(type, association, payload) {
|
|
42
42
|
const descriptor = ARTIFACT_REGISTRY[type];
|
|
43
|
+
assertWritePolicyMatches("pipeline", descriptor);
|
|
43
44
|
const runId = association.run;
|
|
44
45
|
if (!runId) {
|
|
45
46
|
console.warn(` ⚠️ emit("${type}"): association.run is required, skipping`);
|
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
* does this writer. Traces flow through the GCS-direct writer when ADC
|
|
26
26
|
* credentials are present.
|
|
27
27
|
*/
|
|
28
|
-
import { ARTIFACT_REGISTRY, BULK_ENTRY_KEY, NotImplementedError, } from "../_vendor/ailf-core/index.js";
|
|
28
|
+
import { ARTIFACT_REGISTRY, assertWritePolicyMatches, BULK_ENTRY_KEY, NotImplementedError, } from "../_vendor/ailf-core/index.js";
|
|
29
29
|
import { prepareUploadBody } from "./prepare-upload-body.js";
|
|
30
30
|
import { NO_OP_UPLOAD_METRICS, } from "./upload-metrics.js";
|
|
31
31
|
/**
|
|
@@ -64,6 +64,7 @@ export class BatchingApiGatewayArtifactWriter {
|
|
|
64
64
|
// ---- ArtifactWriter surface --------------------------------------------
|
|
65
65
|
async emit(type, association, payload) {
|
|
66
66
|
const descriptor = ARTIFACT_REGISTRY[type];
|
|
67
|
+
assertWritePolicyMatches("pipeline", descriptor);
|
|
67
68
|
const runId = association.run;
|
|
68
69
|
if (!runId) {
|
|
69
70
|
console.warn(` ⚠️ emit("${type}"): association.run is required, skipping`);
|
|
@@ -28,7 +28,7 @@
|
|
|
28
28
|
* @see docs/decisions/D0033-unified-run-anchored-artifact-capture.md
|
|
29
29
|
*/
|
|
30
30
|
import { Storage } from "@google-cloud/storage";
|
|
31
|
-
import { ARTIFACT_REGISTRY, buildManifestPreview, } from "../_vendor/ailf-core/index.js";
|
|
31
|
+
import { ARTIFACT_REGISTRY, assertWritePolicyMatches, buildManifestPreview, } from "../_vendor/ailf-core/index.js";
|
|
32
32
|
import { resolveUploadConcurrency } from "./parallel-emit.js";
|
|
33
33
|
import { prepareUploadBody } from "./prepare-upload-body.js";
|
|
34
34
|
import { redactArtifactData } from "./redact-artifact.js";
|
|
@@ -79,6 +79,7 @@ export class GcsArtifactWriter {
|
|
|
79
79
|
// ---- Canonical W0049 API ------------------------------------------------
|
|
80
80
|
async emit(type, association, payload) {
|
|
81
81
|
const descriptor = ARTIFACT_REGISTRY[type];
|
|
82
|
+
assertWritePolicyMatches("pipeline", descriptor);
|
|
82
83
|
const runId = association.run;
|
|
83
84
|
if (!runId) {
|
|
84
85
|
console.warn(` ⚠️ emit("${type}"): association.run is required, skipping`);
|
|
@@ -132,6 +133,7 @@ export class GcsArtifactWriter {
|
|
|
132
133
|
}
|
|
133
134
|
async appendNdjson(type, association, rows) {
|
|
134
135
|
const descriptor = ARTIFACT_REGISTRY[type];
|
|
136
|
+
assertWritePolicyMatches("pipeline", descriptor);
|
|
135
137
|
if (descriptor.mime !== "application/x-ndjson") {
|
|
136
138
|
console.warn(` ⚠️ appendNdjson("${type}"): descriptor mime is ${descriptor.mime}, not application/x-ndjson — skipping`);
|
|
137
139
|
return null;
|
|
@@ -38,7 +38,7 @@
|
|
|
38
38
|
*/
|
|
39
39
|
import { promises as fs } from "node:fs";
|
|
40
40
|
import path from "node:path";
|
|
41
|
-
import { ARTIFACT_REGISTRY, buildManifestPreview, } from "../_vendor/ailf-core/index.js";
|
|
41
|
+
import { ARTIFACT_REGISTRY, assertWritePolicyMatches, buildManifestPreview, } from "../_vendor/ailf-core/index.js";
|
|
42
42
|
import { redactArtifactData } from "./redact-artifact.js";
|
|
43
43
|
// ---------------------------------------------------------------------------
|
|
44
44
|
// Implementation
|
|
@@ -66,6 +66,7 @@ export class LocalFilesystemArtifactWriter {
|
|
|
66
66
|
if (this.excludeSet.has(type))
|
|
67
67
|
return null;
|
|
68
68
|
const descriptor = ARTIFACT_REGISTRY[type];
|
|
69
|
+
assertWritePolicyMatches("pipeline", descriptor);
|
|
69
70
|
const runId = association.run;
|
|
70
71
|
if (!runId) {
|
|
71
72
|
console.warn(` ⚠️ emit("${type}"): association.run is required, skipping`);
|
|
@@ -127,6 +128,7 @@ export class LocalFilesystemArtifactWriter {
|
|
|
127
128
|
if (this.excludeSet.has(type))
|
|
128
129
|
return null;
|
|
129
130
|
const descriptor = ARTIFACT_REGISTRY[type];
|
|
131
|
+
assertWritePolicyMatches("pipeline", descriptor);
|
|
130
132
|
if (descriptor.mime !== "application/x-ndjson") {
|
|
131
133
|
console.warn(` ⚠️ appendNdjson("${type}"): descriptor mime is ${descriptor.mime}, not application/x-ndjson — skipping`);
|
|
132
134
|
return null;
|
|
@@ -38,7 +38,7 @@ export function createCalculateScoresCommand() {
|
|
|
38
38
|
remote: false,
|
|
39
39
|
apiUrl: "https://ailf-api.sanity.build",
|
|
40
40
|
});
|
|
41
|
-
const result = calculateAndWriteScores({
|
|
41
|
+
const result = await calculateAndWriteScores({
|
|
42
42
|
resultsPath,
|
|
43
43
|
rootDir: ctx.config.rootDir,
|
|
44
44
|
source: opts.source,
|
|
@@ -298,7 +298,7 @@ const EXPLAIN_REGISTRY = {
|
|
|
298
298
|
],
|
|
299
299
|
},
|
|
300
300
|
"lookup-doc": {
|
|
301
|
-
description: "Search Sanity for documentation articles by keyword (find slugs for
|
|
301
|
+
description: "Search Sanity for documentation articles by keyword (find slugs for contextDocs)",
|
|
302
302
|
steps: [
|
|
303
303
|
{
|
|
304
304
|
cacheStatus: "miss",
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* lookup-doc command — search Sanity for documentation articles by keyword.
|
|
3
3
|
*
|
|
4
|
-
* Helps external contributors find the correct `slug` for
|
|
4
|
+
* Helps external contributors find the correct `slug` for contextDocs
|
|
5
5
|
* references without needing to browse the CMS or guess from URLs.
|
|
6
6
|
*
|
|
7
7
|
* Usage:
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* lookup-doc command — search Sanity for documentation articles by keyword.
|
|
3
3
|
*
|
|
4
|
-
* Helps external contributors find the correct `slug` for
|
|
4
|
+
* Helps external contributors find the correct `slug` for contextDocs
|
|
5
5
|
* references without needing to browse the CMS or guess from URLs.
|
|
6
6
|
*
|
|
7
7
|
* Usage:
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
import { Command } from "commander";
|
|
15
15
|
export function createLookupDocCommand() {
|
|
16
16
|
return new Command("lookup-doc")
|
|
17
|
-
.description("Search Sanity docs by keyword — find slugs for
|
|
17
|
+
.description("Search Sanity docs by keyword — find slugs for contextDocs references")
|
|
18
18
|
.argument("<keyword>", "Search keyword (matches title and slug)")
|
|
19
19
|
.option("-l, --limit <n>", "Maximum results to show", parseInt, 10)
|
|
20
20
|
.option("-s, --source <name>", "Documentation source (from sources.yaml)")
|
|
@@ -73,7 +73,7 @@ export function createLookupDocCommand() {
|
|
|
73
73
|
console.log(` ${"".padEnd(maxSlugLen + 6)} │ Section: ${section}\n`);
|
|
74
74
|
}
|
|
75
75
|
console.log(" Usage in .ailf/tasks/*.yaml:\n");
|
|
76
|
-
console.log("
|
|
76
|
+
console.log(" contextDocs:");
|
|
77
77
|
console.log(` - slug: ${results[0].slug}`);
|
|
78
78
|
console.log(` reason: "${results[0].title}"`);
|
|
79
79
|
if (results[0].sectionSlug) {
|
|
@@ -27,6 +27,12 @@ export interface ResolvedOptions {
|
|
|
27
27
|
dryRun: boolean;
|
|
28
28
|
gapAnalysisEnabled: boolean;
|
|
29
29
|
graderReplications?: number;
|
|
30
|
+
/**
|
|
31
|
+
* Replications per borderline judgment for the GRAD-04 intra-grader
|
|
32
|
+
* consensus pass. Sourced from `.ailf/config.yaml`'s
|
|
33
|
+
* `execution.borderlineReplications`.
|
|
34
|
+
*/
|
|
35
|
+
borderlineReplications?: number;
|
|
30
36
|
/** Grader context policy from `.ailf/config.yaml` `grader.context` */
|
|
31
37
|
graderContext?: "rubric-only" | "with-docs";
|
|
32
38
|
headerArgs: string[];
|
|
@@ -248,6 +248,7 @@ export function computeResolvedOptions(opts) {
|
|
|
248
248
|
// env var (where one exists) > .ailf/config.yaml > built-in default
|
|
249
249
|
const concurrency = repoConfig?.execution?.concurrency;
|
|
250
250
|
const graderReplications = repoConfig?.execution?.graderReplications;
|
|
251
|
+
const borderlineReplications = repoConfig?.execution?.borderlineReplications;
|
|
251
252
|
const gapAnalysisEnabled = repoConfig?.execution?.gapAnalysis ?? true;
|
|
252
253
|
// Grader context policy. Cascade: env var > .ailf/config.yaml > unset
|
|
253
254
|
// (defaults to rubric-only at the EvalConfig boundary). The env var is the
|
|
@@ -291,6 +292,7 @@ export function computeResolvedOptions(opts) {
|
|
|
291
292
|
dryRun: opts.dryRun,
|
|
292
293
|
gapAnalysisEnabled,
|
|
293
294
|
graderReplications,
|
|
295
|
+
borderlineReplications,
|
|
294
296
|
graderContext,
|
|
295
297
|
headerArgs,
|
|
296
298
|
impactSummary,
|
|
@@ -142,6 +142,7 @@ function toConfigSlice(opts) {
|
|
|
142
142
|
perspectiveOverride: opts.perspectiveOverride,
|
|
143
143
|
graderContext: opts.graderContext,
|
|
144
144
|
graderReplications: opts.graderReplications,
|
|
145
|
+
borderlineReplications: opts.borderlineReplications,
|
|
145
146
|
gapAnalysisEnabled: opts.gapAnalysisEnabled,
|
|
146
147
|
noRemoteCache: opts.noRemoteCache,
|
|
147
148
|
// D0037 / W0069 caller envelope overrides — flags override env vars
|
|
@@ -15,7 +15,8 @@
|
|
|
15
15
|
* @see packages/core/src/ports/context.ts — AppContext interface
|
|
16
16
|
* @see docs/archive/exec-plans/ports-and-adapters/phase-7-composition-root.md
|
|
17
17
|
*/
|
|
18
|
-
import { type AppContext, type ArtifactWriter, type ArtifactWriterProgressOptions, type AssertionRegistration, type Logger, type ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
|
|
18
|
+
import { type AppContext, type ArtifactWriter, type ArtifactWriterProgressOptions, type AssertionRegistration, type LLMClient, type Logger, type ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
|
|
19
|
+
import { type BorderlineConsensusOptions, type BorderlineConsensusResult } from "./pipeline/borderline-consensus-runner.js";
|
|
19
20
|
import { CompositeTaskSource, ContentLakeTaskSource, RepoTaskSource } from "./adapters/task-sources/index.js";
|
|
20
21
|
/**
|
|
21
22
|
* Create a fully wired AppContext from resolved configuration.
|
|
@@ -24,6 +25,28 @@ import { CompositeTaskSource, ContentLakeTaskSource, RepoTaskSource } from "./ad
|
|
|
24
25
|
* Swapping an adapter is a one-line change in this function.
|
|
25
26
|
*/
|
|
26
27
|
export declare function createAppContext(config: ResolvedConfig): AppContext;
|
|
28
|
+
/**
|
|
29
|
+
* Typed key bag passed to `createLLMClient`. The composition root reads
|
|
30
|
+
* env once and supplies values here; the factory stays pure so tests don't
|
|
31
|
+
* have to mutate `process.env`.
|
|
32
|
+
*/
|
|
33
|
+
export interface LLMClientKeys {
|
|
34
|
+
anthropicApiKey?: string;
|
|
35
|
+
openaiApiKey?: string;
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* Select the LLMClient adapter based on `config.llmProvider` and the
|
|
39
|
+
* supplied API keys. Returns `undefined` when no usable credential is
|
|
40
|
+
* present — `AppContext.llmClient` stays unset and consumers handle that
|
|
41
|
+
* explicitly.
|
|
42
|
+
*
|
|
43
|
+
* Adapters never read `process.env` themselves (per
|
|
44
|
+
* `.claude/rules/typescript.md`); env mapping happens at the call site
|
|
45
|
+
* (typically `createAppContext`).
|
|
46
|
+
*
|
|
47
|
+
* Exported for unit-test access; not part of the public package API.
|
|
48
|
+
*/
|
|
49
|
+
export declare function createLLMClient(config: ResolvedConfig, keys: LLMClientKeys, logger: Logger): LLMClient | undefined;
|
|
27
50
|
/**
|
|
28
51
|
* Selects the `ArtifactWriter` wiring per D0033 M4:
|
|
29
52
|
*
|
|
@@ -61,3 +84,38 @@ export declare function createTaskSource(config: ResolvedConfig): CompositeTaskS
|
|
|
61
84
|
* explicit mode whitelists.
|
|
62
85
|
*/
|
|
63
86
|
export declare const FRAMEWORK_ASSERTIONS: AssertionRegistration[];
|
|
87
|
+
/**
|
|
88
|
+
* Severity boundaries from `packages/eval/config/thresholds.ts`
|
|
89
|
+
* (severity.critical/warning/info `composite-below` at L50/54/58 — 30, 50,
|
|
90
|
+
* 60). The borderline detector flags a judgment when its score is within
|
|
91
|
+
* ±5 of any of these. Composition-root reads them ONCE and threads the
|
|
92
|
+
* typed `readonly number[]` into `runBorderlineConsensus` rather than
|
|
93
|
+
* re-deriving them at each call site (Pitfall 5 — single source of truth
|
|
94
|
+
* for the scale).
|
|
95
|
+
*/
|
|
96
|
+
export declare const BORDERLINE_SEVERITY_THRESHOLDS: readonly number[];
|
|
97
|
+
/**
|
|
98
|
+
* Default replications per borderline judgment when the caller's
|
|
99
|
+
* `RepoConfig.execution.borderlineReplications` is unset (locked answer
|
|
100
|
+
* #4 in plan 03-04). Three replications + the original score = four
|
|
101
|
+
* scores per consistency record, which is the minimum that produces a
|
|
102
|
+
* non-degenerate stdDev / median split.
|
|
103
|
+
*/
|
|
104
|
+
export declare const DEFAULT_BORDERLINE_REPLICATIONS = 3;
|
|
105
|
+
/**
|
|
106
|
+
* Factory for the borderline-consensus runner. Returns a function that
|
|
107
|
+
* applies the severity-threshold and replication defaults from
|
|
108
|
+
* composition-root, leaving the live grader entry point (the `regrade`
|
|
109
|
+
* callback) and the candidate `judgments` array as runtime inputs.
|
|
110
|
+
*
|
|
111
|
+
* The pipeline-side caller (currently `pipeline/calculate-scores.ts`'s
|
|
112
|
+
* post-extraction junction) supplies the `regrade` callback that maps a
|
|
113
|
+
* `GraderJudgment` to a fresh score via the response/rubric text from
|
|
114
|
+
* the original Promptfoo result. See the runner's header for the
|
|
115
|
+
* rationale on injecting the regrader rather than calling `gradeOnce`
|
|
116
|
+
* inline (Pitfall 6 — preserve the runner's purity wrt the existing
|
|
117
|
+
* grader-comparison split).
|
|
118
|
+
*/
|
|
119
|
+
export declare function createBorderlineConsensusRunner(opts: {
|
|
120
|
+
borderlineReplications?: number;
|
|
121
|
+
}): (args: Pick<BorderlineConsensusOptions, "judgments" | "logger" | "regrade">) => Promise<BorderlineConsensusResult>;
|