@sanity/ailf 4.6.0 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/canonical/grader-references/agent-harness-tools.yaml +42 -0
- package/canonical/grader-references/knowledge-probe-recall.yaml +36 -0
- package/canonical/grader-references/mcp-server-spec.yaml +51 -0
- package/canonical/grader-references/portable-text.yaml +48 -0
- package/config/diagnosis-cards.ts +318 -0
- package/config/models.ts +12 -0
- package/config/rubrics.ts +38 -2
- package/dist/_vendor/ailf-core/artifact-registry.d.ts +60 -2
- package/dist/_vendor/ailf-core/artifact-registry.js +288 -7
- package/dist/_vendor/ailf-core/examples/index.d.ts +125 -26
- package/dist/_vendor/ailf-core/examples/index.js +146 -47
- package/dist/_vendor/ailf-core/grader/failure-modes/agent-harness.d.ts +13 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/agent-harness.js +16 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/common.d.ts +14 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/common.js +18 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/index.d.ts +45 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/index.js +109 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/knowledge-probe.d.ts +13 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/knowledge-probe.js +17 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/literacy.d.ts +13 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/literacy.js +17 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/mcp.d.ts +13 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/mcp.js +17 -0
- package/dist/_vendor/ailf-core/index.d.ts +1 -0
- package/dist/_vendor/ailf-core/index.js +4 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +8 -0
- package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +15 -0
- package/dist/_vendor/ailf-core/schemas/branded-string.d.ts +40 -0
- package/dist/_vendor/ailf-core/schemas/branded-string.js +45 -0
- package/dist/_vendor/ailf-core/schemas/confidence-schema.d.ts +36 -0
- package/dist/_vendor/ailf-core/schemas/confidence-schema.js +32 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +8 -4
- package/dist/_vendor/ailf-core/schemas/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/schemas/index.js +9 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +34 -8
- package/dist/_vendor/ailf-core/schemas/pipeline.js +23 -1
- package/dist/_vendor/ailf-core/services/diagnosis/card-validators.d.ts +41 -0
- package/dist/_vendor/ailf-core/services/diagnosis/card-validators.js +40 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/area-summary.test.d.ts +7 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/area-summary.test.js +131 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/failure-mode-summary.test.d.ts +7 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/failure-mode-summary.test.js +171 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/no-issues.test.d.ts +7 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/no-issues.test.js +155 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/area-summary.d.ts +17 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/area-summary.js +43 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/doc-attribution-spotlight.d.ts +46 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/doc-attribution-spotlight.js +104 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/failure-mode-summary.d.ts +28 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/failure-mode-summary.js +96 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/index.d.ts +39 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/index.js +52 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/low-confidence-attribution.d.ts +27 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/low-confidence-attribution.js +77 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/no-issues.d.ts +32 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/no-issues.js +71 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/regression-vs-baseline.d.ts +44 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/regression-vs-baseline.js +126 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/top-recommendations.d.ts +41 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/top-recommendations.js +107 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/weakest-area.d.ts +43 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/weakest-area.js +114 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompt-builders.d.ts +72 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompt-builders.js +273 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/doc-attribution-spotlight.system.d.ts +17 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/doc-attribution-spotlight.system.js +58 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/index.d.ts +10 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/index.js +10 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/low-confidence-attribution.system.d.ts +15 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/low-confidence-attribution.system.js +53 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/regression-vs-baseline.system.d.ts +14 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/regression-vs-baseline.system.js +63 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/top-recommendations.system.d.ts +16 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/top-recommendations.system.js +78 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/weakest-area.system.d.ts +16 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/weakest-area.system.js +86 -0
- package/dist/_vendor/ailf-core/services/diagnosis/registry.d.ts +50 -0
- package/dist/_vendor/ailf-core/services/diagnosis/registry.js +35 -0
- package/dist/_vendor/ailf-core/services/diagnosis-runner.d.ts +136 -0
- package/dist/_vendor/ailf-core/services/diagnosis-runner.js +153 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +6 -0
- package/dist/_vendor/ailf-core/services/index.js +18 -0
- package/dist/_vendor/ailf-core/services/llm-client-factory.d.ts +64 -0
- package/dist/_vendor/ailf-core/services/llm-client-factory.js +54 -0
- package/dist/_vendor/ailf-core/services/report-to-markdown.js +3 -2
- package/dist/_vendor/ailf-core/types/attribution.d.ts +82 -0
- package/dist/_vendor/ailf-core/types/attribution.js +18 -0
- package/dist/_vendor/ailf-core/types/branded-ids.d.ts +26 -1
- package/dist/_vendor/ailf-core/types/branded-ids.js +80 -4
- package/dist/_vendor/ailf-core/types/confidence.d.ts +1 -1
- package/dist/_vendor/ailf-core/types/confidence.js +7 -0
- package/dist/_vendor/ailf-core/types/diagnosis.d.ts +271 -0
- package/dist/_vendor/ailf-core/types/diagnosis.js +19 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +16 -1
- package/dist/_vendor/ailf-core/types/grader-judgment.d.ts +125 -0
- package/dist/_vendor/ailf-core/types/grader-judgment.js +30 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +80 -29
- package/dist/_vendor/ailf-core/types/index.js +15 -1
- package/dist/_vendor/ailf-core/types/legacy-grader-judgment.d.ts +55 -0
- package/dist/_vendor/ailf-core/types/legacy-grader-judgment.js +30 -0
- package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +1 -0
- package/dist/_vendor/ailf-core/types/repo-config.d.ts +8 -0
- package/dist/_vendor/ailf-shared/document-ref.d.ts +1 -1
- package/dist/adapters/api-client/build-request.d.ts +1 -0
- package/dist/adapters/api-client/build-request.js +3 -0
- package/dist/adapters/attribution/attribution-meta-writer.d.ts +35 -0
- package/dist/adapters/attribution/attribution-meta-writer.js +34 -0
- package/dist/adapters/attribution/index.d.ts +9 -0
- package/dist/adapters/attribution/index.js +8 -0
- package/dist/adapters/attribution/per-entry-attribution-writer.d.ts +56 -0
- package/dist/adapters/attribution/per-entry-attribution-writer.js +49 -0
- package/dist/adapters/config-sources/file-config-adapter.js +1 -0
- package/dist/adapters/grader-outputs/index.d.ts +10 -0
- package/dist/adapters/grader-outputs/index.js +8 -0
- package/dist/adapters/grader-outputs/legacy/index.d.ts +11 -0
- package/dist/adapters/grader-outputs/legacy/index.js +10 -0
- package/dist/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.d.ts +49 -0
- package/dist/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.js +48 -0
- package/dist/adapters/grader-outputs/promptfoo-grader-output.d.ts +102 -0
- package/dist/adapters/grader-outputs/promptfoo-grader-output.js +93 -0
- package/dist/adapters/index.d.ts +3 -0
- package/dist/adapters/index.js +4 -0
- package/dist/adapters/llm/fake-llm-client.d.ts +20 -0
- package/dist/adapters/llm/fake-llm-client.js +38 -1
- package/dist/adapters/llm/openai-llm-client.js +52 -3
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +5 -1
- package/dist/adapters/task-sources/content-lake-task-source.js +28 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +79 -11
- package/dist/adapters/task-sources/repo-schemas.js +19 -2
- package/dist/cli-program.js +3 -0
- package/dist/commands/calculate-scores.js +1 -1
- package/dist/commands/explain-handler.js +1 -1
- package/dist/commands/interpret.d.ts +50 -0
- package/dist/commands/interpret.js +212 -0
- package/dist/commands/lookup-doc.d.ts +1 -1
- package/dist/commands/lookup-doc.js +3 -3
- package/dist/commands/pipeline-action.d.ts +6 -0
- package/dist/commands/pipeline-action.js +2 -0
- package/dist/commands/remote-pipeline.js +1 -0
- package/dist/composition-root.d.ts +57 -23
- package/dist/composition-root.js +155 -41
- package/dist/config/diagnosis-cards.ts +318 -0
- package/dist/config/models.ts +12 -0
- package/dist/config/rubrics.ts +38 -2
- package/dist/grader/agent-harness.d.ts +9 -0
- package/dist/grader/agent-harness.js +9 -0
- package/dist/grader/common.d.ts +9 -0
- package/dist/grader/common.js +9 -0
- package/dist/grader/index.d.ts +24 -0
- package/dist/grader/index.js +24 -0
- package/dist/grader/knowledge-probe.d.ts +9 -0
- package/dist/grader/knowledge-probe.js +9 -0
- package/dist/grader/literacy.d.ts +9 -0
- package/dist/grader/literacy.js +9 -0
- package/dist/grader/mcp.d.ts +9 -0
- package/dist/grader/mcp.js +9 -0
- package/dist/orchestration/build-app-context.js +1 -0
- package/dist/orchestration/build-step-sequence.js +5 -0
- package/dist/orchestration/steps/calculate-scores-step.js +23 -1
- package/dist/orchestration/steps/compute-attribution-step.d.ts +44 -0
- package/dist/orchestration/steps/compute-attribution-step.js +279 -0
- package/dist/orchestration/steps/gap-analysis-step.js +35 -7
- package/dist/orchestration/steps/index.d.ts +1 -0
- package/dist/orchestration/steps/index.js +1 -0
- package/dist/pipeline/attribution.d.ts +15 -0
- package/dist/pipeline/attribution.js +18 -9
- package/dist/pipeline/borderline-consensus-runner.d.ts +63 -0
- package/dist/pipeline/borderline-consensus-runner.js +124 -0
- package/dist/pipeline/borderline-detector.d.ts +24 -0
- package/dist/pipeline/borderline-detector.js +26 -0
- package/dist/pipeline/calculate-scores.d.ts +114 -3
- package/dist/pipeline/calculate-scores.js +426 -24
- package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/literacy-bridge.js +35 -17
- package/dist/pipeline/compiler/rubric-resolution.d.ts +15 -0
- package/dist/pipeline/compiler/rubric-resolution.js +9 -1
- package/dist/pipeline/compute-attribution.d.ts +80 -0
- package/dist/pipeline/compute-attribution.js +196 -0
- package/dist/pipeline/failure-modes.d.ts +52 -17
- package/dist/pipeline/failure-modes.js +178 -117
- package/dist/pipeline/map-request-to-config.js +1 -0
- package/package.json +7 -5
|
@@ -32,6 +32,40 @@ export type CuratedAssertionType = (typeof CURATED_ASSERTION_TYPES)[number];
|
|
|
32
32
|
*/
|
|
33
33
|
export declare const RUBRIC_TEMPLATE_NAMES: readonly ["task-completion", "code-correctness", "doc-coverage", "mcp-input-validation", "mcp-output-correctness", "mcp-error-handling", "mcp-security", "factual-correctness", "completeness", "currency", "process-quality", "agent-output", "agent-tool-usage"];
|
|
34
34
|
export type RubricTemplateName = (typeof RUBRIC_TEMPLATE_NAMES)[number];
|
|
35
|
+
/**
|
|
36
|
+
* A single criterion within an llm-rubric assertion. Stable id-text pair.
|
|
37
|
+
*/
|
|
38
|
+
export declare const CriterionRefSchema: z.ZodObject<{
|
|
39
|
+
id: z.ZodString;
|
|
40
|
+
text: z.ZodString;
|
|
41
|
+
}, z.core.$strip>;
|
|
42
|
+
/**
|
|
43
|
+
* A templated LLM-rubric assertion — uses one of the predefined rubric
|
|
44
|
+
* templates with author-supplied criteria.
|
|
45
|
+
*/
|
|
46
|
+
export declare const TemplatedAssertionSchema: z.ZodObject<{
|
|
47
|
+
type: z.ZodLiteral<"llm-rubric">;
|
|
48
|
+
template: z.ZodEnum<{
|
|
49
|
+
"task-completion": "task-completion";
|
|
50
|
+
"code-correctness": "code-correctness";
|
|
51
|
+
"doc-coverage": "doc-coverage";
|
|
52
|
+
"mcp-input-validation": "mcp-input-validation";
|
|
53
|
+
"mcp-output-correctness": "mcp-output-correctness";
|
|
54
|
+
"mcp-error-handling": "mcp-error-handling";
|
|
55
|
+
"mcp-security": "mcp-security";
|
|
56
|
+
"factual-correctness": "factual-correctness";
|
|
57
|
+
completeness: "completeness";
|
|
58
|
+
currency: "currency";
|
|
59
|
+
"process-quality": "process-quality";
|
|
60
|
+
"agent-output": "agent-output";
|
|
61
|
+
"agent-tool-usage": "agent-tool-usage";
|
|
62
|
+
}>;
|
|
63
|
+
criteria: z.ZodArray<z.ZodObject<{
|
|
64
|
+
id: z.ZodString;
|
|
65
|
+
text: z.ZodString;
|
|
66
|
+
}, z.core.$strip>>;
|
|
67
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
68
|
+
}, z.core.$strip>;
|
|
35
69
|
/**
|
|
36
70
|
* Zod schema for a single task definition — a mode-discriminated union
|
|
37
71
|
* mirroring `GeneralizedTaskDefinition`.
|
|
@@ -73,7 +107,10 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
73
107
|
"agent-output": "agent-output";
|
|
74
108
|
"agent-tool-usage": "agent-tool-usage";
|
|
75
109
|
}>;
|
|
76
|
-
criteria: z.ZodArray<z.
|
|
110
|
+
criteria: z.ZodArray<z.ZodObject<{
|
|
111
|
+
id: z.ZodString;
|
|
112
|
+
text: z.ZodString;
|
|
113
|
+
}, z.core.$strip>>;
|
|
77
114
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
78
115
|
}, z.core.$strip>, z.ZodObject<{
|
|
79
116
|
type: z.ZodEnum<{
|
|
@@ -187,7 +224,10 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
187
224
|
"agent-output": "agent-output";
|
|
188
225
|
"agent-tool-usage": "agent-tool-usage";
|
|
189
226
|
}>;
|
|
190
|
-
criteria: z.ZodArray<z.
|
|
227
|
+
criteria: z.ZodArray<z.ZodObject<{
|
|
228
|
+
id: z.ZodString;
|
|
229
|
+
text: z.ZodString;
|
|
230
|
+
}, z.core.$strip>>;
|
|
191
231
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
192
232
|
}, z.core.$strip>, z.ZodObject<{
|
|
193
233
|
type: z.ZodEnum<{
|
|
@@ -341,7 +381,10 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
341
381
|
"agent-output": "agent-output";
|
|
342
382
|
"agent-tool-usage": "agent-tool-usage";
|
|
343
383
|
}>;
|
|
344
|
-
criteria: z.ZodArray<z.
|
|
384
|
+
criteria: z.ZodArray<z.ZodObject<{
|
|
385
|
+
id: z.ZodString;
|
|
386
|
+
text: z.ZodString;
|
|
387
|
+
}, z.core.$strip>>;
|
|
345
388
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
346
389
|
}, z.core.$strip>, z.ZodObject<{
|
|
347
390
|
type: z.ZodEnum<{
|
|
@@ -472,7 +515,10 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
472
515
|
"agent-output": "agent-output";
|
|
473
516
|
"agent-tool-usage": "agent-tool-usage";
|
|
474
517
|
}>;
|
|
475
|
-
criteria: z.ZodArray<z.
|
|
518
|
+
criteria: z.ZodArray<z.ZodObject<{
|
|
519
|
+
id: z.ZodString;
|
|
520
|
+
text: z.ZodString;
|
|
521
|
+
}, z.core.$strip>>;
|
|
476
522
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
477
523
|
}, z.core.$strip>, z.ZodObject<{
|
|
478
524
|
type: z.ZodEnum<{
|
|
@@ -591,7 +637,10 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
591
637
|
"agent-output": "agent-output";
|
|
592
638
|
"agent-tool-usage": "agent-tool-usage";
|
|
593
639
|
}>;
|
|
594
|
-
criteria: z.ZodArray<z.
|
|
640
|
+
criteria: z.ZodArray<z.ZodObject<{
|
|
641
|
+
id: z.ZodString;
|
|
642
|
+
text: z.ZodString;
|
|
643
|
+
}, z.core.$strip>>;
|
|
595
644
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
596
645
|
}, z.core.$strip>, z.ZodObject<{
|
|
597
646
|
type: z.ZodEnum<{
|
|
@@ -699,7 +748,10 @@ export declare const ContentLakeAuthorableTaskSchema: z.ZodObject<{
|
|
|
699
748
|
"agent-output": "agent-output";
|
|
700
749
|
"agent-tool-usage": "agent-tool-usage";
|
|
701
750
|
}>;
|
|
702
|
-
criteria: z.ZodArray<z.
|
|
751
|
+
criteria: z.ZodArray<z.ZodObject<{
|
|
752
|
+
id: z.ZodString;
|
|
753
|
+
text: z.ZodString;
|
|
754
|
+
}, z.core.$strip>>;
|
|
703
755
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
704
756
|
}, z.core.$strip>, z.ZodObject<{
|
|
705
757
|
type: z.ZodEnum<{
|
|
@@ -819,7 +871,10 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
|
|
|
819
871
|
"agent-output": "agent-output";
|
|
820
872
|
"agent-tool-usage": "agent-tool-usage";
|
|
821
873
|
}>;
|
|
822
|
-
criteria: z.ZodArray<z.
|
|
874
|
+
criteria: z.ZodArray<z.ZodObject<{
|
|
875
|
+
id: z.ZodString;
|
|
876
|
+
text: z.ZodString;
|
|
877
|
+
}, z.core.$strip>>;
|
|
823
878
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
824
879
|
}, z.core.$strip>, z.ZodObject<{
|
|
825
880
|
type: z.ZodEnum<{
|
|
@@ -933,7 +988,10 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
|
|
|
933
988
|
"agent-output": "agent-output";
|
|
934
989
|
"agent-tool-usage": "agent-tool-usage";
|
|
935
990
|
}>;
|
|
936
|
-
criteria: z.ZodArray<z.
|
|
991
|
+
criteria: z.ZodArray<z.ZodObject<{
|
|
992
|
+
id: z.ZodString;
|
|
993
|
+
text: z.ZodString;
|
|
994
|
+
}, z.core.$strip>>;
|
|
937
995
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
938
996
|
}, z.core.$strip>, z.ZodObject<{
|
|
939
997
|
type: z.ZodEnum<{
|
|
@@ -1087,7 +1145,10 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
|
|
|
1087
1145
|
"agent-output": "agent-output";
|
|
1088
1146
|
"agent-tool-usage": "agent-tool-usage";
|
|
1089
1147
|
}>;
|
|
1090
|
-
criteria: z.ZodArray<z.
|
|
1148
|
+
criteria: z.ZodArray<z.ZodObject<{
|
|
1149
|
+
id: z.ZodString;
|
|
1150
|
+
text: z.ZodString;
|
|
1151
|
+
}, z.core.$strip>>;
|
|
1091
1152
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
1092
1153
|
}, z.core.$strip>, z.ZodObject<{
|
|
1093
1154
|
type: z.ZodEnum<{
|
|
@@ -1218,7 +1279,10 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
|
|
|
1218
1279
|
"agent-output": "agent-output";
|
|
1219
1280
|
"agent-tool-usage": "agent-tool-usage";
|
|
1220
1281
|
}>;
|
|
1221
|
-
criteria: z.ZodArray<z.
|
|
1282
|
+
criteria: z.ZodArray<z.ZodObject<{
|
|
1283
|
+
id: z.ZodString;
|
|
1284
|
+
text: z.ZodString;
|
|
1285
|
+
}, z.core.$strip>>;
|
|
1222
1286
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
1223
1287
|
}, z.core.$strip>, z.ZodObject<{
|
|
1224
1288
|
type: z.ZodEnum<{
|
|
@@ -1337,7 +1401,10 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
|
|
|
1337
1401
|
"agent-output": "agent-output";
|
|
1338
1402
|
"agent-tool-usage": "agent-tool-usage";
|
|
1339
1403
|
}>;
|
|
1340
|
-
criteria: z.ZodArray<z.
|
|
1404
|
+
criteria: z.ZodArray<z.ZodObject<{
|
|
1405
|
+
id: z.ZodString;
|
|
1406
|
+
text: z.ZodString;
|
|
1407
|
+
}, z.core.$strip>>;
|
|
1341
1408
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
1342
1409
|
}, z.core.$strip>, z.ZodObject<{
|
|
1343
1410
|
type: z.ZodEnum<{
|
|
@@ -1468,6 +1535,7 @@ export declare const RepoConfigSchema: z.ZodObject<{
|
|
|
1468
1535
|
execution: z.ZodOptional<z.ZodObject<{
|
|
1469
1536
|
concurrency: z.ZodOptional<z.ZodNumber>;
|
|
1470
1537
|
graderReplications: z.ZodOptional<z.ZodNumber>;
|
|
1538
|
+
borderlineReplications: z.ZodOptional<z.ZodNumber>;
|
|
1471
1539
|
gapAnalysis: z.ZodOptional<z.ZodBoolean>;
|
|
1472
1540
|
apiUrl: z.ZodOptional<z.ZodString>;
|
|
1473
1541
|
}, z.core.$strip>>;
|
|
@@ -111,14 +111,26 @@ const CanonicalDocRefSchema = z.union([
|
|
|
111
111
|
// ---------------------------------------------------------------------------
|
|
112
112
|
// Assertion schemas
|
|
113
113
|
// ---------------------------------------------------------------------------
|
|
114
|
+
/**
|
|
115
|
+
* A single criterion within an llm-rubric assertion. Stable id-text pair.
|
|
116
|
+
*/
|
|
117
|
+
export const CriterionRefSchema = z.object({
|
|
118
|
+
id: z
|
|
119
|
+
.string()
|
|
120
|
+
.min(1)
|
|
121
|
+
.regex(/^[a-z0-9][a-z0-9-]*$/, {
|
|
122
|
+
message: "criterion id must be lowercase alphanumeric with hyphens",
|
|
123
|
+
}),
|
|
124
|
+
text: z.string().min(1),
|
|
125
|
+
});
|
|
114
126
|
/**
|
|
115
127
|
* A templated LLM-rubric assertion — uses one of the predefined rubric
|
|
116
128
|
* templates with author-supplied criteria.
|
|
117
129
|
*/
|
|
118
|
-
const TemplatedAssertionSchema = z.object({
|
|
130
|
+
export const TemplatedAssertionSchema = z.object({
|
|
119
131
|
type: z.literal("llm-rubric"),
|
|
120
132
|
template: z.enum(RUBRIC_TEMPLATE_NAMES),
|
|
121
|
-
criteria: z.array(
|
|
133
|
+
criteria: z.array(CriterionRefSchema).min(1),
|
|
122
134
|
weight: z.number().optional(),
|
|
123
135
|
});
|
|
124
136
|
/**
|
|
@@ -562,6 +574,11 @@ const ExecutionConfigSchema = z
|
|
|
562
574
|
.object({
|
|
563
575
|
concurrency: z.number().int().positive().optional(),
|
|
564
576
|
graderReplications: z.number().int().positive().optional(),
|
|
577
|
+
/**
|
|
578
|
+
* Plan 03-04 GRAD-04 — replications per borderline judgment.
|
|
579
|
+
* Default 3 (composition-root). Positive integer.
|
|
580
|
+
*/
|
|
581
|
+
borderlineReplications: z.number().int().positive().optional(),
|
|
565
582
|
gapAnalysis: z.boolean().optional(),
|
|
566
583
|
apiUrl: z.string().url().optional(),
|
|
567
584
|
})
|
package/dist/cli-program.js
CHANGED
|
@@ -32,6 +32,7 @@ import { createFetchDocsCommand } from "./commands/fetch-docs.js";
|
|
|
32
32
|
import { createGenerateConfigsCommand } from "./commands/generate-configs.js";
|
|
33
33
|
import { createGraderCommand } from "./commands/grader/index.js";
|
|
34
34
|
import { createInitCommand } from "./commands/init.js";
|
|
35
|
+
import { createInterpretCommand } from "./commands/interpret.js";
|
|
35
36
|
import { createInteractiveCommand } from "./commands/interactive.js";
|
|
36
37
|
import { createLookupDocCommand } from "./commands/lookup-doc.js";
|
|
37
38
|
import { createMeasureRetrievalCommand } from "./commands/measure-retrieval.js";
|
|
@@ -110,6 +111,8 @@ export function buildCliProgram(opts) {
|
|
|
110
111
|
.addCommand(createWeeklyDigestCommand())
|
|
111
112
|
.addCommand(createCheckStalenessCommand());
|
|
112
113
|
program.addCommand(reportCommand.helpGroup(CommandGroup.AnalysisReports));
|
|
114
|
+
// `ailf interpret <reportId>` — top-level (not nested under report) per AI-SPEC
|
|
115
|
+
program.addCommand(createInterpretCommand().helpGroup(CommandGroup.AnalysisReports));
|
|
113
116
|
// ── Grader Reliability ────────────────────────────────────────────────
|
|
114
117
|
program.addCommand(createGraderCommand().helpGroup(CommandGroup.GraderReliability));
|
|
115
118
|
// ── Setup & Configuration ─────────────────────────────────────────────
|
|
@@ -38,7 +38,7 @@ export function createCalculateScoresCommand() {
|
|
|
38
38
|
remote: false,
|
|
39
39
|
apiUrl: "https://ailf-api.sanity.build",
|
|
40
40
|
});
|
|
41
|
-
const result = calculateAndWriteScores({
|
|
41
|
+
const result = await calculateAndWriteScores({
|
|
42
42
|
resultsPath,
|
|
43
43
|
rootDir: ctx.config.rootDir,
|
|
44
44
|
source: opts.source,
|
|
@@ -298,7 +298,7 @@ const EXPLAIN_REGISTRY = {
|
|
|
298
298
|
],
|
|
299
299
|
},
|
|
300
300
|
"lookup-doc": {
|
|
301
|
-
description: "Search Sanity for documentation articles by keyword (find slugs for
|
|
301
|
+
description: "Search Sanity for documentation articles by keyword (find slugs for contextDocs)",
|
|
302
302
|
steps: [
|
|
303
303
|
{
|
|
304
304
|
cacheStatus: "miss",
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* interpret command — generate a Diagnosis for a Report.
|
|
3
|
+
*
|
|
4
|
+
* Wraps `getDiagnosisRunner(ctx)` from the composition root in a Commander
|
|
5
|
+
* command for consistent CLI integration. Closest analog: compare.ts.
|
|
6
|
+
*
|
|
7
|
+
* Entry points:
|
|
8
|
+
* ailf interpret <reportId> — one-line-per-card summary
|
|
9
|
+
* ailf interpret <reportId> --json — full Diagnosis JSON
|
|
10
|
+
* ailf interpret latest — most recent report
|
|
11
|
+
* ailf interpret <id> --compare <ref> — DIAG-05 regression comparison
|
|
12
|
+
* ailf interpret <id> --refresh — bypass version-keyed cache
|
|
13
|
+
*
|
|
14
|
+
* @see packages/eval/src/commands/compare.ts — CLI factory analog
|
|
15
|
+
* @see packages/eval/src/composition-root.ts — getDiagnosisRunner
|
|
16
|
+
* @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-AI-SPEC.md §6
|
|
17
|
+
*/
|
|
18
|
+
import { Command } from "commander";
|
|
19
|
+
import type { DiagnosisRunner, VersionedInputs } from "../_vendor/ailf-core/index.d.ts";
|
|
20
|
+
interface MinimalReportStore {
|
|
21
|
+
read(id: string): Promise<unknown | null>;
|
|
22
|
+
latest(): Promise<unknown | null>;
|
|
23
|
+
}
|
|
24
|
+
export interface InterpretCommandOptions {
|
|
25
|
+
/**
|
|
26
|
+
* Override the runner factory for tests. When omitted, the command
|
|
27
|
+
* imports `getDiagnosisRunner` from the composition root at action time.
|
|
28
|
+
*/
|
|
29
|
+
readonly runnerFactory?: (ctx: unknown) => DiagnosisRunner;
|
|
30
|
+
/**
|
|
31
|
+
* Override the store factory for tests. When omitted, the command
|
|
32
|
+
* creates the app context and uses `ctx.reportStore` at action time.
|
|
33
|
+
*/
|
|
34
|
+
readonly storeFactory?: () => MinimalReportStore | null;
|
|
35
|
+
/**
|
|
36
|
+
* Override the versions resolver for tests. Receives the stored report
|
|
37
|
+
* record and returns the `VersionedInputs` needed by the runner.
|
|
38
|
+
* When omitted, the command derives versions from the report's metadata.
|
|
39
|
+
*/
|
|
40
|
+
readonly versionsFromReport?: (report: unknown) => VersionedInputs;
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* Create the `ailf interpret <reportId>` Commander command.
|
|
44
|
+
*
|
|
45
|
+
* Accepts optional `InterpretCommandOptions` for testability — tests can
|
|
46
|
+
* inject a fake runner factory and store factory without touching module
|
|
47
|
+
* mocks (preferred per testing.md).
|
|
48
|
+
*/
|
|
49
|
+
export declare function createInterpretCommand(options?: InterpretCommandOptions): Command;
|
|
50
|
+
export {};
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* interpret command — generate a Diagnosis for a Report.
|
|
3
|
+
*
|
|
4
|
+
* Wraps `getDiagnosisRunner(ctx)` from the composition root in a Commander
|
|
5
|
+
* command for consistent CLI integration. Closest analog: compare.ts.
|
|
6
|
+
*
|
|
7
|
+
* Entry points:
|
|
8
|
+
* ailf interpret <reportId> — one-line-per-card summary
|
|
9
|
+
* ailf interpret <reportId> --json — full Diagnosis JSON
|
|
10
|
+
* ailf interpret latest — most recent report
|
|
11
|
+
* ailf interpret <id> --compare <ref> — DIAG-05 regression comparison
|
|
12
|
+
* ailf interpret <id> --refresh — bypass version-keyed cache
|
|
13
|
+
*
|
|
14
|
+
* @see packages/eval/src/commands/compare.ts — CLI factory analog
|
|
15
|
+
* @see packages/eval/src/composition-root.ts — getDiagnosisRunner
|
|
16
|
+
* @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-AI-SPEC.md §6
|
|
17
|
+
*/
|
|
18
|
+
import { dirname, resolve } from "path";
|
|
19
|
+
import { fileURLToPath } from "url";
|
|
20
|
+
import { Command } from "commander";
|
|
21
|
+
import { addOutputDirOption } from "./shared/options.js";
|
|
22
|
+
import { resolveOutputDir } from "./shared/resolve-output-dir.js";
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
// Module-level root constant (same pattern as compare.ts)
|
|
25
|
+
// ---------------------------------------------------------------------------
|
|
26
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
27
|
+
const ROOT = resolve(__dirname, "..", "..");
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
// Card output formatting (AI-SPEC §6 graceful-degradation-visibility)
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
/**
|
|
32
|
+
* Visual status markers — locked visual contract per plan Test 7:
|
|
33
|
+
* ready: "✓", degraded: "⚠", missing: "—"
|
|
34
|
+
*/
|
|
35
|
+
const STATUS_ICONS = {
|
|
36
|
+
ready: "✓",
|
|
37
|
+
degraded: "⚠",
|
|
38
|
+
missing: "—",
|
|
39
|
+
};
|
|
40
|
+
function getCardSummaryText(card) {
|
|
41
|
+
if (card.status === "ready") {
|
|
42
|
+
return card.body.summary;
|
|
43
|
+
}
|
|
44
|
+
if (card.status === "degraded") {
|
|
45
|
+
return card.reason;
|
|
46
|
+
}
|
|
47
|
+
// missing
|
|
48
|
+
return card.reason;
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* Format a single card as a one-line summary string.
|
|
52
|
+
*
|
|
53
|
+
* Format: `<icon> <cardType>: <summary>`
|
|
54
|
+
* Per AI-SPEC §6: distinct icons for ready / degraded / missing.
|
|
55
|
+
*/
|
|
56
|
+
function formatCardSummaryLine(card) {
|
|
57
|
+
const icon = STATUS_ICONS[card.status];
|
|
58
|
+
const text = getCardSummaryText(card);
|
|
59
|
+
return `${icon} ${card.cardType}: ${text}`;
|
|
60
|
+
}
|
|
61
|
+
// ---------------------------------------------------------------------------
|
|
62
|
+
// Default versions resolver
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
/**
|
|
65
|
+
* Derive VersionedInputs from a stored report record.
|
|
66
|
+
*
|
|
67
|
+
* The four-version chain is carried in `report.summary.versions` per the
|
|
68
|
+
* Phase 5 schema, with `diagnosisVersion` sourced from the runner's const.
|
|
69
|
+
* Falls back to hard-coded "unknown" values when the fields are not present
|
|
70
|
+
* (legacy reports without version metadata).
|
|
71
|
+
*/
|
|
72
|
+
function defaultVersionsFromReport(report) {
|
|
73
|
+
const rec = report;
|
|
74
|
+
const summary = rec.summary;
|
|
75
|
+
const versions = summary?.versions;
|
|
76
|
+
return {
|
|
77
|
+
graderJudgmentsVersion: typeof versions?.graderJudgmentsVersion === "string"
|
|
78
|
+
? versions.graderJudgmentsVersion
|
|
79
|
+
: "unknown",
|
|
80
|
+
ensembleVersion: typeof versions?.ensembleVersion === "string"
|
|
81
|
+
? versions.ensembleVersion
|
|
82
|
+
: "unknown",
|
|
83
|
+
diagnosisVersion: typeof versions?.diagnosisVersion === "string"
|
|
84
|
+
? versions.diagnosisVersion
|
|
85
|
+
: "0.1.0",
|
|
86
|
+
cardVersion: typeof versions?.cardVersion === "string"
|
|
87
|
+
? versions.cardVersion
|
|
88
|
+
: "0.1.0",
|
|
89
|
+
};
|
|
90
|
+
}
|
|
91
|
+
// ---------------------------------------------------------------------------
|
|
92
|
+
// Command factory
|
|
93
|
+
// ---------------------------------------------------------------------------
|
|
94
|
+
/**
 * Build the Commander command for `ailf interpret <reportId>`.
 *
 * The action loads a report (by id, or the most recent one when the argument
 * is the literal "latest"), optionally loads a baseline report named by
 * `--compare`, runs the diagnosis runner, and prints either the full
 * Diagnosis as JSON (`--json`) or one summary line per card.
 *
 * `options` exists for testability: `storeFactory`, `runnerFactory` and
 * `versionsFromReport` replace the production wiring when supplied, so tests
 * can avoid module mocks.
 *
 * @param options - Optional injection points; every field may be omitted.
 * @returns The configured Commander `Command`.
 */
export function createInterpretCommand(options = {}) {
    const { runnerFactory, storeFactory, versionsFromReport } = options;
    const handleInterpret = async (reportId, opts) => {
        const outputDir = resolveOutputDir(opts.outputDir);
        // --- Store: production composition root unless a factory was injected ---
        let ctx;
        let store;
        if (!storeFactory) {
            // Lazy import so that tests injecting a store never load the
            // composition root. The context is configured read-only:
            // no eval, fetch, publish or caching.
            const { createAppContext: buildContext } = await import("../composition-root.js");
            ctx = buildContext({
                compareEnabled: false,
                gapAnalysisEnabled: false,
                mode: "literacy",
                noAutoScope: false,
                noCache: true,
                noRemoteCache: true,
                outputDir,
                publishEnabled: false,
                rootDir: ROOT,
                searchMode: "open",
                skipEval: true,
                skipFetch: true,
                remote: false,
                apiUrl: "https://ailf-api.sanity.build",
            });
            store = ctx.reportStore;
        }
        else {
            store = storeFactory();
            // Injected-store path has no app context; the runner factory receives null.
            ctx = null;
        }
        if (!store) {
            process.stderr.write("Error: report store is not available\n");
            process.exit(1);
        }
        // --- Main report ---
        let report;
        if (reportId === "latest") {
            report = await store.latest();
        }
        else {
            report = await store.read(reportId);
        }
        if (!report) {
            process.stderr.write(`Error: report not found: ${reportId}\n`);
            process.exit(1);
        }
        // --- Optional baseline for regression-vs-baseline comparison (DIAG-05) ---
        let baseline;
        if (opts.compare) {
            baseline = await store.read(opts.compare);
            if (!baseline) {
                process.stderr.write(`Error: baseline report not found: ${opts.compare}\n`);
                process.exit(1);
            }
        }
        // --- Versions: injected resolver wins over the default one ---
        const resolveVersions = versionsFromReport || defaultVersionsFromReport;
        const versions = resolveVersions(report);
        // --- Runner: injected factory or the composition root's runner ---
        let runner;
        if (!runnerFactory) {
            const { getDiagnosisRunner: buildRunner } = await import("../composition-root.js");
            runner = buildRunner(ctx);
        }
        else {
            runner = runnerFactory(ctx);
        }
        // --- Run diagnosis ---
        // The store record is passed through as the runner's report input;
        // `baseline` is only included in the request when one was loaded.
        const baselinePart = baseline ? { baseline } : {};
        const diagnosis = await runner.run({
            report,
            versions,
            ...baselinePart,
            refresh: opts.refresh ?? false,
        });
        // --- Output ---
        if (opts.json) {
            process.stdout.write(`${JSON.stringify(diagnosis, null, 2)}\n`);
            return;
        }
        for (const card of diagnosis.cards) {
            process.stdout.write(`${formatCardSummaryLine(card)}\n`);
        }
    };
    const cmd = new Command("interpret")
        .description("Generate a Diagnosis for a Report — 8 typed cards explaining what's weak and what to do")
        .argument("<reportId>", "Report ID (or 'latest' for the most recent)")
        .option("-c, --compare <ref>", "Baseline report ID for regression-vs-baseline comparison")
        .option("--refresh", "Bypass the version-keyed cache and recompute")
        .option("--json", "Print full Diagnosis JSON instead of one-line-per-card summary")
        .action(handleInterpret);
    addOutputDirOption(cmd);
    return cmd;
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* lookup-doc command — search Sanity for documentation articles by keyword.
|
|
3
3
|
*
|
|
4
|
-
* Helps external contributors find the correct `slug` for
|
|
4
|
+
* Helps external contributors find the correct `slug` for contextDocs
|
|
5
5
|
* references without needing to browse the CMS or guess from URLs.
|
|
6
6
|
*
|
|
7
7
|
* Usage:
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* lookup-doc command — search Sanity for documentation articles by keyword.
|
|
3
3
|
*
|
|
4
|
-
* Helps external contributors find the correct `slug` for
|
|
4
|
+
* Helps external contributors find the correct `slug` for contextDocs
|
|
5
5
|
* references without needing to browse the CMS or guess from URLs.
|
|
6
6
|
*
|
|
7
7
|
* Usage:
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
import { Command } from "commander";
|
|
15
15
|
export function createLookupDocCommand() {
|
|
16
16
|
return new Command("lookup-doc")
|
|
17
|
-
.description("Search Sanity docs by keyword — find slugs for
|
|
17
|
+
.description("Search Sanity docs by keyword — find slugs for contextDocs references")
|
|
18
18
|
.argument("<keyword>", "Search keyword (matches title and slug)")
|
|
19
19
|
.option("-l, --limit <n>", "Maximum results to show", parseInt, 10)
|
|
20
20
|
.option("-s, --source <name>", "Documentation source (from sources.yaml)")
|
|
@@ -73,7 +73,7 @@ export function createLookupDocCommand() {
|
|
|
73
73
|
console.log(` ${"".padEnd(maxSlugLen + 6)} │ Section: ${section}\n`);
|
|
74
74
|
}
|
|
75
75
|
console.log(" Usage in .ailf/tasks/*.yaml:\n");
|
|
76
|
-
console.log("
|
|
76
|
+
console.log(" contextDocs:");
|
|
77
77
|
console.log(` - slug: ${results[0].slug}`);
|
|
78
78
|
console.log(` reason: "${results[0].title}"`);
|
|
79
79
|
if (results[0].sectionSlug) {
|
|
@@ -27,6 +27,12 @@ export interface ResolvedOptions {
|
|
|
27
27
|
dryRun: boolean;
|
|
28
28
|
gapAnalysisEnabled: boolean;
|
|
29
29
|
graderReplications?: number;
|
|
30
|
+
/**
|
|
31
|
+
* Replications per borderline judgment for the GRAD-04 intra-grader
|
|
32
|
+
* consensus pass. Sourced from `.ailf/config.yaml`'s
|
|
33
|
+
* `execution.borderlineReplications`.
|
|
34
|
+
*/
|
|
35
|
+
borderlineReplications?: number;
|
|
30
36
|
/** Grader context policy from `.ailf/config.yaml` `grader.context` */
|
|
31
37
|
graderContext?: "rubric-only" | "with-docs";
|
|
32
38
|
headerArgs: string[];
|
|
@@ -248,6 +248,7 @@ export function computeResolvedOptions(opts) {
|
|
|
248
248
|
// env var (where one exists) > .ailf/config.yaml > built-in default
|
|
249
249
|
const concurrency = repoConfig?.execution?.concurrency;
|
|
250
250
|
const graderReplications = repoConfig?.execution?.graderReplications;
|
|
251
|
+
const borderlineReplications = repoConfig?.execution?.borderlineReplications;
|
|
251
252
|
const gapAnalysisEnabled = repoConfig?.execution?.gapAnalysis ?? true;
|
|
252
253
|
// Grader context policy. Cascade: env var > .ailf/config.yaml > unset
|
|
253
254
|
// (defaults to rubric-only at the EvalConfig boundary). The env var is the
|
|
@@ -291,6 +292,7 @@ export function computeResolvedOptions(opts) {
|
|
|
291
292
|
dryRun: opts.dryRun,
|
|
292
293
|
gapAnalysisEnabled,
|
|
293
294
|
graderReplications,
|
|
295
|
+
borderlineReplications,
|
|
294
296
|
graderContext,
|
|
295
297
|
headerArgs,
|
|
296
298
|
impactSummary,
|
|
@@ -142,6 +142,7 @@ function toConfigSlice(opts) {
|
|
|
142
142
|
perspectiveOverride: opts.perspectiveOverride,
|
|
143
143
|
graderContext: opts.graderContext,
|
|
144
144
|
graderReplications: opts.graderReplications,
|
|
145
|
+
borderlineReplications: opts.borderlineReplications,
|
|
145
146
|
gapAnalysisEnabled: opts.gapAnalysisEnabled,
|
|
146
147
|
noRemoteCache: opts.noRemoteCache,
|
|
147
148
|
// D0037 / W0069 caller envelope overrides — flags override env vars
|