npm - @sanity/ailf - Versions diffs - 4.5.0 → 5.0.0 - Mend

@sanity/ailf 4.5.0 → 5.0.0

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (139)

package/canonical/grader-references/agent-harness-tools.yaml +42 -0
package/canonical/grader-references/knowledge-probe-recall.yaml +36 -0
package/canonical/grader-references/mcp-server-spec.yaml +51 -0
package/canonical/grader-references/portable-text.yaml +48 -0
package/config/rubrics.ts +38 -2
package/dist/_vendor/ailf-core/artifact-registry.d.ts +197 -2
package/dist/_vendor/ailf-core/artifact-registry.js +419 -5
package/dist/_vendor/ailf-core/examples/index.d.ts +125 -26
package/dist/_vendor/ailf-core/examples/index.js +146 -47
package/dist/_vendor/ailf-core/ports/context.d.ts +26 -0
package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
package/dist/_vendor/ailf-core/ports/index.js +1 -0
package/dist/_vendor/ailf-core/ports/llm-client.d.ts +112 -0
package/dist/_vendor/ailf-core/ports/llm-client.js +68 -0
package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +15 -0
package/dist/_vendor/ailf-core/schemas/branded-string.d.ts +40 -0
package/dist/_vendor/ailf-core/schemas/branded-string.js +45 -0
package/dist/_vendor/ailf-core/schemas/confidence-schema.d.ts +36 -0
package/dist/_vendor/ailf-core/schemas/confidence-schema.js +32 -0
package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
package/dist/_vendor/ailf-core/schemas/eval-config.js +8 -4
package/dist/_vendor/ailf-core/schemas/index.d.ts +2 -0
package/dist/_vendor/ailf-core/schemas/index.js +9 -0
package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -0
package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +34 -8
package/dist/_vendor/ailf-core/schemas/pipeline.js +23 -1
package/dist/_vendor/ailf-core/services/diagnosis/registry.d.ts +40 -0
package/dist/_vendor/ailf-core/services/diagnosis/registry.js +25 -0
package/dist/_vendor/ailf-core/services/diagnosis-runner.d.ts +19 -0
package/dist/_vendor/ailf-core/services/diagnosis-runner.js +19 -0
package/dist/_vendor/ailf-core/services/index.d.ts +2 -0
package/dist/_vendor/ailf-core/services/index.js +5 -0
package/dist/_vendor/ailf-core/services/report-to-markdown.js +3 -2
package/dist/_vendor/ailf-core/types/attribution.d.ts +82 -0
package/dist/_vendor/ailf-core/types/attribution.js +18 -0
package/dist/_vendor/ailf-core/types/branded-ids.d.ts +26 -1
package/dist/_vendor/ailf-core/types/branded-ids.js +80 -4
package/dist/_vendor/ailf-core/types/confidence.d.ts +68 -0
package/dist/_vendor/ailf-core/types/confidence.js +56 -0
package/dist/_vendor/ailf-core/types/diagnosis.d.ts +169 -0
package/dist/_vendor/ailf-core/types/diagnosis.js +17 -0
package/dist/_vendor/ailf-core/types/generalized-task.d.ts +16 -1
package/dist/_vendor/ailf-core/types/grader-judgment.d.ts +125 -0
package/dist/_vendor/ailf-core/types/grader-judgment.js +30 -0
package/dist/_vendor/ailf-core/types/index.d.ts +82 -29
package/dist/_vendor/ailf-core/types/index.js +16 -1
package/dist/_vendor/ailf-core/types/legacy-grader-judgment.d.ts +55 -0
package/dist/_vendor/ailf-core/types/legacy-grader-judgment.js +30 -0
package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +1 -0
package/dist/_vendor/ailf-core/types/repo-config.d.ts +8 -0
package/dist/_vendor/ailf-shared/document-ref.d.ts +1 -1
package/dist/adapters/api-client/build-request.d.ts +1 -0
package/dist/adapters/api-client/build-request.js +3 -0
package/dist/adapters/attribution/attribution-meta-writer.d.ts +35 -0
package/dist/adapters/attribution/attribution-meta-writer.js +34 -0
package/dist/adapters/attribution/index.d.ts +9 -0
package/dist/adapters/attribution/index.js +8 -0
package/dist/adapters/attribution/per-entry-attribution-writer.d.ts +56 -0
package/dist/adapters/attribution/per-entry-attribution-writer.js +49 -0
package/dist/adapters/config-sources/file-config-adapter.js +1 -0
package/dist/adapters/grader-outputs/index.d.ts +10 -0
package/dist/adapters/grader-outputs/index.js +8 -0
package/dist/adapters/grader-outputs/legacy/index.d.ts +11 -0
package/dist/adapters/grader-outputs/legacy/index.js +10 -0
package/dist/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.d.ts +49 -0
package/dist/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.js +48 -0
package/dist/adapters/grader-outputs/promptfoo-grader-output.d.ts +102 -0
package/dist/adapters/grader-outputs/promptfoo-grader-output.js +93 -0
package/dist/adapters/index.d.ts +3 -0
package/dist/adapters/index.js +4 -0
package/dist/adapters/llm/anthropic-llm-client.d.ts +48 -0
package/dist/adapters/llm/anthropic-llm-client.js +205 -0
package/dist/adapters/llm/fake-llm-client.d.ts +49 -0
package/dist/adapters/llm/fake-llm-client.js +63 -0
package/dist/adapters/llm/index.d.ts +9 -0
package/dist/adapters/llm/index.js +4 -0
package/dist/adapters/llm/openai-llm-client.d.ts +44 -0
package/dist/adapters/llm/openai-llm-client.js +168 -0
package/dist/adapters/llm/pricing.d.ts +12 -0
package/dist/adapters/llm/pricing.js +8 -0
package/dist/adapters/llm/retry.d.ts +56 -0
package/dist/adapters/llm/retry.js +66 -0
package/dist/adapters/task-sources/content-lake-task-source.d.ts +5 -1
package/dist/adapters/task-sources/content-lake-task-source.js +28 -2
package/dist/adapters/task-sources/repo-schemas.d.ts +90 -22
package/dist/adapters/task-sources/repo-schemas.js +19 -2
package/dist/artifact-capture/api-gateway-artifact-writer.js +2 -1
package/dist/artifact-capture/batching-api-gateway-artifact-writer.js +2 -1
package/dist/artifact-capture/gcs-artifact-writer.js +3 -1
package/dist/artifact-capture/local-fs-artifact-writer.js +3 -1
package/dist/commands/calculate-scores.js +1 -1
package/dist/commands/explain-handler.js +1 -1
package/dist/commands/lookup-doc.d.ts +1 -1
package/dist/commands/lookup-doc.js +3 -3
package/dist/commands/pipeline-action.d.ts +6 -0
package/dist/commands/pipeline-action.js +2 -0
package/dist/commands/remote-pipeline.js +1 -0
package/dist/composition-root.d.ts +59 -1
package/dist/composition-root.js +95 -0
package/dist/config/rubrics.ts +38 -2
package/dist/grader/agent-harness.d.ts +14 -0
package/dist/grader/agent-harness.js +17 -0
package/dist/grader/common.d.ts +17 -0
package/dist/grader/common.js +21 -0
package/dist/grader/index.d.ts +38 -0
package/dist/grader/index.js +75 -0
package/dist/grader/knowledge-probe.d.ts +14 -0
package/dist/grader/knowledge-probe.js +18 -0
package/dist/grader/literacy.d.ts +13 -0
package/dist/grader/literacy.js +17 -0
package/dist/grader/mcp.d.ts +14 -0
package/dist/grader/mcp.js +18 -0
package/dist/orchestration/build-app-context.js +1 -0
package/dist/orchestration/build-step-sequence.js +5 -0
package/dist/orchestration/steps/calculate-scores-step.js +23 -1
package/dist/orchestration/steps/compute-attribution-step.d.ts +44 -0
package/dist/orchestration/steps/compute-attribution-step.js +279 -0
package/dist/orchestration/steps/gap-analysis-step.js +35 -7
package/dist/orchestration/steps/index.d.ts +1 -0
package/dist/orchestration/steps/index.js +1 -0
package/dist/pipeline/attribution.d.ts +15 -0
package/dist/pipeline/attribution.js +18 -9
package/dist/pipeline/borderline-consensus-runner.d.ts +63 -0
package/dist/pipeline/borderline-consensus-runner.js +124 -0
package/dist/pipeline/borderline-detector.d.ts +24 -0
package/dist/pipeline/borderline-detector.js +26 -0
package/dist/pipeline/calculate-scores.d.ts +114 -3
package/dist/pipeline/calculate-scores.js +426 -24
package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
package/dist/pipeline/compiler/literacy-bridge.js +35 -17
package/dist/pipeline/compiler/rubric-resolution.d.ts +15 -0
package/dist/pipeline/compiler/rubric-resolution.js +9 -1
package/dist/pipeline/compute-attribution.d.ts +80 -0
package/dist/pipeline/compute-attribution.js +196 -0
package/dist/pipeline/failure-modes.d.ts +52 -17
package/dist/pipeline/failure-modes.js +178 -117
package/dist/pipeline/map-request-to-config.js +1 -0
package/package.json +6 -4

package/dist/adapters/task-sources/repo-schemas.d.ts CHANGED Viewed

@@ -32,6 +32,40 @@ export type CuratedAssertionType = (typeof CURATED_ASSERTION_TYPES)[number];
  */
 export declare const RUBRIC_TEMPLATE_NAMES: readonly ["task-completion", "code-correctness", "doc-coverage", "mcp-input-validation", "mcp-output-correctness", "mcp-error-handling", "mcp-security", "factual-correctness", "completeness", "currency", "process-quality", "agent-output", "agent-tool-usage"];
 export type RubricTemplateName = (typeof RUBRIC_TEMPLATE_NAMES)[number];
+/**
+ * A single criterion within an llm-rubric assertion. Stable id-text pair.
+ */
+export declare const CriterionRefSchema: z.ZodObject<{
+    id: z.ZodString;
+    text: z.ZodString;
+}, z.core.$strip>;
+/**
+ * A templated LLM-rubric assertion — uses one of the predefined rubric
+ * templates with author-supplied criteria.
+ */
+export declare const TemplatedAssertionSchema: z.ZodObject<{
+    type: z.ZodLiteral<"llm-rubric">;
+    template: z.ZodEnum<{
+        "task-completion": "task-completion";
+        "code-correctness": "code-correctness";
+        "doc-coverage": "doc-coverage";
+        "mcp-input-validation": "mcp-input-validation";
+        "mcp-output-correctness": "mcp-output-correctness";
+        "mcp-error-handling": "mcp-error-handling";
+        "mcp-security": "mcp-security";
+        "factual-correctness": "factual-correctness";
+        completeness: "completeness";
+        currency: "currency";
+        "process-quality": "process-quality";
+        "agent-output": "agent-output";
+        "agent-tool-usage": "agent-tool-usage";
+    }>;
+    criteria: z.ZodArray<z.ZodObject<{
+        id: z.ZodString;
+        text: z.ZodString;
+    }, z.core.$strip>>;
+    weight: z.ZodOptional<z.ZodNumber>;
+}, z.core.$strip>;
 /**
  * Zod schema for a single task definition — a mode-discriminated union
  * mirroring `GeneralizedTaskDefinition`.
@@ -73,10 +107,14 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
             "agent-output": "agent-output";
             "agent-tool-usage": "agent-tool-usage";
         }>;
-        criteria: z.ZodArray<z.ZodString>;
+        criteria: z.ZodArray<z.ZodObject<{
+            id: z.ZodString;
+            text: z.ZodString;
+        }, z.core.$strip>>;
         weight: z.ZodOptional<z.ZodNumber>;
     }, z.core.$strip>, z.ZodObject<{
         type: z.ZodEnum<{
+            cost: "cost";
             "llm-rubric": "llm-rubric";
             contains: "contains";
             "contains-any": "contains-any";
@@ -87,7 +125,6 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
             regex: "regex";
             javascript: "javascript";
             similar: "similar";
-            cost: "cost";
             latency: "latency";
             "file-exists": "file-exists";
             "file-contains": "file-contains";
@@ -187,10 +224,14 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
             "agent-output": "agent-output";
             "agent-tool-usage": "agent-tool-usage";
         }>;
-        criteria: z.ZodArray<z.ZodString>;
+        criteria: z.ZodArray<z.ZodObject<{
+            id: z.ZodString;
+            text: z.ZodString;
+        }, z.core.$strip>>;
         weight: z.ZodOptional<z.ZodNumber>;
     }, z.core.$strip>, z.ZodObject<{
         type: z.ZodEnum<{
+            cost: "cost";
             "llm-rubric": "llm-rubric";
             contains: "contains";
             "contains-any": "contains-any";
@@ -201,7 +242,6 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
             regex: "regex";
             javascript: "javascript";
             similar: "similar";
-            cost: "cost";
             latency: "latency";
             "file-exists": "file-exists";
             "file-contains": "file-contains";
@@ -341,10 +381,14 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
             "agent-output": "agent-output";
             "agent-tool-usage": "agent-tool-usage";
         }>;
-        criteria: z.ZodArray<z.ZodString>;
+        criteria: z.ZodArray<z.ZodObject<{
+            id: z.ZodString;
+            text: z.ZodString;
+        }, z.core.$strip>>;
         weight: z.ZodOptional<z.ZodNumber>;
     }, z.core.$strip>, z.ZodObject<{
         type: z.ZodEnum<{
+            cost: "cost";
             "llm-rubric": "llm-rubric";
             contains: "contains";
             "contains-any": "contains-any";
@@ -355,7 +399,6 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
             regex: "regex";
             javascript: "javascript";
             similar: "similar";
-            cost: "cost";
             latency: "latency";
             "file-exists": "file-exists";
             "file-contains": "file-contains";
@@ -472,10 +515,14 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
             "agent-output": "agent-output";
             "agent-tool-usage": "agent-tool-usage";
         }>;
-        criteria: z.ZodArray<z.ZodString>;
+        criteria: z.ZodArray<z.ZodObject<{
+            id: z.ZodString;
+            text: z.ZodString;
+        }, z.core.$strip>>;
         weight: z.ZodOptional<z.ZodNumber>;
     }, z.core.$strip>, z.ZodObject<{
         type: z.ZodEnum<{
+            cost: "cost";
             "llm-rubric": "llm-rubric";
             contains: "contains";
             "contains-any": "contains-any";
@@ -486,7 +533,6 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
             regex: "regex";
             javascript: "javascript";
             similar: "similar";
-            cost: "cost";
             latency: "latency";
             "file-exists": "file-exists";
             "file-contains": "file-contains";
@@ -591,10 +637,14 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
             "agent-output": "agent-output";
             "agent-tool-usage": "agent-tool-usage";
         }>;
-        criteria: z.ZodArray<z.ZodString>;
+        criteria: z.ZodArray<z.ZodObject<{
+            id: z.ZodString;
+            text: z.ZodString;
+        }, z.core.$strip>>;
         weight: z.ZodOptional<z.ZodNumber>;
     }, z.core.$strip>, z.ZodObject<{
         type: z.ZodEnum<{
+            cost: "cost";
             "llm-rubric": "llm-rubric";
             contains: "contains";
             "contains-any": "contains-any";
@@ -605,7 +655,6 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
             regex: "regex";
             javascript: "javascript";
             similar: "similar";
-            cost: "cost";
             latency: "latency";
             "file-exists": "file-exists";
             "file-contains": "file-contains";
@@ -699,10 +748,14 @@ export declare const ContentLakeAuthorableTaskSchema: z.ZodObject<{
             "agent-output": "agent-output";
             "agent-tool-usage": "agent-tool-usage";
         }>;
-        criteria: z.ZodArray<z.ZodString>;
+        criteria: z.ZodArray<z.ZodObject<{
+            id: z.ZodString;
+            text: z.ZodString;
+        }, z.core.$strip>>;
         weight: z.ZodOptional<z.ZodNumber>;
     }, z.core.$strip>, z.ZodObject<{
         type: z.ZodEnum<{
+            cost: "cost";
             "llm-rubric": "llm-rubric";
             contains: "contains";
             "contains-any": "contains-any";
@@ -713,7 +766,6 @@ export declare const ContentLakeAuthorableTaskSchema: z.ZodObject<{
             regex: "regex";
             javascript: "javascript";
             similar: "similar";
-            cost: "cost";
             latency: "latency";
             "file-exists": "file-exists";
             "file-contains": "file-contains";
@@ -819,10 +871,14 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
             "agent-output": "agent-output";
             "agent-tool-usage": "agent-tool-usage";
         }>;
-        criteria: z.ZodArray<z.ZodString>;
+        criteria: z.ZodArray<z.ZodObject<{
+            id: z.ZodString;
+            text: z.ZodString;
+        }, z.core.$strip>>;
         weight: z.ZodOptional<z.ZodNumber>;
     }, z.core.$strip>, z.ZodObject<{
         type: z.ZodEnum<{
+            cost: "cost";
             "llm-rubric": "llm-rubric";
             contains: "contains";
             "contains-any": "contains-any";
@@ -833,7 +889,6 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
             regex: "regex";
             javascript: "javascript";
             similar: "similar";
-            cost: "cost";
             latency: "latency";
             "file-exists": "file-exists";
             "file-contains": "file-contains";
@@ -933,10 +988,14 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
             "agent-output": "agent-output";
             "agent-tool-usage": "agent-tool-usage";
         }>;
-        criteria: z.ZodArray<z.ZodString>;
+        criteria: z.ZodArray<z.ZodObject<{
+            id: z.ZodString;
+            text: z.ZodString;
+        }, z.core.$strip>>;
         weight: z.ZodOptional<z.ZodNumber>;
     }, z.core.$strip>, z.ZodObject<{
         type: z.ZodEnum<{
+            cost: "cost";
             "llm-rubric": "llm-rubric";
             contains: "contains";
             "contains-any": "contains-any";
@@ -947,7 +1006,6 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
             regex: "regex";
             javascript: "javascript";
             similar: "similar";
-            cost: "cost";
             latency: "latency";
             "file-exists": "file-exists";
             "file-contains": "file-contains";
@@ -1087,10 +1145,14 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
             "agent-output": "agent-output";
             "agent-tool-usage": "agent-tool-usage";
         }>;
-        criteria: z.ZodArray<z.ZodString>;
+        criteria: z.ZodArray<z.ZodObject<{
+            id: z.ZodString;
+            text: z.ZodString;
+        }, z.core.$strip>>;
         weight: z.ZodOptional<z.ZodNumber>;
     }, z.core.$strip>, z.ZodObject<{
         type: z.ZodEnum<{
+            cost: "cost";
             "llm-rubric": "llm-rubric";
             contains: "contains";
             "contains-any": "contains-any";
@@ -1101,7 +1163,6 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
             regex: "regex";
             javascript: "javascript";
             similar: "similar";
-            cost: "cost";
             latency: "latency";
             "file-exists": "file-exists";
             "file-contains": "file-contains";
@@ -1218,10 +1279,14 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
             "agent-output": "agent-output";
             "agent-tool-usage": "agent-tool-usage";
         }>;
-        criteria: z.ZodArray<z.ZodString>;
+        criteria: z.ZodArray<z.ZodObject<{
+            id: z.ZodString;
+            text: z.ZodString;
+        }, z.core.$strip>>;
         weight: z.ZodOptional<z.ZodNumber>;
     }, z.core.$strip>, z.ZodObject<{
         type: z.ZodEnum<{
+            cost: "cost";
             "llm-rubric": "llm-rubric";
             contains: "contains";
             "contains-any": "contains-any";
@@ -1232,7 +1297,6 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
             regex: "regex";
             javascript: "javascript";
             similar: "similar";
-            cost: "cost";
             latency: "latency";
             "file-exists": "file-exists";
             "file-contains": "file-contains";
@@ -1337,10 +1401,14 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
             "agent-output": "agent-output";
             "agent-tool-usage": "agent-tool-usage";
         }>;
-        criteria: z.ZodArray<z.ZodString>;
+        criteria: z.ZodArray<z.ZodObject<{
+            id: z.ZodString;
+            text: z.ZodString;
+        }, z.core.$strip>>;
         weight: z.ZodOptional<z.ZodNumber>;
     }, z.core.$strip>, z.ZodObject<{
         type: z.ZodEnum<{
+            cost: "cost";
             "llm-rubric": "llm-rubric";
             contains: "contains";
             "contains-any": "contains-any";
@@ -1351,7 +1419,6 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
             regex: "regex";
             javascript: "javascript";
             similar: "similar";
-            cost: "cost";
             latency: "latency";
             "file-exists": "file-exists";
             "file-contains": "file-contains";
@@ -1468,6 +1535,7 @@ export declare const RepoConfigSchema: z.ZodObject<{
     execution: z.ZodOptional<z.ZodObject<{
         concurrency: z.ZodOptional<z.ZodNumber>;
         graderReplications: z.ZodOptional<z.ZodNumber>;
+        borderlineReplications: z.ZodOptional<z.ZodNumber>;
         gapAnalysis: z.ZodOptional<z.ZodBoolean>;
         apiUrl: z.ZodOptional<z.ZodString>;
     }, z.core.$strip>>;

package/dist/adapters/task-sources/repo-schemas.js CHANGED Viewed

@@ -111,14 +111,26 @@ const CanonicalDocRefSchema = z.union([
 // ---------------------------------------------------------------------------
 // Assertion schemas
 // ---------------------------------------------------------------------------
+/**
+ * A single criterion within an llm-rubric assertion. Stable id-text pair.
+ */
+export const CriterionRefSchema = z.object({
+    id: z
+        .string()
+        .min(1)
+        .regex(/^[a-z0-9][a-z0-9-]*$/, {
+        message: "criterion id must be lowercase alphanumeric with hyphens",
+    }),
+    text: z.string().min(1),
+});
 /**
  * A templated LLM-rubric assertion — uses one of the predefined rubric
  * templates with author-supplied criteria.
  */
-const TemplatedAssertionSchema = z.object({
+export const TemplatedAssertionSchema = z.object({
     type: z.literal("llm-rubric"),
     template: z.enum(RUBRIC_TEMPLATE_NAMES),
-    criteria: z.array(z.string().min(1)).min(1),
+    criteria: z.array(CriterionRefSchema).min(1),
     weight: z.number().optional(),
 });
 /**
@@ -562,6 +574,11 @@ const ExecutionConfigSchema = z
     .object({
     concurrency: z.number().int().positive().optional(),
     graderReplications: z.number().int().positive().optional(),
+    /**
+     * Plan 03-04 GRAD-04 — replications per borderline judgment.
+     * Default 3 (composition-root). Positive integer.
+     */
+    borderlineReplications: z.number().int().positive().optional(),
     gapAnalysis: z.boolean().optional(),
     apiUrl: z.string().url().optional(),
 })

package/dist/artifact-capture/api-gateway-artifact-writer.js CHANGED Viewed

@@ -27,7 +27,7 @@
  * @see docs/decisions/D0032-run-anchored-artifact-store.md
  * @see docs/decisions/D0033-unified-run-anchored-artifact-capture.md
  */
-import { ARTIFACT_REGISTRY, NotImplementedError, } from "../_vendor/ailf-core/index.js";
+import { ARTIFACT_REGISTRY, assertWritePolicyMatches, NotImplementedError, } from "../_vendor/ailf-core/index.js";
 import { prepareUploadBody } from "./prepare-upload-body.js";
 import { NO_OP_UPLOAD_METRICS, } from "./upload-metrics.js";
 export class ApiGatewayArtifactWriter {
@@ -40,6 +40,7 @@ export class ApiGatewayArtifactWriter {
     // ---- Canonical W0049 API ------------------------------------------------
     async emit(type, association, payload) {
         const descriptor = ARTIFACT_REGISTRY[type];
+        assertWritePolicyMatches("pipeline", descriptor);
         const runId = association.run;
         if (!runId) {
             console.warn(`  ⚠️  emit("${type}"): association.run is required, skipping`);

package/dist/artifact-capture/batching-api-gateway-artifact-writer.js CHANGED Viewed

@@ -25,7 +25,7 @@
  * does this writer. Traces flow through the GCS-direct writer when ADC
  * credentials are present.
  */
-import { ARTIFACT_REGISTRY, BULK_ENTRY_KEY, NotImplementedError, } from "../_vendor/ailf-core/index.js";
+import { ARTIFACT_REGISTRY, assertWritePolicyMatches, BULK_ENTRY_KEY, NotImplementedError, } from "../_vendor/ailf-core/index.js";
 import { prepareUploadBody } from "./prepare-upload-body.js";
 import { NO_OP_UPLOAD_METRICS, } from "./upload-metrics.js";
 /**
@@ -64,6 +64,7 @@ export class BatchingApiGatewayArtifactWriter {
     // ---- ArtifactWriter surface --------------------------------------------
     async emit(type, association, payload) {
         const descriptor = ARTIFACT_REGISTRY[type];
+        assertWritePolicyMatches("pipeline", descriptor);
         const runId = association.run;
         if (!runId) {
             console.warn(`  ⚠️  emit("${type}"): association.run is required, skipping`);

package/dist/artifact-capture/gcs-artifact-writer.js CHANGED Viewed

@@ -28,7 +28,7 @@
  * @see docs/decisions/D0033-unified-run-anchored-artifact-capture.md
  */
 import { Storage } from "@google-cloud/storage";
-import { ARTIFACT_REGISTRY, buildManifestPreview, } from "../_vendor/ailf-core/index.js";
+import { ARTIFACT_REGISTRY, assertWritePolicyMatches, buildManifestPreview, } from "../_vendor/ailf-core/index.js";
 import { resolveUploadConcurrency } from "./parallel-emit.js";
 import { prepareUploadBody } from "./prepare-upload-body.js";
 import { redactArtifactData } from "./redact-artifact.js";
@@ -79,6 +79,7 @@ export class GcsArtifactWriter {
     // ---- Canonical W0049 API ------------------------------------------------
     async emit(type, association, payload) {
         const descriptor = ARTIFACT_REGISTRY[type];
+        assertWritePolicyMatches("pipeline", descriptor);
         const runId = association.run;
         if (!runId) {
             console.warn(`  ⚠️  emit("${type}"): association.run is required, skipping`);
@@ -132,6 +133,7 @@ export class GcsArtifactWriter {
     }
     async appendNdjson(type, association, rows) {
         const descriptor = ARTIFACT_REGISTRY[type];
+        assertWritePolicyMatches("pipeline", descriptor);
         if (descriptor.mime !== "application/x-ndjson") {
             console.warn(`  ⚠️  appendNdjson("${type}"): descriptor mime is ${descriptor.mime}, not application/x-ndjson — skipping`);
             return null;

package/dist/artifact-capture/local-fs-artifact-writer.js CHANGED Viewed

@@ -38,7 +38,7 @@
  */
 import { promises as fs } from "node:fs";
 import path from "node:path";
-import { ARTIFACT_REGISTRY, buildManifestPreview, } from "../_vendor/ailf-core/index.js";
+import { ARTIFACT_REGISTRY, assertWritePolicyMatches, buildManifestPreview, } from "../_vendor/ailf-core/index.js";
 import { redactArtifactData } from "./redact-artifact.js";
 // ---------------------------------------------------------------------------
 // Implementation
@@ -66,6 +66,7 @@ export class LocalFilesystemArtifactWriter {
         if (this.excludeSet.has(type))
             return null;
         const descriptor = ARTIFACT_REGISTRY[type];
+        assertWritePolicyMatches("pipeline", descriptor);
         const runId = association.run;
         if (!runId) {
             console.warn(`  ⚠️  emit("${type}"): association.run is required, skipping`);
@@ -127,6 +128,7 @@ export class LocalFilesystemArtifactWriter {
         if (this.excludeSet.has(type))
             return null;
         const descriptor = ARTIFACT_REGISTRY[type];
+        assertWritePolicyMatches("pipeline", descriptor);
         if (descriptor.mime !== "application/x-ndjson") {
             console.warn(`  ⚠️  appendNdjson("${type}"): descriptor mime is ${descriptor.mime}, not application/x-ndjson — skipping`);
             return null;

package/dist/commands/calculate-scores.js CHANGED Viewed

@@ -38,7 +38,7 @@ export function createCalculateScoresCommand() {
                 remote: false,
                 apiUrl: "https://ailf-api.sanity.build",
             });
-            const result = calculateAndWriteScores({
+            const result = await calculateAndWriteScores({
                 resultsPath,
                 rootDir: ctx.config.rootDir,
                 source: opts.source,

package/dist/commands/explain-handler.js CHANGED Viewed

@@ -298,7 +298,7 @@ const EXPLAIN_REGISTRY = {
         ],
     },
     "lookup-doc": {
-        description: "Search Sanity for documentation articles by keyword (find slugs for canonicalDocs)",
+        description: "Search Sanity for documentation articles by keyword (find slugs for contextDocs)",
         steps: [
             {
                 cacheStatus: "miss",

package/dist/commands/lookup-doc.d.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 /**
  * lookup-doc command — search Sanity for documentation articles by keyword.
  *
- * Helps external contributors find the correct `slug` for canonicalDocs
+ * Helps external contributors find the correct `slug` for contextDocs
  * references without needing to browse the CMS or guess from URLs.
  *
  * Usage:

package/dist/commands/lookup-doc.js CHANGED Viewed

@@ -1,7 +1,7 @@
 /**
  * lookup-doc command — search Sanity for documentation articles by keyword.
  *
- * Helps external contributors find the correct `slug` for canonicalDocs
+ * Helps external contributors find the correct `slug` for contextDocs
  * references without needing to browse the CMS or guess from URLs.
  *
  * Usage:
@@ -14,7 +14,7 @@
 import { Command } from "commander";
 export function createLookupDocCommand() {
     return new Command("lookup-doc")
-        .description("Search Sanity docs by keyword — find slugs for canonicalDocs references")
+        .description("Search Sanity docs by keyword — find slugs for contextDocs references")
         .argument("<keyword>", "Search keyword (matches title and slug)")
         .option("-l, --limit <n>", "Maximum results to show", parseInt, 10)
         .option("-s, --source <name>", "Documentation source (from sources.yaml)")
@@ -73,7 +73,7 @@ export function createLookupDocCommand() {
             console.log(`  ${"".padEnd(maxSlugLen + 6)}  │ Section: ${section}\n`);
         }
         console.log("  Usage in .ailf/tasks/*.yaml:\n");
-        console.log("  canonicalDocs:");
+        console.log("  contextDocs:");
         console.log(`    - slug: ${results[0].slug}`);
         console.log(`      reason: "${results[0].title}"`);
         if (results[0].sectionSlug) {

package/dist/commands/pipeline-action.d.ts CHANGED Viewed

@@ -27,6 +27,12 @@ export interface ResolvedOptions {
     dryRun: boolean;
     gapAnalysisEnabled: boolean;
     graderReplications?: number;
+    /**
+     * Replications per borderline judgment for the GRAD-04 intra-grader
+     * consensus pass. Sourced from `.ailf/config.yaml`'s
+     * `execution.borderlineReplications`.
+     */
+    borderlineReplications?: number;
     /** Grader context policy from `.ailf/config.yaml` `grader.context` */
     graderContext?: "rubric-only" | "with-docs";
     headerArgs: string[];

package/dist/commands/pipeline-action.js CHANGED Viewed

@@ -248,6 +248,7 @@ export function computeResolvedOptions(opts) {
     //   env var (where one exists) > .ailf/config.yaml > built-in default
     const concurrency = repoConfig?.execution?.concurrency;
     const graderReplications = repoConfig?.execution?.graderReplications;
+    const borderlineReplications = repoConfig?.execution?.borderlineReplications;
     const gapAnalysisEnabled = repoConfig?.execution?.gapAnalysis ?? true;
     // Grader context policy. Cascade: env var > .ailf/config.yaml > unset
     // (defaults to rubric-only at the EvalConfig boundary). The env var is the
@@ -291,6 +292,7 @@ export function computeResolvedOptions(opts) {
         dryRun: opts.dryRun,
         gapAnalysisEnabled,
         graderReplications,
+        borderlineReplications,
         graderContext,
         headerArgs,
         impactSummary,

package/dist/commands/remote-pipeline.js CHANGED Viewed

@@ -142,6 +142,7 @@ function toConfigSlice(opts) {
         perspectiveOverride: opts.perspectiveOverride,
         graderContext: opts.graderContext,
         graderReplications: opts.graderReplications,
+        borderlineReplications: opts.borderlineReplications,
         gapAnalysisEnabled: opts.gapAnalysisEnabled,
         noRemoteCache: opts.noRemoteCache,
         // D0037 / W0069 caller envelope overrides — flags override env vars

package/dist/composition-root.d.ts CHANGED Viewed

@@ -15,7 +15,8 @@
  * @see packages/core/src/ports/context.ts — AppContext interface
  * @see docs/archive/exec-plans/ports-and-adapters/phase-7-composition-root.md
  */
-import { type AppContext, type ArtifactWriter, type ArtifactWriterProgressOptions, type AssertionRegistration, type Logger, type ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
+import { type AppContext, type ArtifactWriter, type ArtifactWriterProgressOptions, type AssertionRegistration, type LLMClient, type Logger, type ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
+import { type BorderlineConsensusOptions, type BorderlineConsensusResult } from "./pipeline/borderline-consensus-runner.js";
 import { CompositeTaskSource, ContentLakeTaskSource, RepoTaskSource } from "./adapters/task-sources/index.js";
 /**
  * Create a fully wired AppContext from resolved configuration.
@@ -24,6 +25,28 @@ import { CompositeTaskSource, ContentLakeTaskSource, RepoTaskSource } from "./ad
  * Swapping an adapter is a one-line change in this function.
  */
 export declare function createAppContext(config: ResolvedConfig): AppContext;
+/**
+ * Typed key bag passed to `createLLMClient`. The composition root reads
+ * env once and supplies values here; the factory stays pure so tests don't
+ * have to mutate `process.env`.
+ */
+export interface LLMClientKeys {
+    anthropicApiKey?: string;
+    openaiApiKey?: string;
+}
+/**
+ * Select the LLMClient adapter based on `config.llmProvider` and the
+ * supplied API keys. Returns `undefined` when no usable credential is
+ * present — `AppContext.llmClient` stays unset and consumers handle that
+ * explicitly.
+ *
+ * Adapters never read `process.env` themselves (per
+ * `.claude/rules/typescript.md`); env mapping happens at the call site
+ * (typically `createAppContext`).
+ *
+ * Exported for unit-test access; not part of the public package API.
+ */
+export declare function createLLMClient(config: ResolvedConfig, keys: LLMClientKeys, logger: Logger): LLMClient | undefined;
 /**
  * Selects the `ArtifactWriter` wiring per D0033 M4:
  *
@@ -61,3 +84,38 @@ export declare function createTaskSource(config: ResolvedConfig): CompositeTaskS
  * explicit mode whitelists.
  */
 export declare const FRAMEWORK_ASSERTIONS: AssertionRegistration[];
+/**
+ * Severity boundaries from `packages/eval/config/thresholds.ts`
+ * (severity.critical/warning/info `composite-below` at L50/54/58 — 30, 50,
+ * 60). The borderline detector flags a judgment when its score is within
+ * ±5 of any of these. Composition-root reads them ONCE and threads the
+ * typed `readonly number[]` into `runBorderlineConsensus` rather than
+ * re-deriving them at each call site (Pitfall 5 — single source of truth
+ * for the scale).
+ */
+export declare const BORDERLINE_SEVERITY_THRESHOLDS: readonly number[];
+/**
+ * Default replications per borderline judgment when the caller's
+ * `RepoConfig.execution.borderlineReplications` is unset (locked answer
+ * #4 in plan 03-04). Three replications + the original score = four
+ * scores per consistency record, which is the minimum that produces a
+ * non-degenerate stdDev / median split.
+ */
+export declare const DEFAULT_BORDERLINE_REPLICATIONS = 3;
+/**
+ * Factory for the borderline-consensus runner. Returns a function that
+ * applies the severity-threshold and replication defaults from
+ * composition-root, leaving the live grader entry point (the `regrade`
+ * callback) and the candidate `judgments` array as runtime inputs.
+ *
+ * The pipeline-side caller (currently `pipeline/calculate-scores.ts`'s
+ * post-extraction junction) supplies the `regrade` callback that maps a
+ * `GraderJudgment` to a fresh score via the response/rubric text from
+ * the original Promptfoo result. See the runner's header for the
+ * rationale on injecting the regrader rather than calling `gradeOnce`
+ * inline (Pitfall 6 — preserve the runner's purity wrt the existing
+ * grader-comparison split).
+ */
+export declare function createBorderlineConsensusRunner(opts: {
+    borderlineReplications?: number;
+}): (args: Pick<BorderlineConsensusOptions, "judgments" | "logger" | "regrade">) => Promise<BorderlineConsensusResult>;