@sanity/ailf 4.6.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119):
  1. package/canonical/grader-references/agent-harness-tools.yaml +42 -0
  2. package/canonical/grader-references/knowledge-probe-recall.yaml +36 -0
  3. package/canonical/grader-references/mcp-server-spec.yaml +51 -0
  4. package/canonical/grader-references/portable-text.yaml +48 -0
  5. package/config/rubrics.ts +38 -2
  6. package/dist/_vendor/ailf-core/artifact-registry.d.ts +60 -2
  7. package/dist/_vendor/ailf-core/artifact-registry.js +288 -7
  8. package/dist/_vendor/ailf-core/examples/index.d.ts +125 -26
  9. package/dist/_vendor/ailf-core/examples/index.js +146 -47
  10. package/dist/_vendor/ailf-core/ports/context.d.ts +8 -0
  11. package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +15 -0
  12. package/dist/_vendor/ailf-core/schemas/branded-string.d.ts +40 -0
  13. package/dist/_vendor/ailf-core/schemas/branded-string.js +45 -0
  14. package/dist/_vendor/ailf-core/schemas/confidence-schema.d.ts +36 -0
  15. package/dist/_vendor/ailf-core/schemas/confidence-schema.js +32 -0
  16. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
  17. package/dist/_vendor/ailf-core/schemas/eval-config.js +8 -4
  18. package/dist/_vendor/ailf-core/schemas/index.d.ts +2 -0
  19. package/dist/_vendor/ailf-core/schemas/index.js +9 -0
  20. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
  21. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -0
  22. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +34 -8
  23. package/dist/_vendor/ailf-core/schemas/pipeline.js +23 -1
  24. package/dist/_vendor/ailf-core/services/diagnosis/registry.d.ts +40 -0
  25. package/dist/_vendor/ailf-core/services/diagnosis/registry.js +25 -0
  26. package/dist/_vendor/ailf-core/services/diagnosis-runner.d.ts +19 -0
  27. package/dist/_vendor/ailf-core/services/diagnosis-runner.js +19 -0
  28. package/dist/_vendor/ailf-core/services/index.d.ts +2 -0
  29. package/dist/_vendor/ailf-core/services/index.js +5 -0
  30. package/dist/_vendor/ailf-core/services/report-to-markdown.js +3 -2
  31. package/dist/_vendor/ailf-core/types/attribution.d.ts +82 -0
  32. package/dist/_vendor/ailf-core/types/attribution.js +18 -0
  33. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +26 -1
  34. package/dist/_vendor/ailf-core/types/branded-ids.js +80 -4
  35. package/dist/_vendor/ailf-core/types/confidence.d.ts +1 -1
  36. package/dist/_vendor/ailf-core/types/confidence.js +7 -0
  37. package/dist/_vendor/ailf-core/types/diagnosis.d.ts +169 -0
  38. package/dist/_vendor/ailf-core/types/diagnosis.js +17 -0
  39. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +16 -1
  40. package/dist/_vendor/ailf-core/types/grader-judgment.d.ts +125 -0
  41. package/dist/_vendor/ailf-core/types/grader-judgment.js +30 -0
  42. package/dist/_vendor/ailf-core/types/index.d.ts +80 -29
  43. package/dist/_vendor/ailf-core/types/index.js +15 -1
  44. package/dist/_vendor/ailf-core/types/legacy-grader-judgment.d.ts +55 -0
  45. package/dist/_vendor/ailf-core/types/legacy-grader-judgment.js +30 -0
  46. package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +1 -0
  47. package/dist/_vendor/ailf-core/types/repo-config.d.ts +8 -0
  48. package/dist/_vendor/ailf-shared/document-ref.d.ts +1 -1
  49. package/dist/adapters/api-client/build-request.d.ts +1 -0
  50. package/dist/adapters/api-client/build-request.js +3 -0
  51. package/dist/adapters/attribution/attribution-meta-writer.d.ts +35 -0
  52. package/dist/adapters/attribution/attribution-meta-writer.js +34 -0
  53. package/dist/adapters/attribution/index.d.ts +9 -0
  54. package/dist/adapters/attribution/index.js +8 -0
  55. package/dist/adapters/attribution/per-entry-attribution-writer.d.ts +56 -0
  56. package/dist/adapters/attribution/per-entry-attribution-writer.js +49 -0
  57. package/dist/adapters/config-sources/file-config-adapter.js +1 -0
  58. package/dist/adapters/grader-outputs/index.d.ts +10 -0
  59. package/dist/adapters/grader-outputs/index.js +8 -0
  60. package/dist/adapters/grader-outputs/legacy/index.d.ts +11 -0
  61. package/dist/adapters/grader-outputs/legacy/index.js +10 -0
  62. package/dist/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.d.ts +49 -0
  63. package/dist/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.js +48 -0
  64. package/dist/adapters/grader-outputs/promptfoo-grader-output.d.ts +102 -0
  65. package/dist/adapters/grader-outputs/promptfoo-grader-output.js +93 -0
  66. package/dist/adapters/index.d.ts +3 -0
  67. package/dist/adapters/index.js +4 -0
  68. package/dist/adapters/task-sources/content-lake-task-source.d.ts +5 -1
  69. package/dist/adapters/task-sources/content-lake-task-source.js +28 -2
  70. package/dist/adapters/task-sources/repo-schemas.d.ts +79 -11
  71. package/dist/adapters/task-sources/repo-schemas.js +19 -2
  72. package/dist/commands/calculate-scores.js +1 -1
  73. package/dist/commands/explain-handler.js +1 -1
  74. package/dist/commands/lookup-doc.d.ts +1 -1
  75. package/dist/commands/lookup-doc.js +3 -3
  76. package/dist/commands/pipeline-action.d.ts +6 -0
  77. package/dist/commands/pipeline-action.js +2 -0
  78. package/dist/commands/remote-pipeline.js +1 -0
  79. package/dist/composition-root.d.ts +36 -0
  80. package/dist/composition-root.js +48 -0
  81. package/dist/config/rubrics.ts +38 -2
  82. package/dist/grader/agent-harness.d.ts +14 -0
  83. package/dist/grader/agent-harness.js +17 -0
  84. package/dist/grader/common.d.ts +17 -0
  85. package/dist/grader/common.js +21 -0
  86. package/dist/grader/index.d.ts +38 -0
  87. package/dist/grader/index.js +75 -0
  88. package/dist/grader/knowledge-probe.d.ts +14 -0
  89. package/dist/grader/knowledge-probe.js +18 -0
  90. package/dist/grader/literacy.d.ts +13 -0
  91. package/dist/grader/literacy.js +17 -0
  92. package/dist/grader/mcp.d.ts +14 -0
  93. package/dist/grader/mcp.js +18 -0
  94. package/dist/orchestration/build-app-context.js +1 -0
  95. package/dist/orchestration/build-step-sequence.js +5 -0
  96. package/dist/orchestration/steps/calculate-scores-step.js +23 -1
  97. package/dist/orchestration/steps/compute-attribution-step.d.ts +44 -0
  98. package/dist/orchestration/steps/compute-attribution-step.js +279 -0
  99. package/dist/orchestration/steps/gap-analysis-step.js +35 -7
  100. package/dist/orchestration/steps/index.d.ts +1 -0
  101. package/dist/orchestration/steps/index.js +1 -0
  102. package/dist/pipeline/attribution.d.ts +15 -0
  103. package/dist/pipeline/attribution.js +18 -9
  104. package/dist/pipeline/borderline-consensus-runner.d.ts +63 -0
  105. package/dist/pipeline/borderline-consensus-runner.js +124 -0
  106. package/dist/pipeline/borderline-detector.d.ts +24 -0
  107. package/dist/pipeline/borderline-detector.js +26 -0
  108. package/dist/pipeline/calculate-scores.d.ts +114 -3
  109. package/dist/pipeline/calculate-scores.js +426 -24
  110. package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
  111. package/dist/pipeline/compiler/literacy-bridge.js +35 -17
  112. package/dist/pipeline/compiler/rubric-resolution.d.ts +15 -0
  113. package/dist/pipeline/compiler/rubric-resolution.js +9 -1
  114. package/dist/pipeline/compute-attribution.d.ts +80 -0
  115. package/dist/pipeline/compute-attribution.js +196 -0
  116. package/dist/pipeline/failure-modes.d.ts +52 -17
  117. package/dist/pipeline/failure-modes.js +178 -117
  118. package/dist/pipeline/map-request-to-config.js +1 -0
  119. package/package.json +6 -4
@@ -0,0 +1,55 @@
1
+ /**
2
+ * LegacyGraderJudgment — Phase 1 superset core only, with NO GRAD-02
3
+ * additive surface. Used by the read-only legacy parser at
4
+ * `packages/eval/src/adapters/grader-outputs/legacy/` for historical
5
+ * pre-Phase-3 reports.
6
+ *
7
+ * Reports are immutable events — once a Report is written to Content
8
+ * Lake the structured grader-judgment shape it captures cannot be
9
+ * back-filled. The legacy parser exists so historical-report rendering
10
+ * paths can keep deserializing pre-Phase-3 output through Phase 7
11
+ * (GRAD-06 cutover removes Studio's `reason`-only fallback rendering
12
+ * paths and the legacy adapter alongside).
13
+ *
14
+ * Authored INDEPENDENTLY of any Zod schema (D0045 doctrine — the
15
+ * legacy schema in
16
+ * `packages/eval/src/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.ts`
17
+ * `satisfies z.ZodType<LegacyGraderJudgment>` against this type, not
18
+ * the other way around). A tautological
19
+ * `satisfies z.ZodType<z.infer<typeof Schema>>` is forbidden.
20
+ *
21
+ * Invariant — live grader output that fails the strict
22
+ * `GraderJudgmentSchema` MUST NOT fall back to this schema. Drop to
23
+ * `failureMode: "unclassified"` instead. The legacy parser is invoked
24
+ * ONLY by historical-report rendering paths.
25
+ *
26
+ * @see ./grader-judgment.ts — the Phase 1+ structured shape (live path)
27
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
28
+ * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
29
+ */
30
+ /**
31
+ * The Phase 1 free-prose grader judgment as historical reports captured
32
+ * it. Mirrors the existing-pipeline-core surface of {@link GraderJudgment}
33
+ * (the required fields) and the pre-existing optional `outputFailure`
34
+ * flag. NO GRAD-02 additive fields (`subJudgments`, `docCitations`,
35
+ * `failureMode`, `confidence`, `hallucinationCheckedAgainst`,
36
+ * `metadata`) — those are by construction absent on pre-Phase-3 output.
37
+ */
38
+ export interface LegacyGraderJudgment {
39
+ /** Rubric template name (e.g. "task-completion", "code-correctness"). */
40
+ dimension: string;
41
+ /** The model that produced the response being graded. */
42
+ modelId: string;
43
+ /**
44
+ * True when the model failed to produce meaningful output (empty
45
+ * response, API error, or refusal). Same semantics as
46
+ * {@link GraderJudgment.outputFailure}.
47
+ */
48
+ outputFailure?: boolean;
49
+ /** The grader's natural-language reasoning (free-prose Phase 1 shape). */
50
+ reason: string;
51
+ /** Numeric score in [0, 100] (normalized). */
52
+ score: number;
53
+ /** The task this judgment belongs to. */
54
+ taskId: string;
55
+ }
@@ -0,0 +1,30 @@
1
+ /**
2
+ * LegacyGraderJudgment — Phase 1 superset core only, with NO GRAD-02
3
+ * additive surface. Used by the read-only legacy parser at
4
+ * `packages/eval/src/adapters/grader-outputs/legacy/` for historical
5
+ * pre-Phase-3 reports.
6
+ *
7
+ * Reports are immutable events — once a Report is written to Content
8
+ * Lake the structured grader-judgment shape it captures cannot be
9
+ * back-filled. The legacy parser exists so historical-report rendering
10
+ * paths can keep deserializing pre-Phase-3 output through Phase 7
11
+ * (GRAD-06 cutover removes Studio's `reason`-only fallback rendering
12
+ * paths and the legacy adapter alongside).
13
+ *
14
+ * Authored INDEPENDENTLY of any Zod schema (D0045 doctrine — the
15
+ * legacy schema in
16
+ * `packages/eval/src/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.ts`
17
+ * `satisfies z.ZodType<LegacyGraderJudgment>` against this type, not
18
+ * the other way around). A tautological
19
+ * `satisfies z.ZodType<z.infer<typeof Schema>>` is forbidden.
20
+ *
21
+ * Invariant — live grader output that fails the strict
22
+ * `GraderJudgmentSchema` MUST NOT fall back to this schema. Drop to
23
+ * `failureMode: "unclassified"` instead. The legacy parser is invoked
24
+ * ONLY by historical-report rendering paths.
25
+ *
26
+ * @see ./grader-judgment.ts — the Phase 1+ structured shape (live path)
27
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
28
+ * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
29
+ */
30
+ export {};
@@ -84,6 +84,7 @@ export interface PipelineRequest {
84
84
  dataset?: string;
85
85
  debug?: PipelineRequestDebug | boolean;
86
86
  executor?: PipelineRequestCallerExecutor;
87
+ borderlineReplications?: number;
87
88
  gapAnalysis?: boolean;
88
89
  graderContext?: "rubric-only" | "with-docs";
89
90
  graderReplications?: number;
@@ -47,6 +47,14 @@ export interface RepoPublishConfig {
47
47
  /** Execution-tier knobs — replaces the retired `--concurrency` / `--api-url` flags. */
48
48
  export interface RepoExecutionConfig {
49
49
  apiUrl?: string;
50
+ /**
51
+ * Plan 03-04 GRAD-04 — replications per borderline judgment for the
52
+ * intra-grader consensus pass. Default 3 (set in composition-root).
53
+ * A judgment is "borderline" when its score lies within ±5 of any
54
+ * severity boundary (30/50/60). Non-borderline judgments are not
55
+ * re-graded.
56
+ */
57
+ borderlineReplications?: number;
50
58
  concurrency?: number;
51
59
  gapAnalysis?: boolean;
52
60
  graderReplications?: number;
@@ -8,7 +8,7 @@
8
8
  * Attachable at every level of the report hierarchy:
9
9
  * - ScoreSummary.documentManifest — all docs used in the evaluation
10
10
  * - FeatureScore.documents — docs used for a specific area
11
- * - StoredJudgment.canonicalDocs — docs expected for a specific task
11
+ * - StoredJudgment.contextDocs (legacy alias: canonicalDocs) — docs expected for a specific task
12
12
  */
13
13
  export interface DocumentRef {
14
14
  /**
@@ -59,6 +59,7 @@ export interface RemoteConfigSlice {
59
59
  perspectiveOverride?: string;
60
60
  graderContext?: "rubric-only" | "with-docs";
61
61
  graderReplications?: number;
62
+ borderlineReplications?: number;
62
63
  gapAnalysisEnabled?: boolean;
63
64
  noRemoteCache?: boolean;
64
65
  /**
@@ -130,6 +130,9 @@ export async function buildRemoteRequest(options) {
130
130
  if (config.graderReplications) {
131
131
  raw.graderReplications = config.graderReplications;
132
132
  }
133
+ if (config.borderlineReplications) {
134
+ raw.borderlineReplications = config.borderlineReplications;
135
+ }
133
136
  if (config.gapAnalysisEnabled)
134
137
  raw.gapAnalysis = true;
135
138
  if (config.noRemoteCache)
@@ -0,0 +1,35 @@
1
+ /**
2
+ * attribution-meta-writer.ts — Zod schema for the run-scoped
3
+ * attribution metadata artifact (ATTR-01) emitted by Phase 4 and read
4
+ * back alongside the per-entry attribution objects.
5
+ *
6
+ * The schema asserts `satisfies z.ZodType<AttributionMeta>` against the
7
+ * canonical domain type in `packages/core/src/types/attribution.ts`
8
+ * (D0045 / W0187) — drift is a build error.
9
+ *
10
+ * `embeddingModel` is REQUIRED (Pitfall #6): silently downgrading to a
11
+ * default has caused regressions in adjacent codebases — model swaps
12
+ * MUST invalidate cached weights.
13
+ *
14
+ * Phase 1 lands the SHAPE only — no compute, no file I/O.
15
+ *
16
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
17
+ * @see docs/design-docs/actionability-ladder/04-per-document-attribution-ensemble.md
18
+ */
19
+ import { z } from "zod";
20
+ /**
21
+ * Canonical schema for {@link AttributionMeta}. Persisted at
22
+ * `runs/{runId}/attribution/_meta.json` (or whatever bulk path the
23
+ * Phase 4 descriptor pins) and parsed on read.
24
+ */
25
+ export declare const AttributionMetaSchema: z.ZodObject<{
26
+ ensembleVersion: z.ZodString;
27
+ embeddingModel: z.ZodString;
28
+ calibrationSetVersion: z.ZodOptional<z.ZodString>;
29
+ weights: z.ZodObject<{
30
+ citation: z.ZodNumber;
31
+ canonical: z.ZodNumber;
32
+ retrieved: z.ZodNumber;
33
+ }, z.core.$strip>;
34
+ }, z.core.$strip>;
35
+ export type { AttributionMeta } from "../../_vendor/ailf-core/index.d.ts";
@@ -0,0 +1,34 @@
1
+ /**
2
+ * attribution-meta-writer.ts — Zod schema for the run-scoped
3
+ * attribution metadata artifact (ATTR-01) emitted by Phase 4 and read
4
+ * back alongside the per-entry attribution objects.
5
+ *
6
+ * The schema asserts `satisfies z.ZodType<AttributionMeta>` against the
7
+ * canonical domain type in `packages/core/src/types/attribution.ts`
8
+ * (D0045 / W0187) — drift is a build error.
9
+ *
10
+ * `embeddingModel` is REQUIRED (Pitfall #6): silently downgrading to a
11
+ * default has caused regressions in adjacent codebases — model swaps
12
+ * MUST invalidate cached weights.
13
+ *
14
+ * Phase 1 lands the SHAPE only — no compute, no file I/O.
15
+ *
16
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
17
+ * @see docs/design-docs/actionability-ladder/04-per-document-attribution-ensemble.md
18
+ */
19
+ import { z } from "zod";
20
+ /**
21
+ * Canonical schema for {@link AttributionMeta}. Persisted at
22
+ * `runs/{runId}/attribution/_meta.json` (or whatever bulk path the
23
+ * Phase 4 descriptor pins) and parsed on read.
24
+ */
25
+ export const AttributionMetaSchema = z.object({
26
+ ensembleVersion: z.string().min(1),
27
+ embeddingModel: z.string().min(1),
28
+ calibrationSetVersion: z.string().optional(),
29
+ weights: z.object({
30
+ citation: z.number(),
31
+ canonical: z.number(),
32
+ retrieved: z.number(),
33
+ }),
34
+ });
@@ -0,0 +1,9 @@
1
+ /**
2
+ * attribution adapter barrel — named re-exports only (W0124 / D0045).
3
+ *
4
+ * The attribution schemas live here so they enter the D0045
5
+ * `pnpm check-trust-boundary-satisfies` SCAN_ROOTS gate.
6
+ */
7
+ export { JudgmentAttributionSchema } from "./per-entry-attribution-writer.js";
8
+ export { AttributionMetaSchema } from "./attribution-meta-writer.js";
9
+ export type { AttributionMeta, DocAttribution, JudgmentAttribution, } from "../../_vendor/ailf-core/index.d.ts";
@@ -0,0 +1,8 @@
1
+ /**
2
+ * attribution adapter barrel — named re-exports only (W0124 / D0045).
3
+ *
4
+ * The attribution schemas live here so they enter the D0045
5
+ * `pnpm check-trust-boundary-satisfies` SCAN_ROOTS gate.
6
+ */
7
+ export { JudgmentAttributionSchema } from "./per-entry-attribution-writer.js";
8
+ export { AttributionMetaSchema } from "./attribution-meta-writer.js";
@@ -0,0 +1,56 @@
1
+ /**
2
+ * per-entry-attribution-writer.ts — Zod schema for the per-judgment
3
+ * attribution artifact (ATTR-01) emitted by Phase 4's
4
+ * `ComputeAttributionStep` and read back by Phase 5's diagnosis runner.
5
+ *
6
+ * The schema asserts `satisfies z.ZodType<JudgmentAttribution>` against
7
+ * the canonical domain type in `packages/core/src/types/attribution.ts`
8
+ * (D0045 / W0187) — drift between schema and type is a build error.
9
+ *
10
+ * Phase 1 lands the SHAPE only — no compute, no file I/O. Phase 4 wires
11
+ * the writer; Phase 5 wires the reader. Both `satisfies` against this
12
+ * single source-of-truth schema.
13
+ *
14
+ * `hallucinationCheckedAgainst` is REQUIRED (Pitfall #11): consumers
15
+ * must be able to audit citation grounding without re-deriving the
16
+ * resolvable-set. The canonical task field is `contextDocs`; do NOT
17
+ * invent `expectedDocs` / `usedDocs` synonyms.
18
+ *
19
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
20
+ * @see docs/decisions/D0049-shared-confidence-contract.md
21
+ * @see docs/decisions/D0052-judgment-ref-granularity.md
22
+ * @see docs/design-docs/actionability-ladder/04-per-document-attribution-ensemble.md
23
+ */
24
+ import { z } from "zod";
25
+ /**
26
+ * Canonical schema for {@link JudgmentAttribution}. Persisted at
27
+ * `runs/{runId}/attribution/{entryKey}.json` (Phase 4) and parsed by
28
+ * the diagnosis runner on read (Phase 5).
29
+ */
30
+ export declare const JudgmentAttributionSchema: z.ZodObject<{
31
+ judgmentRef: z.ZodString;
32
+ taskId: z.ZodString;
33
+ modelId: z.ZodString;
34
+ dimension: z.ZodString;
35
+ attributions: z.ZodArray<z.ZodObject<{
36
+ documentId: z.ZodString;
37
+ slug: z.ZodOptional<z.ZodString>;
38
+ score: z.ZodNumber;
39
+ signals: z.ZodObject<{
40
+ citation: z.ZodOptional<z.ZodNumber>;
41
+ canonical: z.ZodOptional<z.ZodNumber>;
42
+ retrieved: z.ZodOptional<z.ZodNumber>;
43
+ }, z.core.$strip>;
44
+ confidence: z.ZodObject<{
45
+ level: z.ZodEnum<{
46
+ low: "low";
47
+ medium: "medium";
48
+ high: "high";
49
+ }>;
50
+ signalsPresent: z.ZodNumber;
51
+ derivation: z.ZodString;
52
+ }, z.core.$strip>;
53
+ }, z.core.$strip>>;
54
+ hallucinationCheckedAgainst: z.ZodArray<z.ZodString>;
55
+ }, z.core.$strip>;
56
+ export type { DocAttribution, JudgmentAttribution } from "../../_vendor/ailf-core/index.d.ts";
@@ -0,0 +1,49 @@
1
+ /**
2
+ * per-entry-attribution-writer.ts — Zod schema for the per-judgment
3
+ * attribution artifact (ATTR-01) emitted by Phase 4's
4
+ * `ComputeAttributionStep` and read back by Phase 5's diagnosis runner.
5
+ *
6
+ * The schema asserts `satisfies z.ZodType<JudgmentAttribution>` against
7
+ * the canonical domain type in `packages/core/src/types/attribution.ts`
8
+ * (D0045 / W0187) — drift between schema and type is a build error.
9
+ *
10
+ * Phase 1 lands the SHAPE only — no compute, no file I/O. Phase 4 wires
11
+ * the writer; Phase 5 wires the reader. Both `satisfies` against this
12
+ * single source-of-truth schema.
13
+ *
14
+ * `hallucinationCheckedAgainst` is REQUIRED (Pitfall #11): consumers
15
+ * must be able to audit citation grounding without re-deriving the
16
+ * resolvable-set. The canonical task field is `contextDocs`; do NOT
17
+ * invent `expectedDocs` / `usedDocs` synonyms.
18
+ *
19
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
20
+ * @see docs/decisions/D0049-shared-confidence-contract.md
21
+ * @see docs/decisions/D0052-judgment-ref-granularity.md
22
+ * @see docs/design-docs/actionability-ladder/04-per-document-attribution-ensemble.md
23
+ */
24
+ import { z } from "zod";
25
+ import { ConfidenceSchema } from "../../_vendor/ailf-core/schemas/index.js";
26
+ const DocAttributionSchema = z.object({
27
+ documentId: z.string().min(1),
28
+ slug: z.string().optional(),
29
+ score: z.number().min(0).max(1),
30
+ signals: z.object({
31
+ citation: z.number().min(0).max(1).optional(),
32
+ canonical: z.number().min(0).max(1).optional(),
33
+ retrieved: z.number().min(0).max(1).optional(),
34
+ }),
35
+ confidence: ConfidenceSchema,
36
+ });
37
+ /**
38
+ * Canonical schema for {@link JudgmentAttribution}. Persisted at
39
+ * `runs/{runId}/attribution/{entryKey}.json` (Phase 4) and parsed by
40
+ * the diagnosis runner on read (Phase 5).
41
+ */
42
+ export const JudgmentAttributionSchema = z.object({
43
+ judgmentRef: z.string().min(1),
44
+ taskId: z.string().min(1),
45
+ modelId: z.string().min(1),
46
+ dimension: z.string().min(1),
47
+ attributions: z.array(DocAttributionSchema),
48
+ hallucinationCheckedAgainst: z.array(z.string()),
49
+ });
@@ -125,6 +125,7 @@ function mapEvalConfigToResolvedConfig(config, rootDir) {
125
125
  noCache: config.noCache ?? false,
126
126
  noRemoteCache: config.noRemoteCache ?? false,
127
127
  graderReplications: config.execution?.graderReplications,
128
+ borderlineReplications: config.execution?.borderlineReplications,
128
129
  graderContext: config.grader?.context,
129
130
  urls: config.urls,
130
131
  headers: config.agentic?.headers,
@@ -0,0 +1,10 @@
1
+ /**
2
+ * grader-outputs adapter barrel — named re-exports only (W0124 / D0045).
3
+ *
4
+ * The grader-output schema lives here so it enters the D0045
5
+ * `pnpm check-trust-boundary-satisfies` SCAN_ROOTS gate.
6
+ */
7
+ export { GraderJudgmentSchema, graderJudgmentsVersion, } from "./promptfoo-grader-output.js";
8
+ export type { GraderJudgment } from "../../_vendor/ailf-core/index.d.ts";
9
+ export { LegacyGraderJudgmentSchema } from "./legacy/index.js";
10
+ export type { LegacyGraderJudgment } from "../../_vendor/ailf-core/index.d.ts";
@@ -0,0 +1,8 @@
1
+ /**
2
+ * grader-outputs adapter barrel — named re-exports only (W0124 / D0045).
3
+ *
4
+ * The grader-output schema lives here so it enters the D0045
5
+ * `pnpm check-trust-boundary-satisfies` SCAN_ROOTS gate.
6
+ */
7
+ export { GraderJudgmentSchema, graderJudgmentsVersion, } from "./promptfoo-grader-output.js";
8
+ export { LegacyGraderJudgmentSchema } from "./legacy/index.js";
@@ -0,0 +1,11 @@
1
+ /**
2
+ * legacy grader-outputs adapter sub-barrel — named re-exports only
3
+ * (W0124 / D0045).
4
+ *
5
+ * Read-only schema for the Phase 1 free-prose grader-output shape,
6
+ * invoked only by historical-report rendering paths through Phase 7
7
+ * (GRAD-06 cutover). The schema lives here so it enters the D0045
8
+ * `pnpm check-trust-boundary-satisfies` SCAN_ROOTS gate.
9
+ */
10
+ export { LegacyGraderJudgmentSchema } from "./promptfoo-grader-output-legacy.js";
11
+ export type { LegacyGraderJudgment } from "../../../_vendor/ailf-core/index.d.ts";
@@ -0,0 +1,10 @@
1
+ /**
2
+ * legacy grader-outputs adapter sub-barrel — named re-exports only
3
+ * (W0124 / D0045).
4
+ *
5
+ * Read-only schema for the Phase 1 free-prose grader-output shape,
6
+ * invoked only by historical-report rendering paths through Phase 7
7
+ * (GRAD-06 cutover). The schema lives here so it enters the D0045
8
+ * `pnpm check-trust-boundary-satisfies` SCAN_ROOTS gate.
9
+ */
10
+ export { LegacyGraderJudgmentSchema } from "./promptfoo-grader-output-legacy.js";
@@ -0,0 +1,49 @@
1
+ /**
2
+ * promptfoo-grader-output-legacy.ts — Zod schema for the Phase 1
3
+ * free-prose grader-output shape, used by historical-report rendering
4
+ * paths.
5
+ *
6
+ * READ-ONLY: invoked only by historical-report rendering paths through
7
+ * Phase 7 (GRAD-06 cutover). Reports are immutable events — once a
8
+ * Report is written to Content Lake, the structured grader-judgment
9
+ * shape it captures cannot be back-filled. The legacy schema exists so
10
+ * pre-Phase-3 reports continue to deserialize cleanly.
11
+ *
12
+ * Live grader output that fails the strict {@link GraderJudgmentSchema}
13
+ * parse must NOT fall back to this schema. Drop to
14
+ * `failureMode: "unclassified"` instead. Strict and legacy schemas are
15
+ * deliberate siblings, not a legacy/canonical pair to consolidate.
16
+ *
17
+ * The schema asserts `satisfies z.ZodType<LegacyGraderJudgment>` against
18
+ * the canonical domain type in
19
+ * `packages/core/src/types/legacy-grader-judgment.ts` (D0045 / W0187) —
20
+ * drift between schema and type is a build error. The domain type is
21
+ * authored independently in `@sanity/ailf-core`; this file authors ONLY
22
+ * the schema and never derives the domain type from the schema itself
23
+ * (no schema-derived self-reference allowed by D0045).
24
+ *
25
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
26
+ * @see ../promptfoo-grader-output.ts — the strict (live-path) sibling
27
+ * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
28
+ * §"Backwards compatibility"
29
+ */
30
+ import { z } from "zod";
31
+ /**
32
+ * Canonical schema for {@link LegacyGraderJudgment}. Mirrors the Phase 1
33
+ * superset core (`taskId`, `modelId`, `dimension`, `reason`, `score`,
34
+ * optional `outputFailure`). NO GRAD-02 additive fields — those are by
35
+ * construction absent on pre-Phase-3 output.
36
+ *
37
+ * Intentionally NOT `.strict()` — pre-Phase-3 reports may carry stray
38
+ * keys; the legacy parser tolerates them so historical-report rendering
39
+ * keeps working through the GRAD-06 cutover.
40
+ */
41
+ export declare const LegacyGraderJudgmentSchema: z.ZodObject<{
42
+ taskId: z.ZodString;
43
+ modelId: z.ZodString;
44
+ dimension: z.ZodString;
45
+ reason: z.ZodString;
46
+ score: z.ZodNumber;
47
+ outputFailure: z.ZodOptional<z.ZodBoolean>;
48
+ }, z.core.$strip>;
49
+ export type { LegacyGraderJudgment } from "../../../_vendor/ailf-core/index.d.ts";
@@ -0,0 +1,48 @@
1
+ /**
2
+ * promptfoo-grader-output-legacy.ts — Zod schema for the Phase 1
3
+ * free-prose grader-output shape, used by historical-report rendering
4
+ * paths.
5
+ *
6
+ * READ-ONLY: invoked only by historical-report rendering paths through
7
+ * Phase 7 (GRAD-06 cutover). Reports are immutable events — once a
8
+ * Report is written to Content Lake, the structured grader-judgment
9
+ * shape it captures cannot be back-filled. The legacy schema exists so
10
+ * pre-Phase-3 reports continue to deserialize cleanly.
11
+ *
12
+ * Live grader output that fails the strict {@link GraderJudgmentSchema}
13
+ * parse must NOT fall back to this schema. Drop to
14
+ * `failureMode: "unclassified"` instead. Strict and legacy schemas are
15
+ * deliberate siblings, not a legacy/canonical pair to consolidate.
16
+ *
17
+ * The schema asserts `satisfies z.ZodType<LegacyGraderJudgment>` against
18
+ * the canonical domain type in
19
+ * `packages/core/src/types/legacy-grader-judgment.ts` (D0045 / W0187) —
20
+ * drift between schema and type is a build error. The domain type is
21
+ * authored independently in `@sanity/ailf-core`; this file authors ONLY
22
+ * the schema and never derives the domain type from the schema itself
23
+ * (no schema-derived self-reference allowed by D0045).
24
+ *
25
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
26
+ * @see ../promptfoo-grader-output.ts — the strict (live-path) sibling
27
+ * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
28
+ * §"Backwards compatibility"
29
+ */
30
+ import { z } from "zod";
31
+ /**
32
+ * Canonical schema for {@link LegacyGraderJudgment}. Mirrors the Phase 1
33
+ * superset core (`taskId`, `modelId`, `dimension`, `reason`, `score`,
34
+ * optional `outputFailure`). NO GRAD-02 additive fields — those are by
35
+ * construction absent on pre-Phase-3 output.
36
+ *
37
+ * Intentionally NOT `.strict()` — pre-Phase-3 reports may carry stray
38
+ * keys; the legacy parser tolerates them so historical-report rendering
39
+ * keeps working through the GRAD-06 cutover.
40
+ */
41
+ export const LegacyGraderJudgmentSchema = z.object({
42
+ taskId: z.string().min(1),
43
+ modelId: z.string().min(1),
44
+ dimension: z.string().min(1),
45
+ reason: z.string(),
46
+ score: z.number(),
47
+ outputFailure: z.boolean().optional(),
48
+ });
@@ -0,0 +1,102 @@
1
+ /**
2
+ * promptfoo-grader-output.ts — Zod schema for the structured grader output
3
+ * (GRAD-02) emitted by the promptfoo grader process and consumed by the
4
+ * eval pipeline.
5
+ *
6
+ * The schema asserts `satisfies z.ZodType<GraderJudgment>` against the
7
+ * canonical domain type in `packages/core/src/types/grader-judgment.ts`
8
+ * (D0045 / W0187) — drift between schema and type is a build error.
9
+ * The domain type was authored independently in Plan 01-01; this file
10
+ * authors ONLY the schema and never derives the domain type from the
11
+ * schema itself (no schema-derived self-reference allowed by D0045).
12
+ *
13
+ * `graderJudgmentsVersion` is co-located with the schema (VER-01 D-02 —
14
+ * source-of-truth file owns its version constant). Bumped by hand when
15
+ * the grader rubric, prompt template, or judgment shape changes.
16
+ *
17
+ * Phase 3 will replace the inline `JSON.parse` at
18
+ * `pipeline/calculate-scores.ts:380-392` (Pitfall #4) so all grader
19
+ * output flows through this schema.
20
+ *
21
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
22
+ * @see docs/decisions/D0049-shared-confidence-contract.md
23
+ * @see docs/decisions/D0052-judgment-ref-granularity.md
24
+ * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
25
+ */
26
+ import { z } from "zod";
27
+ /**
28
+ * VER-01 D-02 — co-located version constant. Bumped by hand when the
29
+ * grader rubric, prompt template, or judgment shape changes in a way
30
+ * that should invalidate cached Diagnoses.
31
+ *
32
+ * Phase 3 GRAD-05 bumped this from `"0.1.0"` to `"1.0.0"` (semver
33
+ * major) — the additive GRAD-02 surface is now required + the schema
34
+ * is `.strict()`. AILF has no installed external base; the legacy
35
+ * parser at `./legacy/promptfoo-grader-output-legacy.ts` is the named
36
+ * consumer for already-stored historical reports.
37
+ */
38
+ export declare const graderJudgmentsVersion = "1.0.0";
39
+ /**
40
+ * Canonical schema for {@link GraderJudgment}. Required fields mirror
41
+ * the existing pipeline core (Doc 03 §"existing, unchanged"):
42
+ * `taskId`, `modelId`, `dimension`, `reason`, `score`. Phase 3 GRAD-05
43
+ * has tightened the additive surface to required and added `.strict()`
44
+ * — the schema rejects unknown fields (defense-in-depth against future
45
+ * prompt-injection attempts that try to smuggle keys through the
46
+ * grader emission).
47
+ *
48
+ * Branded `JudgmentId` is represented at runtime by a non-empty string;
49
+ * the schema routes the brand through `brandedString<"JudgmentId">()`
50
+ * — the project's single audited cast site for branded-string
51
+ * schemas (project typescript rule: no `as` on `unknown`).
52
+ */
53
+ export declare const GraderJudgmentSchema: z.ZodObject<{
54
+ taskId: z.ZodString;
55
+ modelId: z.ZodString;
56
+ dimension: z.ZodString;
57
+ reason: z.ZodString;
58
+ score: z.ZodNumber;
59
+ outputFailure: z.ZodOptional<z.ZodBoolean>;
60
+ judgmentId: z.ZodType<import("@sanity/ailf-core").Brand<string, "JudgmentId">, unknown, z.core.$ZodTypeInternals<import("@sanity/ailf-core").Brand<string, "JudgmentId">, unknown>>;
61
+ subJudgments: z.ZodArray<z.ZodObject<{
62
+ criterionId: z.ZodString;
63
+ met: z.ZodBoolean;
64
+ evidence: z.ZodString;
65
+ confidence: z.ZodObject<{
66
+ level: z.ZodEnum<{
67
+ low: "low";
68
+ medium: "medium";
69
+ high: "high";
70
+ }>;
71
+ signalsPresent: z.ZodNumber;
72
+ derivation: z.ZodString;
73
+ }, z.core.$strip>;
74
+ }, z.core.$strip>>;
75
+ docCitations: z.ZodArray<z.ZodObject<{
76
+ documentId: z.ZodString;
77
+ slug: z.ZodOptional<z.ZodString>;
78
+ role: z.ZodEnum<{
79
+ supports: "supports";
80
+ contradicts: "contradicts";
81
+ missing: "missing";
82
+ irrelevant: "irrelevant";
83
+ }>;
84
+ hallucinated: z.ZodOptional<z.ZodBoolean>;
85
+ }, z.core.$strip>>;
86
+ failureMode: z.ZodString;
87
+ confidence: z.ZodObject<{
88
+ level: z.ZodEnum<{
89
+ low: "low";
90
+ medium: "medium";
91
+ high: "high";
92
+ }>;
93
+ signalsPresent: z.ZodNumber;
94
+ derivation: z.ZodString;
95
+ }, z.core.$strip>;
96
+ hallucinationCheckedAgainst: z.ZodArray<z.ZodString>;
97
+ metadata: z.ZodObject<{
98
+ graderModel: z.ZodString;
99
+ graderJudgmentsVersion: z.ZodString;
100
+ }, z.core.$strip>;
101
+ }, z.core.$strict>;
102
+ export type { GraderJudgment } from "../../_vendor/ailf-core/index.d.ts";