@sanity/ailf 4.6.0 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. package/canonical/grader-references/agent-harness-tools.yaml +42 -0
  2. package/canonical/grader-references/knowledge-probe-recall.yaml +36 -0
  3. package/canonical/grader-references/mcp-server-spec.yaml +51 -0
  4. package/canonical/grader-references/portable-text.yaml +48 -0
  5. package/config/diagnosis-cards.ts +318 -0
  6. package/config/models.ts +12 -0
  7. package/config/rubrics.ts +38 -2
  8. package/dist/_vendor/ailf-core/artifact-registry.d.ts +60 -2
  9. package/dist/_vendor/ailf-core/artifact-registry.js +288 -7
  10. package/dist/_vendor/ailf-core/examples/index.d.ts +125 -26
  11. package/dist/_vendor/ailf-core/examples/index.js +146 -47
  12. package/dist/_vendor/ailf-core/grader/failure-modes/agent-harness.d.ts +13 -0
  13. package/dist/_vendor/ailf-core/grader/failure-modes/agent-harness.js +16 -0
  14. package/dist/_vendor/ailf-core/grader/failure-modes/common.d.ts +14 -0
  15. package/dist/_vendor/ailf-core/grader/failure-modes/common.js +18 -0
  16. package/dist/_vendor/ailf-core/grader/failure-modes/index.d.ts +45 -0
  17. package/dist/_vendor/ailf-core/grader/failure-modes/index.js +109 -0
  18. package/dist/_vendor/ailf-core/grader/failure-modes/knowledge-probe.d.ts +13 -0
  19. package/dist/_vendor/ailf-core/grader/failure-modes/knowledge-probe.js +17 -0
  20. package/dist/_vendor/ailf-core/grader/failure-modes/literacy.d.ts +13 -0
  21. package/dist/_vendor/ailf-core/grader/failure-modes/literacy.js +17 -0
  22. package/dist/_vendor/ailf-core/grader/failure-modes/mcp.d.ts +13 -0
  23. package/dist/_vendor/ailf-core/grader/failure-modes/mcp.js +17 -0
  24. package/dist/_vendor/ailf-core/index.d.ts +1 -0
  25. package/dist/_vendor/ailf-core/index.js +4 -0
  26. package/dist/_vendor/ailf-core/ports/context.d.ts +8 -0
  27. package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +15 -0
  28. package/dist/_vendor/ailf-core/schemas/branded-string.d.ts +40 -0
  29. package/dist/_vendor/ailf-core/schemas/branded-string.js +45 -0
  30. package/dist/_vendor/ailf-core/schemas/confidence-schema.d.ts +36 -0
  31. package/dist/_vendor/ailf-core/schemas/confidence-schema.js +32 -0
  32. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
  33. package/dist/_vendor/ailf-core/schemas/eval-config.js +8 -4
  34. package/dist/_vendor/ailf-core/schemas/index.d.ts +2 -0
  35. package/dist/_vendor/ailf-core/schemas/index.js +9 -0
  36. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
  37. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -0
  38. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +34 -8
  39. package/dist/_vendor/ailf-core/schemas/pipeline.js +23 -1
  40. package/dist/_vendor/ailf-core/services/diagnosis/card-validators.d.ts +41 -0
  41. package/dist/_vendor/ailf-core/services/diagnosis/card-validators.js +40 -0
  42. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/area-summary.test.d.ts +7 -0
  43. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/area-summary.test.js +131 -0
  44. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/failure-mode-summary.test.d.ts +7 -0
  45. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/failure-mode-summary.test.js +171 -0
  46. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/no-issues.test.d.ts +7 -0
  47. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/no-issues.test.js +155 -0
  48. package/dist/_vendor/ailf-core/services/diagnosis/cards/area-summary.d.ts +17 -0
  49. package/dist/_vendor/ailf-core/services/diagnosis/cards/area-summary.js +43 -0
  50. package/dist/_vendor/ailf-core/services/diagnosis/cards/doc-attribution-spotlight.d.ts +46 -0
  51. package/dist/_vendor/ailf-core/services/diagnosis/cards/doc-attribution-spotlight.js +104 -0
  52. package/dist/_vendor/ailf-core/services/diagnosis/cards/failure-mode-summary.d.ts +28 -0
  53. package/dist/_vendor/ailf-core/services/diagnosis/cards/failure-mode-summary.js +96 -0
  54. package/dist/_vendor/ailf-core/services/diagnosis/cards/index.d.ts +39 -0
  55. package/dist/_vendor/ailf-core/services/diagnosis/cards/index.js +52 -0
  56. package/dist/_vendor/ailf-core/services/diagnosis/cards/low-confidence-attribution.d.ts +27 -0
  57. package/dist/_vendor/ailf-core/services/diagnosis/cards/low-confidence-attribution.js +77 -0
  58. package/dist/_vendor/ailf-core/services/diagnosis/cards/no-issues.d.ts +32 -0
  59. package/dist/_vendor/ailf-core/services/diagnosis/cards/no-issues.js +71 -0
  60. package/dist/_vendor/ailf-core/services/diagnosis/cards/regression-vs-baseline.d.ts +44 -0
  61. package/dist/_vendor/ailf-core/services/diagnosis/cards/regression-vs-baseline.js +126 -0
  62. package/dist/_vendor/ailf-core/services/diagnosis/cards/top-recommendations.d.ts +41 -0
  63. package/dist/_vendor/ailf-core/services/diagnosis/cards/top-recommendations.js +107 -0
  64. package/dist/_vendor/ailf-core/services/diagnosis/cards/weakest-area.d.ts +43 -0
  65. package/dist/_vendor/ailf-core/services/diagnosis/cards/weakest-area.js +114 -0
  66. package/dist/_vendor/ailf-core/services/diagnosis/prompt-builders.d.ts +72 -0
  67. package/dist/_vendor/ailf-core/services/diagnosis/prompt-builders.js +273 -0
  68. package/dist/_vendor/ailf-core/services/diagnosis/prompts/doc-attribution-spotlight.system.d.ts +17 -0
  69. package/dist/_vendor/ailf-core/services/diagnosis/prompts/doc-attribution-spotlight.system.js +58 -0
  70. package/dist/_vendor/ailf-core/services/diagnosis/prompts/index.d.ts +10 -0
  71. package/dist/_vendor/ailf-core/services/diagnosis/prompts/index.js +10 -0
  72. package/dist/_vendor/ailf-core/services/diagnosis/prompts/low-confidence-attribution.system.d.ts +15 -0
  73. package/dist/_vendor/ailf-core/services/diagnosis/prompts/low-confidence-attribution.system.js +53 -0
  74. package/dist/_vendor/ailf-core/services/diagnosis/prompts/regression-vs-baseline.system.d.ts +14 -0
  75. package/dist/_vendor/ailf-core/services/diagnosis/prompts/regression-vs-baseline.system.js +63 -0
  76. package/dist/_vendor/ailf-core/services/diagnosis/prompts/top-recommendations.system.d.ts +16 -0
  77. package/dist/_vendor/ailf-core/services/diagnosis/prompts/top-recommendations.system.js +78 -0
  78. package/dist/_vendor/ailf-core/services/diagnosis/prompts/weakest-area.system.d.ts +16 -0
  79. package/dist/_vendor/ailf-core/services/diagnosis/prompts/weakest-area.system.js +86 -0
  80. package/dist/_vendor/ailf-core/services/diagnosis/registry.d.ts +50 -0
  81. package/dist/_vendor/ailf-core/services/diagnosis/registry.js +35 -0
  82. package/dist/_vendor/ailf-core/services/diagnosis-runner.d.ts +136 -0
  83. package/dist/_vendor/ailf-core/services/diagnosis-runner.js +153 -0
  84. package/dist/_vendor/ailf-core/services/index.d.ts +6 -0
  85. package/dist/_vendor/ailf-core/services/index.js +18 -0
  86. package/dist/_vendor/ailf-core/services/llm-client-factory.d.ts +64 -0
  87. package/dist/_vendor/ailf-core/services/llm-client-factory.js +54 -0
  88. package/dist/_vendor/ailf-core/services/report-to-markdown.js +3 -2
  89. package/dist/_vendor/ailf-core/types/attribution.d.ts +82 -0
  90. package/dist/_vendor/ailf-core/types/attribution.js +18 -0
  91. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +26 -1
  92. package/dist/_vendor/ailf-core/types/branded-ids.js +80 -4
  93. package/dist/_vendor/ailf-core/types/confidence.d.ts +1 -1
  94. package/dist/_vendor/ailf-core/types/confidence.js +7 -0
  95. package/dist/_vendor/ailf-core/types/diagnosis.d.ts +271 -0
  96. package/dist/_vendor/ailf-core/types/diagnosis.js +19 -0
  97. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +16 -1
  98. package/dist/_vendor/ailf-core/types/grader-judgment.d.ts +125 -0
  99. package/dist/_vendor/ailf-core/types/grader-judgment.js +30 -0
  100. package/dist/_vendor/ailf-core/types/index.d.ts +80 -29
  101. package/dist/_vendor/ailf-core/types/index.js +15 -1
  102. package/dist/_vendor/ailf-core/types/legacy-grader-judgment.d.ts +55 -0
  103. package/dist/_vendor/ailf-core/types/legacy-grader-judgment.js +30 -0
  104. package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +1 -0
  105. package/dist/_vendor/ailf-core/types/repo-config.d.ts +8 -0
  106. package/dist/_vendor/ailf-shared/document-ref.d.ts +1 -1
  107. package/dist/adapters/api-client/build-request.d.ts +1 -0
  108. package/dist/adapters/api-client/build-request.js +3 -0
  109. package/dist/adapters/attribution/attribution-meta-writer.d.ts +35 -0
  110. package/dist/adapters/attribution/attribution-meta-writer.js +34 -0
  111. package/dist/adapters/attribution/index.d.ts +9 -0
  112. package/dist/adapters/attribution/index.js +8 -0
  113. package/dist/adapters/attribution/per-entry-attribution-writer.d.ts +56 -0
  114. package/dist/adapters/attribution/per-entry-attribution-writer.js +49 -0
  115. package/dist/adapters/config-sources/file-config-adapter.js +1 -0
  116. package/dist/adapters/grader-outputs/index.d.ts +10 -0
  117. package/dist/adapters/grader-outputs/index.js +8 -0
  118. package/dist/adapters/grader-outputs/legacy/index.d.ts +11 -0
  119. package/dist/adapters/grader-outputs/legacy/index.js +10 -0
  120. package/dist/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.d.ts +49 -0
  121. package/dist/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.js +48 -0
  122. package/dist/adapters/grader-outputs/promptfoo-grader-output.d.ts +102 -0
  123. package/dist/adapters/grader-outputs/promptfoo-grader-output.js +93 -0
  124. package/dist/adapters/index.d.ts +3 -0
  125. package/dist/adapters/index.js +4 -0
  126. package/dist/adapters/llm/fake-llm-client.d.ts +20 -0
  127. package/dist/adapters/llm/fake-llm-client.js +38 -1
  128. package/dist/adapters/llm/openai-llm-client.js +52 -3
  129. package/dist/adapters/task-sources/content-lake-task-source.d.ts +5 -1
  130. package/dist/adapters/task-sources/content-lake-task-source.js +28 -2
  131. package/dist/adapters/task-sources/repo-schemas.d.ts +79 -11
  132. package/dist/adapters/task-sources/repo-schemas.js +19 -2
  133. package/dist/cli-program.js +3 -0
  134. package/dist/commands/calculate-scores.js +1 -1
  135. package/dist/commands/explain-handler.js +1 -1
  136. package/dist/commands/interpret.d.ts +50 -0
  137. package/dist/commands/interpret.js +212 -0
  138. package/dist/commands/lookup-doc.d.ts +1 -1
  139. package/dist/commands/lookup-doc.js +3 -3
  140. package/dist/commands/pipeline-action.d.ts +6 -0
  141. package/dist/commands/pipeline-action.js +2 -0
  142. package/dist/commands/remote-pipeline.js +1 -0
  143. package/dist/composition-root.d.ts +57 -23
  144. package/dist/composition-root.js +155 -41
  145. package/dist/config/diagnosis-cards.ts +318 -0
  146. package/dist/config/models.ts +12 -0
  147. package/dist/config/rubrics.ts +38 -2
  148. package/dist/grader/agent-harness.d.ts +9 -0
  149. package/dist/grader/agent-harness.js +9 -0
  150. package/dist/grader/common.d.ts +9 -0
  151. package/dist/grader/common.js +9 -0
  152. package/dist/grader/index.d.ts +24 -0
  153. package/dist/grader/index.js +24 -0
  154. package/dist/grader/knowledge-probe.d.ts +9 -0
  155. package/dist/grader/knowledge-probe.js +9 -0
  156. package/dist/grader/literacy.d.ts +9 -0
  157. package/dist/grader/literacy.js +9 -0
  158. package/dist/grader/mcp.d.ts +9 -0
  159. package/dist/grader/mcp.js +9 -0
  160. package/dist/orchestration/build-app-context.js +1 -0
  161. package/dist/orchestration/build-step-sequence.js +5 -0
  162. package/dist/orchestration/steps/calculate-scores-step.js +23 -1
  163. package/dist/orchestration/steps/compute-attribution-step.d.ts +44 -0
  164. package/dist/orchestration/steps/compute-attribution-step.js +279 -0
  165. package/dist/orchestration/steps/gap-analysis-step.js +35 -7
  166. package/dist/orchestration/steps/index.d.ts +1 -0
  167. package/dist/orchestration/steps/index.js +1 -0
  168. package/dist/pipeline/attribution.d.ts +15 -0
  169. package/dist/pipeline/attribution.js +18 -9
  170. package/dist/pipeline/borderline-consensus-runner.d.ts +63 -0
  171. package/dist/pipeline/borderline-consensus-runner.js +124 -0
  172. package/dist/pipeline/borderline-detector.d.ts +24 -0
  173. package/dist/pipeline/borderline-detector.js +26 -0
  174. package/dist/pipeline/calculate-scores.d.ts +114 -3
  175. package/dist/pipeline/calculate-scores.js +426 -24
  176. package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
  177. package/dist/pipeline/compiler/literacy-bridge.js +35 -17
  178. package/dist/pipeline/compiler/rubric-resolution.d.ts +15 -0
  179. package/dist/pipeline/compiler/rubric-resolution.js +9 -1
  180. package/dist/pipeline/compute-attribution.d.ts +80 -0
  181. package/dist/pipeline/compute-attribution.js +196 -0
  182. package/dist/pipeline/failure-modes.d.ts +52 -17
  183. package/dist/pipeline/failure-modes.js +178 -117
  184. package/dist/pipeline/map-request-to-config.js +1 -0
  185. package/package.json +7 -5
@@ -0,0 +1,125 @@
1
+ /**
2
+ * GraderJudgment core domain types — canonical shapes for structured
3
+ * grader output (Doc 03, GRAD-02).
4
+ *
5
+ * Authored INDEPENDENTLY of any Zod schema (D0045 doctrine — Plan 02's
6
+ * `GraderJudgmentSchema` `satisfies` against this type, not the other
7
+ * way around). A tautological `satisfies z.ZodType<z.infer<typeof
8
+ * GraderJudgmentSchema>>` is forbidden.
9
+ *
10
+ * Phase 1 retained the existing pipeline core (`taskId`, `modelId`,
11
+ * `dimension`, `reason`, `score`, `outputFailure?`) as required for
12
+ * backward compat with Phase 0 callers (Doc 03 §"existing, unchanged")
13
+ * and added the GRAD-02 additive fields (`judgmentId`, `subJudgments`,
14
+ * `docCitations`, `failureMode`, `confidence`,
15
+ * `hallucinationCheckedAgainst`, `metadata`) as additive in Phase 1;
16
+ * required from Phase 3 GRAD-05.
17
+ *
18
+ * Phase 3 GRAD-05 has flipped the additive fields to required (this
19
+ * file) and the corresponding Zod schema in
20
+ * `packages/eval/src/adapters/grader-outputs/promptfoo-grader-output.ts`
21
+ * is `.strict()` with `graderJudgmentsVersion = "1.0.0"`. The
22
+ * read-only legacy parser at `…/legacy/promptfoo-grader-output-legacy.ts`
23
+ * (against `LegacyGraderJudgment`) is the named consumer for already-
24
+ * stored historical reports through GRAD-06 cutover.
25
+ *
26
+ * @see docs/decisions/D0049-shared-confidence-contract.md
27
+ * @see docs/decisions/D0052-judgment-ref-granularity.md
28
+ * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
29
+ */
30
+ import type { JudgmentId } from "./branded-ids.js";
31
+ import type { Confidence } from "./confidence.js";
32
+ /**
33
+ * Role enum for doc citations attached to a grader judgment (GRAD-02).
34
+ * Closed string-literal union — Phase 3 may extend.
35
+ */
36
+ export type DocCitationRole = "supports" | "contradicts" | "missing" | "irrelevant";
37
+ /**
38
+ * A single doc the grader cited while reasoning. `documentId` is the
39
+ * canonical D0052 reference (id, not slug); `slug` is a human-readable
40
+ * annotation only. `hallucinated` is set true at adapter time when the
41
+ * `slug` does not resolve against the task's `contextDocs` set.
42
+ */
43
+ export interface DocCitation {
44
+ /** Canonical D0052 document ref (id, not slug). */
45
+ documentId: string;
46
+ /** Optional human-readable annotation. Never the identity. */
47
+ slug?: string;
48
+ role: DocCitationRole;
49
+ /** True when `slug` is not in the resolvable-set. */
50
+ hallucinated?: boolean;
51
+ }
52
+ /**
53
+ * Per-criterion sub-judgment — one entry per task-criterion bullet
54
+ * (Doc 03 §"per-criterion sub-judgments"). The `criterionId` is the
55
+ * stable identifier declared on the task's `criteria` array (Phase 2
56
+ * GRAD-01 schema-sync), not synthesized at grade time.
57
+ */
58
+ export interface CriterionSubJudgment {
59
+ /** Stable criterion identifier — matches `CriterionRef.id` from the task definition (D0052). */
60
+ criterionId: string;
61
+ met: boolean;
62
+ /** ≤280 chars — quote or paraphrase. */
63
+ evidence: string;
64
+ /** Grader self-confidence on this single criterion (D0049). */
65
+ confidence: Confidence;
66
+ }
67
+ /**
68
+ * The structured grader judgment — Phase 3 GRAD-05 shape.
69
+ *
70
+ * Existing pipeline core (Doc 03 §"existing, unchanged"): `taskId`,
71
+ * `modelId`, `dimension`, `reason`, `score`. The pre-existing
72
+ * `outputFailure?` remains optional. The
73
+ * `contextDocs? (legacy alias: canonicalDocs)` annotation (StoredJudgment
74
+ * extension) lives on the storage extension type, not here.
75
+ *
76
+ * Additive in Phase 1; required from Phase 3 GRAD-05: `judgmentId`,
77
+ * `subJudgments`, `docCitations`, `failureMode`, `confidence`,
78
+ * `hallucinationCheckedAgainst`, `metadata`. The corresponding Zod
79
+ * schema in `packages/eval/src/adapters/grader-outputs/promptfoo-grader-output.ts`
80
+ * is `.strict()` with `graderJudgmentsVersion = "1.0.0"`.
81
+ */
82
+ export interface GraderJudgment {
83
+ /** Rubric template name (e.g. "task-completion", "code-correctness"). */
84
+ dimension: string;
85
+ /** The model that produced the response being graded. */
86
+ modelId: string;
87
+ /**
88
+ * True when the model failed to produce meaningful output (empty
89
+ * response, API error, or refusal). Distinguishes infrastructure
90
+ * failures from genuinely incorrect responses — a score of 0 from no
91
+ * output is fundamentally different from a score of 0 from wrong
92
+ * output.
93
+ */
94
+ outputFailure?: boolean;
95
+ /** The grader's natural-language reasoning. */
96
+ reason: string;
97
+ /** Numeric score in [0, 100] (normalized). */
98
+ score: number;
99
+ /** The task this judgment belongs to. */
100
+ taskId: string;
101
+ /**
102
+ * D0052 granular branded id. Required from Phase 3 GRAD-05 — every
103
+ * grader emission carries one.
104
+ */
105
+ judgmentId: JudgmentId;
106
+ /** Per-criterion sub-judgments. */
107
+ subJudgments: CriterionSubJudgment[];
108
+ /** Doc citations with role + hallucinated flag. */
109
+ docCitations: DocCitation[];
110
+ /**
111
+ * Per-dimension failure mode. Phase 3 GRAD-03 stamps the taxonomy
112
+ * literal at the runtime grader-prompt; the value is a free-form
113
+ * string for forward compat with future taxonomy extensions.
114
+ */
115
+ failureMode: string;
116
+ /** Grader self-confidence per D0049. */
117
+ confidence: Confidence;
118
+ /** Hallucination cross-check (Pitfall #11) — union of task.context.docs and run.documentManifest. */
119
+ hallucinationCheckedAgainst: string[];
120
+ /** Metadata about the grader run. */
121
+ metadata: {
122
+ graderModel: string;
123
+ graderJudgmentsVersion: string;
124
+ };
125
+ }
@@ -0,0 +1,30 @@
1
+ /**
2
+ * GraderJudgment core domain types — canonical shapes for structured
3
+ * grader output (Doc 03, GRAD-02).
4
+ *
5
+ * Authored INDEPENDENTLY of any Zod schema (D0045 doctrine — Plan 02's
6
+ * `GraderJudgmentSchema` `satisfies` against this type, not the other
7
+ * way around). A tautological `satisfies z.ZodType<z.infer<typeof
8
+ * GraderJudgmentSchema>>` is forbidden.
9
+ *
10
+ * Phase 1 retained the existing pipeline core (`taskId`, `modelId`,
11
+ * `dimension`, `reason`, `score`, `outputFailure?`) as required for
12
+ * backward compat with Phase 0 callers (Doc 03 §"existing, unchanged")
13
+ * and added the GRAD-02 additive fields (`judgmentId`, `subJudgments`,
14
+ * `docCitations`, `failureMode`, `confidence`,
15
+ * `hallucinationCheckedAgainst`, `metadata`) as additive in Phase 1;
16
+ * required from Phase 3 GRAD-05.
17
+ *
18
+ * Phase 3 GRAD-05 has flipped the additive fields to required (this
19
+ * file) and the corresponding Zod schema in
20
+ * `packages/eval/src/adapters/grader-outputs/promptfoo-grader-output.ts`
21
+ * is `.strict()` with `graderJudgmentsVersion = "1.0.0"`. The
22
+ * read-only legacy parser at `…/legacy/promptfoo-grader-output-legacy.ts`
23
+ * (against `LegacyGraderJudgment`) is the named consumer for already-
24
+ * stored historical reports through GRAD-06 cutover.
25
+ *
26
+ * @see docs/decisions/D0049-shared-confidence-contract.md
27
+ * @see docs/decisions/D0052-judgment-ref-granularity.md
28
+ * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
29
+ */
30
+ export {};
@@ -13,6 +13,7 @@ import type { DocumentRef as _DocumentRef, EvalMode, RunContext } from "../../ai
13
13
  import type { ArtifactType } from "../artifact-registry.js";
14
14
  import type { SymbolPreflightReport } from "./symbol-preflight-report.js";
15
15
  import type { AssociationValues, RunId } from "./branded-ids.js";
16
+ import type { GraderJudgment } from "./grader-judgment.js";
16
17
  export type { ActualScoreEntry, ComponentResult, TestResult, UrlMetadata, } from "./scoring-input.js";
17
18
  export type { DocumentRef, RunContext, RunTrigger } from "../../ailf-shared/index.d.ts";
18
19
  export type { StoredBaseline, StoredReport, StoredRun, StoredTaskResult, StoredTrace, SchemaVersioned, } from "./storage-schema.js";
@@ -32,9 +33,13 @@ export type { SymbolPreflightDeduction, SymbolPreflightFinding, SymbolPreflightR
32
33
  export { DEFAULT_PREFLIGHT_CODE_CORRECTNESS_WEIGHT, type PreflightRubricContext, type PreflightScoringConfig, } from "./preflight-scoring.js";
33
34
  export type { Confidence, ConfidenceDerivation } from "./confidence.js";
34
35
  export { CONVENTIONAL_DERIVATIONS, isConfidence } from "./confidence.js";
35
- export type { ArtifactId, AssociationAxis, AssociationValues, Brand, EntryKey, Err, FixtureId, IdValidationError, NewReportId, Ok, ProviderId, PromptId, Result, ResultId, RubricId, RunFingerprint, RunId, SuiteId, TaskId, TaskSlug, TraceId, } from "./branded-ids.js";
36
- export { err, fixtureId, generateRunId, ok, providerId, resultId, runId, suiteId, taskId, traceId, } from "./branded-ids.js";
37
- export type { AgentHarnessTaskDefinition, ContentLakeAuthorableMode, ContentLakeAuthorableTask, CustomTaskDefinition, GeneralizedAssertionDefinition, GeneralizedDocRef, GeneralizedTaskDefinition, GeneralizedTemplatedAssertion, GeneralizedValueAssertion, IdDocRef, KnowledgeProbeTaskDefinition, LiteracyTaskDefinition, MCPServerTaskDefinition, PromptVars, PathDocRef, PerspectiveDocRef, ReservedPromptVarKey, RubricRef, SlugDocRef, TaskCommonFields, TaskDifficulty, TaskOptions, TaskProviderConfig, TaskStatus, } from "./generalized-task.js";
36
+ export type { ArtifactId, AssociationAxis, AssociationValues, Brand, EntryKey, Err, FixtureId, IdValidationError, JudgmentId, NewReportId, Ok, ProviderId, PromptId, Result, ResultId, RubricId, RunFingerprint, RunId, SuiteId, TaskId, TaskSlug, TraceId, } from "./branded-ids.js";
37
+ export { err, fixtureId, generateJudgmentId, generateRunId, judgmentId, ok, providerId, resultId, runId, suiteId, taskId, traceId, } from "./branded-ids.js";
38
+ export type { AgentHarnessTaskDefinition, ContentLakeAuthorableMode, ContentLakeAuthorableTask, CriterionRef, CustomTaskDefinition, GeneralizedAssertionDefinition, GeneralizedDocRef, GeneralizedTaskDefinition, GeneralizedTemplatedAssertion, GeneralizedValueAssertion, IdDocRef, KnowledgeProbeTaskDefinition, LiteracyTaskDefinition, MCPServerTaskDefinition, PromptVars, PathDocRef, PerspectiveDocRef, ReservedPromptVarKey, RubricRef, SlugDocRef, TaskCommonFields, TaskDifficulty, TaskOptions, TaskProviderConfig, TaskStatus, } from "./generalized-task.js";
39
+ export type { ActionSuggestion, AreaSummaryBody, CardMeta, CardType, Diagnosis, DiagnosisCard, DocAttributionSpotlightBody, FailureModeSummaryBody, JudgmentRef, LowConfidenceAttributionBody, NoIssuesBody, RegressionVsBaselineBody, TopRecommendationsBody, VersionedInputs, WeakestAreaBody, } from "./diagnosis.js";
40
+ export type { AttributionMeta, DocAttribution, JudgmentAttribution, } from "./attribution.js";
41
+ export type { CriterionSubJudgment, DocCitation, DocCitationRole, GraderJudgment, } from "./grader-judgment.js";
42
+ export type { LegacyGraderJudgment } from "./legacy-grader-judgment.js";
38
43
  type DocumentRef = _DocumentRef;
39
44
  /** Aggregated retrieval metrics for a feature area */
40
45
  export interface AreaRetrievalMetrics {
@@ -128,8 +133,31 @@ export interface FailureModeReport {
128
133
  /** Total judgments analyzed */
129
134
  totalJudgments: number;
130
135
  }
131
- /** Failure mode classification for a low-scoring judgment */
132
- export type FailureModeType = "api-error" | "incorrect-docs" | "missing-docs" | "model-limitation" | "outdated-docs" | "poor-structure" | "unclassified";
136
+ /**
137
+ * Failure mode classification for a low-scoring judgment.
138
+ *
139
+ * Open-set string (Plan 03-02 per-dimension taxonomies introduce modes
140
+ * outside the original literacy enum: `false-floor`, `spec-mismatch`,
141
+ * `tool-misuse`, `factual-error`, `hallucination`, etc. — the grader
142
+ * is told these are legal answers via the rubric prompt). The legacy
143
+ * literacy enum survives as `LegacyFailureModeType` for the
144
+ * report-aggregation helpers that need stable bucket ordering and
145
+ * icon tables; consumers that only care about presence/absence treat
146
+ * `FailureModeType` as `string`.
147
+ */
148
+ export type FailureModeType = string;
149
+ /**
150
+ * Closed enum of the original literacy failure modes — used by the
151
+ * report formatters that iterate buckets in a stable order. Adding to
152
+ * this list is a deliberate extension; modes outside it still flow
153
+ * through the report (per-area `modes` record), just without a
154
+ * pre-allocated bucket in `summary`.
155
+ */
156
+ export type LegacyFailureModeType = "api-error" | "incorrect-docs" | "missing-docs" | "model-limitation" | "outdated-docs" | "poor-structure" | "unclassified";
157
+ /** Set of canonical legacy modes — exported for report-formatter use. */
158
+ export declare const LEGACY_FAILURE_MODES: readonly LegacyFailureModeType[];
159
+ /** Type guard for legacy modes. */
160
+ export declare function isLegacyFailureMode(mode: string): mode is LegacyFailureModeType;
133
161
  /** Per-feature-area score breakdown */
134
162
  export interface FeatureScore {
135
163
  /**
@@ -261,30 +289,16 @@ export interface GapEstimate {
261
289
  /** Specific remediation description */
262
290
  remediation: string;
263
291
  }
264
- /** A single grader judgment — one per assertion per test */
265
- export interface GraderJudgment {
266
- /** The rubric template used (task-completion, code-correctness, doc-coverage) */
267
- dimension: string;
268
- /** The model that produced the response being graded */
269
- modelId: string;
292
+ /** Enriched grader judgment with stored documentation context refs. */
293
+ export interface StoredJudgment extends GraderJudgment {
270
294
  /**
271
- * True when the model failed to produce meaningful output (empty response,
272
- * API error, or refusal). Distinguishes infrastructure failures from
273
- * genuinely incorrect responses — a score of 0 from no output is
274
- * fundamentally different from a score of 0 from wrong output.
295
+ * Documentation context the task expected the model to use.
296
+ *
297
+ * Legacy alias `canonicalDocs` may appear on stored reports written
298
+ * before Phase 2 — readers should tolerate both. Writers (the pipeline)
299
+ * always emit `contextDocs`.
275
300
  */
276
- outputFailure?: boolean;
277
- /** The grader's natural language reasoning */
278
- reason: string;
279
- /** The numeric score (0–100) */
280
- score: number;
281
- /** The task this judgment belongs to */
282
- taskId: string;
283
- }
284
- /** Enriched grader judgment with canonical doc references, stored in reports */
285
- export interface StoredJudgment extends GraderJudgment {
286
- /** Canonical docs that the task expected the model to use */
287
- canonicalDocs?: DocumentRef[];
301
+ contextDocs?: DocumentRef[];
288
302
  }
289
303
  /**
290
304
  * Per-test result stored in reports for drill-down and audit.
@@ -296,8 +310,11 @@ export interface StoredJudgment extends GraderJudgment {
296
310
  export interface StoredTestResult {
297
311
  /** Resolved feature area (from __featureArea or description) */
298
312
  area: string;
299
- /** Canonical docs the task expected the model to use */
300
- canonicalDocs?: DocumentRef[];
313
+ /**
314
+ * Documentation context the task expected the model to use.
315
+ * Legacy alias `canonicalDocs` may appear on pre-Phase-2 reports.
316
+ */
317
+ contextDocs?: DocumentRef[];
301
318
  /** Weighted composite score (gold variant only) */
302
319
  compositeScore?: number;
303
320
  /** Per-test cost (USD) */
@@ -349,6 +366,40 @@ export interface StoredTestResult {
349
366
  }
350
367
  /** Grader consistency diagnostics — does not affect scores, reported alongside */
351
368
  export interface GraderReliability {
369
+ /**
370
+ * Plan 03-03 — count of grader-emission vs ceiling cross-check disagreements.
371
+ *
372
+ * Incremented by the live pipeline when `validateFailureMode(...)` returns
373
+ * `level: "medium"` (the grader's emitted `failureMode` does not agree with
374
+ * the ceiling-decomposition mode). Surfaces calibration drift over time
375
+ * without affecting scores. Optional — undefined when the run did not
376
+ * exercise the failure-mode validator (e.g., grader-consistency-only paths).
377
+ *
378
+ * @see docs/decisions/D0049-shared-confidence-contract.md
379
+ */
380
+ failureModeCalibration?: number;
381
+ /**
382
+ * Plan 03-03 — count of strict-schema parse failures during grader-output
383
+ * extraction. Wired at the parse-fail branch in `extractGraderJudgments`;
384
+ * incremented when `GraderJudgmentSchema.safeParse` rejects a payload and
385
+ * the pipeline drops to the Phase 1 minimal-shape fallback.
386
+ *
387
+ * Plan 03-04 will tighten the strict schema (`.strict()` + GRAD-02 fields
388
+ * required) and graders will emit the structured wire format in earnest;
389
+ * this counter measures pre-hard-fail drift.
390
+ */
391
+ parseFailures?: number;
392
+ /**
393
+ * Phase 4 ATTR-01 — count of grader citations whose `slug` was not
394
+ * in the resolvable-set (`hallucinationCheckedAgainst`).
395
+ * Incremented by `computeJudgmentAttribution(...)` for every
396
+ * citation that fails the hallucination short-circuit (Success
397
+ * Criterion #5). A counter, not a ratio — consumers compute the
398
+ * rate by dividing by total-citations if needed.
399
+ *
400
+ * @see docs/decisions/D0049-shared-confidence-contract.md
401
+ */
402
+ hallucinationCount?: number;
352
403
  /** Inter-grader agreement (from multi-grader comparison) — Phase 3 */
353
404
  agreement?: {
354
405
  /** Models compared against the primary grader */
@@ -18,7 +18,21 @@ export { InMemoryPluginRegistry } from "./plugin-registry.js";
18
18
  export { evalModeType } from "./eval-mode-config.js";
19
19
  export { DEFAULT_PREFLIGHT_CODE_CORRECTNESS_WEIGHT, } from "./preflight-scoring.js";
20
20
  export { CONVENTIONAL_DERIVATIONS, isConfidence } from "./confidence.js";
21
- export { err, fixtureId, generateRunId, ok, providerId, resultId, runId, suiteId, taskId, traceId, } from "./branded-ids.js";
21
+ export { err, fixtureId, generateJudgmentId, generateRunId, judgmentId, ok, providerId, resultId, runId, suiteId, taskId, traceId, } from "./branded-ids.js";
22
+ /** Set of canonical legacy modes — exported for report-formatter use. */
23
+ export const LEGACY_FAILURE_MODES = [
24
+ "api-error",
25
+ "incorrect-docs",
26
+ "missing-docs",
27
+ "model-limitation",
28
+ "outdated-docs",
29
+ "poor-structure",
30
+ "unclassified",
31
+ ];
32
+ /** Type guard for legacy modes. */
33
+ export function isLegacyFailureMode(mode) {
34
+ return LEGACY_FAILURE_MODES.includes(mode);
35
+ }
22
36
  // ---------------------------------------------------------------------------
23
37
  // Comparison (Approach 2: structured comparison output)
24
38
  // ---------------------------------------------------------------------------
@@ -0,0 +1,55 @@
1
+ /**
2
+ * LegacyGraderJudgment — Phase 1 superset core only, with NO GRAD-02
3
+ * additive surface. Used by the read-only legacy parser at
4
+ * `packages/eval/src/adapters/grader-outputs/legacy/` for historical
5
+ * pre-Phase-3 reports.
6
+ *
7
+ * Reports are immutable events — once a Report is written to Content
8
+ * Lake the structured grader-judgment shape it captures cannot be
9
+ * back-filled. The legacy parser exists so historical-report rendering
10
+ * paths can keep deserializing pre-Phase-3 output through Phase 7
11
+ * (GRAD-06 cutover removes Studio's `reason`-only fallback rendering
12
+ * paths and the legacy adapter alongside).
13
+ *
14
+ * Authored INDEPENDENTLY of any Zod schema (D0045 doctrine — the
15
+ * legacy schema in
16
+ * `packages/eval/src/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.ts`
17
+ * `satisfies z.ZodType<LegacyGraderJudgment>` against this type, not
18
+ * the other way around). A tautological
19
+ * `satisfies z.ZodType<z.infer<typeof Schema>>` is forbidden.
20
+ *
21
+ * Invariant — live grader output that fails the strict
22
+ * `GraderJudgmentSchema` MUST NOT fall back to this schema. Drop to
23
+ * `failureMode: "unclassified"` instead. The legacy parser is invoked
24
+ * ONLY by historical-report rendering paths.
25
+ *
26
+ * @see ./grader-judgment.ts — the Phase 1+ structured shape (live path)
27
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
28
+ * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
29
+ */
30
+ /**
31
+ * The Phase 1 free-prose grader judgment as historical reports captured
32
+ * it. Mirrors the existing-pipeline-core surface of {@link GraderJudgment}
33
+ * (the required fields) and the pre-existing optional `outputFailure`
34
+ * flag. NO GRAD-02 additive fields (`subJudgments`, `docCitations`,
35
+ * `failureMode`, `confidence`, `hallucinationCheckedAgainst`,
36
+ * `metadata`) — those are by construction absent on pre-Phase-3 output.
37
+ */
38
+ export interface LegacyGraderJudgment {
39
+ /** Rubric template name (e.g. "task-completion", "code-correctness"). */
40
+ dimension: string;
41
+ /** The model that produced the response being graded. */
42
+ modelId: string;
43
+ /**
44
+ * True when the model failed to produce meaningful output (empty
45
+ * response, API error, or refusal). Same semantics as
46
+ * {@link GraderJudgment.outputFailure}.
47
+ */
48
+ outputFailure?: boolean;
49
+ /** The grader's natural-language reasoning (free-prose Phase 1 shape). */
50
+ reason: string;
51
+ /** Numeric score in [0, 100] (normalized). */
52
+ score: number;
53
+ /** The task this judgment belongs to. */
54
+ taskId: string;
55
+ }
@@ -0,0 +1,30 @@
1
+ /**
2
+ * LegacyGraderJudgment — Phase 1 superset core only, with NO GRAD-02
3
+ * additive surface. Used by the read-only legacy parser at
4
+ * `packages/eval/src/adapters/grader-outputs/legacy/` for historical
5
+ * pre-Phase-3 reports.
6
+ *
7
+ * Reports are immutable events — once a Report is written to Content
8
+ * Lake the structured grader-judgment shape it captures cannot be
9
+ * back-filled. The legacy parser exists so historical-report rendering
10
+ * paths can keep deserializing pre-Phase-3 output through Phase 7
11
+ * (GRAD-06 cutover removes Studio's `reason`-only fallback rendering
12
+ * paths and the legacy adapter alongside).
13
+ *
14
+ * Authored INDEPENDENTLY of any Zod schema (D0045 doctrine — the
15
+ * legacy schema in
16
+ * `packages/eval/src/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.ts`
17
+ * `satisfies z.ZodType<LegacyGraderJudgment>` against this type, not
18
+ * the other way around). A tautological
19
+ * `satisfies z.ZodType<z.infer<typeof Schema>>` is forbidden.
20
+ *
21
+ * Invariant — live grader output that fails the strict
22
+ * `GraderJudgmentSchema` MUST NOT fall back to this schema. Drop to
23
+ * `failureMode: "unclassified"` instead. The legacy parser is invoked
24
+ * ONLY by historical-report rendering paths.
25
+ *
26
+ * @see ./grader-judgment.ts — the Phase 1+ structured shape (live path)
27
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
28
+ * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
29
+ */
30
+ export {};
@@ -84,6 +84,7 @@ export interface PipelineRequest {
84
84
  dataset?: string;
85
85
  debug?: PipelineRequestDebug | boolean;
86
86
  executor?: PipelineRequestCallerExecutor;
87
+ borderlineReplications?: number;
87
88
  gapAnalysis?: boolean;
88
89
  graderContext?: "rubric-only" | "with-docs";
89
90
  graderReplications?: number;
@@ -47,6 +47,14 @@ export interface RepoPublishConfig {
47
47
  /** Execution-tier knobs — replaces the retired `--concurrency` / `--api-url` flags. */
48
48
  export interface RepoExecutionConfig {
49
49
  apiUrl?: string;
50
+ /**
51
+ * Plan 03-04 GRAD-04 — replications per borderline judgment for the
52
+ * intra-grader consensus pass. Default 3 (set in composition-root).
53
+ * A judgment is "borderline" when its score lies within ±5 of any
54
+ * severity boundary (30/50/60). Non-borderline judgments are not
55
+ * re-graded.
56
+ */
57
+ borderlineReplications?: number;
50
58
  concurrency?: number;
51
59
  gapAnalysis?: boolean;
52
60
  graderReplications?: number;
@@ -8,7 +8,7 @@
8
8
  * Attachable at every level of the report hierarchy:
9
9
  * - ScoreSummary.documentManifest — all docs used in the evaluation
10
10
  * - FeatureScore.documents — docs used for a specific area
11
- * - StoredJudgment.canonicalDocs — docs expected for a specific task
11
+ * - StoredJudgment.contextDocs (legacy alias: canonicalDocs) — docs expected for a specific task
12
12
  */
13
13
  export interface DocumentRef {
14
14
  /**
@@ -59,6 +59,7 @@ export interface RemoteConfigSlice {
59
59
  perspectiveOverride?: string;
60
60
  graderContext?: "rubric-only" | "with-docs";
61
61
  graderReplications?: number;
62
+ borderlineReplications?: number;
62
63
  gapAnalysisEnabled?: boolean;
63
64
  noRemoteCache?: boolean;
64
65
  /**
@@ -130,6 +130,9 @@ export async function buildRemoteRequest(options) {
130
130
  if (config.graderReplications) {
131
131
  raw.graderReplications = config.graderReplications;
132
132
  }
133
+ if (config.borderlineReplications) {
134
+ raw.borderlineReplications = config.borderlineReplications;
135
+ }
133
136
  if (config.gapAnalysisEnabled)
134
137
  raw.gapAnalysis = true;
135
138
  if (config.noRemoteCache)
@@ -0,0 +1,35 @@
1
+ /**
2
+ * attribution-meta-writer.ts — Zod schema for the run-scoped
3
+ * attribution metadata artifact (ATTR-01) emitted by Phase 4 and read
4
+ * back alongside the per-entry attribution objects.
5
+ *
6
+ * The schema asserts `satisfies z.ZodType<AttributionMeta>` against the
7
+ * canonical domain type in `packages/core/src/types/attribution.ts`
8
+ * (D0045 / W0187) — drift is a build error.
9
+ *
10
+ * `embeddingModel` is REQUIRED (Pitfall #6): silently downgrading to a
11
+ * default has caused regressions in adjacent codebases — model swaps
12
+ * MUST invalidate cached weights.
13
+ *
14
+ * Phase 1 lands the SHAPE only — no compute, no file I/O.
15
+ *
16
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
17
+ * @see docs/design-docs/actionability-ladder/04-per-document-attribution-ensemble.md
18
+ */
19
+ import { z } from "zod";
20
+ /**
21
+ * Canonical schema for {@link AttributionMeta}. Persisted at
22
+ * `runs/{runId}/attribution/_meta.json` (or whatever bulk path the
23
+ * Phase 4 descriptor pins) and parsed on read.
24
+ */
25
+ export declare const AttributionMetaSchema: z.ZodObject<{
26
+ ensembleVersion: z.ZodString;
27
+ embeddingModel: z.ZodString;
28
+ calibrationSetVersion: z.ZodOptional<z.ZodString>;
29
+ weights: z.ZodObject<{
30
+ citation: z.ZodNumber;
31
+ canonical: z.ZodNumber;
32
+ retrieved: z.ZodNumber;
33
+ }, z.core.$strip>;
34
+ }, z.core.$strip>;
35
+ export type { AttributionMeta } from "../../_vendor/ailf-core/index.d.ts";
@@ -0,0 +1,34 @@
1
+ /**
2
+ * attribution-meta-writer.ts — Zod schema for the run-scoped
3
+ * attribution metadata artifact (ATTR-01) emitted by Phase 4 and read
4
+ * back alongside the per-entry attribution objects.
5
+ *
6
+ * The schema asserts `satisfies z.ZodType<AttributionMeta>` against the
7
+ * canonical domain type in `packages/core/src/types/attribution.ts`
8
+ * (D0045 / W0187) — drift is a build error.
9
+ *
10
+ * `embeddingModel` is REQUIRED (Pitfall #6): silently downgrading to a
11
+ * default has caused regressions in adjacent codebases — model swaps
12
+ * MUST invalidate cached weights.
13
+ *
14
+ * Phase 1 lands the SHAPE only — no compute, no file I/O.
15
+ *
16
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
17
+ * @see docs/design-docs/actionability-ladder/04-per-document-attribution-ensemble.md
18
+ */
19
+ import { z } from "zod";
20
+ /**
21
+ * Canonical schema for {@link AttributionMeta}. Persisted at
22
+ * `runs/{runId}/attribution/_meta.json` (or whatever bulk path the
23
+ * Phase 4 descriptor pins) and parsed on read.
24
+ */
25
+ export const AttributionMetaSchema = z.object({
26
+ ensembleVersion: z.string().min(1),
27
+ embeddingModel: z.string().min(1),
28
+ calibrationSetVersion: z.string().optional(),
29
+ weights: z.object({
30
+ citation: z.number(),
31
+ canonical: z.number(),
32
+ retrieved: z.number(),
33
+ }),
34
+ });
@@ -0,0 +1,9 @@
1
+ /**
2
+ * attribution adapter barrel — named re-exports only (W0124 / D0045).
3
+ *
4
+ * The attribution schemas live here so they enter the D0045
5
+ * `pnpm check-trust-boundary-satisfies` SCAN_ROOTS gate.
6
+ */
7
+ export { JudgmentAttributionSchema } from "./per-entry-attribution-writer.js";
8
+ export { AttributionMetaSchema } from "./attribution-meta-writer.js";
9
+ export type { AttributionMeta, DocAttribution, JudgmentAttribution, } from "../../_vendor/ailf-core/index.d.ts";
@@ -0,0 +1,8 @@
1
+ /**
2
+ * attribution adapter barrel — named re-exports only (W0124 / D0045).
3
+ *
4
+ * The attribution schemas live here so they enter the D0045
5
+ * `pnpm check-trust-boundary-satisfies` SCAN_ROOTS gate.
6
+ */
7
+ export { JudgmentAttributionSchema } from "./per-entry-attribution-writer.js";
8
+ export { AttributionMetaSchema } from "./attribution-meta-writer.js";
@@ -0,0 +1,56 @@
1
+ /**
2
+ * per-entry-attribution-writer.ts — Zod schema for the per-judgment
3
+ * attribution artifact (ATTR-01) emitted by Phase 4's
4
+ * `ComputeAttributionStep` and read back by Phase 5's diagnosis runner.
5
+ *
6
+ * The schema asserts `satisfies z.ZodType<JudgmentAttribution>` against
7
+ * the canonical domain type in `packages/core/src/types/attribution.ts`
8
+ * (D0045 / W0187) — drift between schema and type is a build error.
9
+ *
10
+ * Phase 1 lands the SHAPE only — no compute, no file I/O. Phase 4 wires
11
+ * the writer; Phase 5 wires the reader. Both `satisfies` against this
12
+ * single source-of-truth schema.
13
+ *
14
+ * `hallucinationCheckedAgainst` is REQUIRED (Pitfall #11): consumers
15
+ * must be able to audit citation grounding without re-deriving the
16
+ * resolvable-set. The canonical task field is `contextDocs`; do NOT
17
+ * invent `expectedDocs` / `usedDocs` synonyms.
18
+ *
19
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
20
+ * @see docs/decisions/D0049-shared-confidence-contract.md
21
+ * @see docs/decisions/D0052-judgment-ref-granularity.md
22
+ * @see docs/design-docs/actionability-ladder/04-per-document-attribution-ensemble.md
23
+ */
24
+ import { z } from "zod";
25
+ /**
26
+ * Canonical schema for {@link JudgmentAttribution}. Persisted at
27
+ * `runs/{runId}/attribution/{entryKey}.json` (Phase 4) and parsed by
28
+ * the diagnosis runner on read (Phase 5).
29
+ */
30
+ export declare const JudgmentAttributionSchema: z.ZodObject<{
31
+ judgmentRef: z.ZodString;
32
+ taskId: z.ZodString;
33
+ modelId: z.ZodString;
34
+ dimension: z.ZodString;
35
+ attributions: z.ZodArray<z.ZodObject<{
36
+ documentId: z.ZodString;
37
+ slug: z.ZodOptional<z.ZodString>;
38
+ score: z.ZodNumber;
39
+ signals: z.ZodObject<{
40
+ citation: z.ZodOptional<z.ZodNumber>;
41
+ canonical: z.ZodOptional<z.ZodNumber>;
42
+ retrieved: z.ZodOptional<z.ZodNumber>;
43
+ }, z.core.$strip>;
44
+ confidence: z.ZodObject<{
45
+ level: z.ZodEnum<{
46
+ low: "low";
47
+ medium: "medium";
48
+ high: "high";
49
+ }>;
50
+ signalsPresent: z.ZodNumber;
51
+ derivation: z.ZodString;
52
+ }, z.core.$strip>;
53
+ }, z.core.$strip>>;
54
+ hallucinationCheckedAgainst: z.ZodArray<z.ZodString>;
55
+ }, z.core.$strip>;
56
+ export type { DocAttribution, JudgmentAttribution } from "../../_vendor/ailf-core/index.d.ts";