@sanity/ailf 5.0.0 → 6.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. package/config/airbyte/ai_literacy_framework.connector.yaml +276 -0
  2. package/config/bigquery/views/synthesis_parse_failure_rate_7d.sql +42 -0
  3. package/config/diagnosis-cards.ts +318 -0
  4. package/config/models.ts +12 -0
  5. package/dist/_vendor/ailf-core/grader/failure-modes/agent-harness.d.ts +13 -0
  6. package/dist/_vendor/ailf-core/grader/failure-modes/agent-harness.js +16 -0
  7. package/dist/_vendor/ailf-core/grader/failure-modes/common.d.ts +14 -0
  8. package/dist/_vendor/ailf-core/grader/failure-modes/common.js +18 -0
  9. package/dist/_vendor/ailf-core/grader/failure-modes/index.d.ts +45 -0
  10. package/dist/_vendor/ailf-core/grader/failure-modes/index.js +109 -0
  11. package/dist/_vendor/ailf-core/grader/failure-modes/knowledge-probe.d.ts +13 -0
  12. package/dist/_vendor/ailf-core/grader/failure-modes/knowledge-probe.js +17 -0
  13. package/dist/_vendor/ailf-core/grader/failure-modes/literacy.d.ts +13 -0
  14. package/dist/_vendor/ailf-core/grader/failure-modes/literacy.js +17 -0
  15. package/dist/_vendor/ailf-core/grader/failure-modes/mcp.d.ts +13 -0
  16. package/dist/_vendor/ailf-core/grader/failure-modes/mcp.js +17 -0
  17. package/dist/_vendor/ailf-core/index.d.ts +1 -0
  18. package/dist/_vendor/ailf-core/index.js +4 -0
  19. package/dist/_vendor/ailf-core/ports/context.d.ts +12 -0
  20. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +7 -0
  21. package/dist/_vendor/ailf-core/schemas/eval-config.js +8 -0
  22. package/dist/_vendor/ailf-core/services/diagnosis/card-validators.d.ts +41 -0
  23. package/dist/_vendor/ailf-core/services/diagnosis/card-validators.js +40 -0
  24. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/area-summary.test.d.ts +7 -0
  25. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/area-summary.test.js +131 -0
  26. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/failure-mode-summary.test.d.ts +7 -0
  27. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/failure-mode-summary.test.js +230 -0
  28. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/no-issues.test.d.ts +7 -0
  29. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/no-issues.test.js +155 -0
  30. package/dist/_vendor/ailf-core/services/diagnosis/cards/area-summary.d.ts +17 -0
  31. package/dist/_vendor/ailf-core/services/diagnosis/cards/area-summary.js +43 -0
  32. package/dist/_vendor/ailf-core/services/diagnosis/cards/doc-attribution-spotlight.d.ts +46 -0
  33. package/dist/_vendor/ailf-core/services/diagnosis/cards/doc-attribution-spotlight.js +108 -0
  34. package/dist/_vendor/ailf-core/services/diagnosis/cards/failure-mode-summary.d.ts +28 -0
  35. package/dist/_vendor/ailf-core/services/diagnosis/cards/failure-mode-summary.js +140 -0
  36. package/dist/_vendor/ailf-core/services/diagnosis/cards/index.d.ts +49 -0
  37. package/dist/_vendor/ailf-core/services/diagnosis/cards/index.js +65 -0
  38. package/dist/_vendor/ailf-core/services/diagnosis/cards/low-confidence-attribution.d.ts +27 -0
  39. package/dist/_vendor/ailf-core/services/diagnosis/cards/low-confidence-attribution.js +93 -0
  40. package/dist/_vendor/ailf-core/services/diagnosis/cards/no-issues.d.ts +32 -0
  41. package/dist/_vendor/ailf-core/services/diagnosis/cards/no-issues.js +71 -0
  42. package/dist/_vendor/ailf-core/services/diagnosis/cards/regression-vs-baseline.d.ts +44 -0
  43. package/dist/_vendor/ailf-core/services/diagnosis/cards/regression-vs-baseline.js +130 -0
  44. package/dist/_vendor/ailf-core/services/diagnosis/cards/top-recommendations.d.ts +41 -0
  45. package/dist/_vendor/ailf-core/services/diagnosis/cards/top-recommendations.js +111 -0
  46. package/dist/_vendor/ailf-core/services/diagnosis/cards/weakest-area.d.ts +43 -0
  47. package/dist/_vendor/ailf-core/services/diagnosis/cards/weakest-area.js +118 -0
  48. package/dist/_vendor/ailf-core/services/diagnosis/prompt-builders.d.ts +72 -0
  49. package/dist/_vendor/ailf-core/services/diagnosis/prompt-builders.js +286 -0
  50. package/dist/_vendor/ailf-core/services/diagnosis/prompts/doc-attribution-spotlight.system.d.ts +17 -0
  51. package/dist/_vendor/ailf-core/services/diagnosis/prompts/doc-attribution-spotlight.system.js +58 -0
  52. package/dist/_vendor/ailf-core/services/diagnosis/prompts/index.d.ts +10 -0
  53. package/dist/_vendor/ailf-core/services/diagnosis/prompts/index.js +10 -0
  54. package/dist/_vendor/ailf-core/services/diagnosis/prompts/low-confidence-attribution.system.d.ts +15 -0
  55. package/dist/_vendor/ailf-core/services/diagnosis/prompts/low-confidence-attribution.system.js +53 -0
  56. package/dist/_vendor/ailf-core/services/diagnosis/prompts/regression-vs-baseline.system.d.ts +14 -0
  57. package/dist/_vendor/ailf-core/services/diagnosis/prompts/regression-vs-baseline.system.js +63 -0
  58. package/dist/_vendor/ailf-core/services/diagnosis/prompts/top-recommendations.system.d.ts +16 -0
  59. package/dist/_vendor/ailf-core/services/diagnosis/prompts/top-recommendations.system.js +78 -0
  60. package/dist/_vendor/ailf-core/services/diagnosis/prompts/weakest-area.system.d.ts +18 -0
  61. package/dist/_vendor/ailf-core/services/diagnosis/prompts/weakest-area.system.js +74 -0
  62. package/dist/_vendor/ailf-core/services/diagnosis/registry.d.ts +10 -0
  63. package/dist/_vendor/ailf-core/services/diagnosis/registry.js +10 -0
  64. package/dist/_vendor/ailf-core/services/diagnosis-runner.d.ts +119 -2
  65. package/dist/_vendor/ailf-core/services/diagnosis-runner.js +136 -2
  66. package/dist/_vendor/ailf-core/services/index.d.ts +5 -1
  67. package/dist/_vendor/ailf-core/services/index.js +15 -2
  68. package/dist/_vendor/ailf-core/services/llm-client-factory.d.ts +64 -0
  69. package/dist/_vendor/ailf-core/services/llm-client-factory.js +54 -0
  70. package/dist/_vendor/ailf-core/types/diagnosis.d.ts +115 -10
  71. package/dist/_vendor/ailf-core/types/diagnosis.js +3 -1
  72. package/dist/_vendor/ailf-core/types/index.d.ts +8 -1
  73. package/dist/_vendor/ailf-core/types/repo-config.d.ts +16 -0
  74. package/dist/_vendor/ailf-core/types/synthesis-telemetry.d.ts +101 -0
  75. package/dist/_vendor/ailf-core/types/synthesis-telemetry.js +18 -0
  76. package/dist/adapters/config-sources/file-config-adapter.js +8 -6
  77. package/dist/adapters/llm/fake-llm-client.d.ts +20 -0
  78. package/dist/adapters/llm/fake-llm-client.js +38 -1
  79. package/dist/adapters/llm/index.d.ts +1 -1
  80. package/dist/adapters/llm/index.js +1 -1
  81. package/dist/adapters/llm/openai-llm-client.js +59 -5
  82. package/dist/adapters/llm/retry.d.ts +18 -0
  83. package/dist/adapters/llm/retry.js +21 -0
  84. package/dist/adapters/synthesis/synthesis-telemetry-schema.d.ts +49 -0
  85. package/dist/adapters/synthesis/synthesis-telemetry-schema.js +55 -0
  86. package/dist/adapters/task-sources/content-lake-task-source.js +10 -5
  87. package/dist/adapters/task-sources/repo-schemas.d.ts +7 -0
  88. package/dist/adapters/task-sources/repo-schemas.js +10 -0
  89. package/dist/cli-program.js +3 -0
  90. package/dist/commands/interpret.d.ts +70 -0
  91. package/dist/commands/interpret.js +221 -0
  92. package/dist/commands/pipeline-action.d.ts +44 -0
  93. package/dist/commands/pipeline-action.js +193 -1
  94. package/dist/commands/run.d.ts +2 -0
  95. package/dist/commands/run.js +2 -0
  96. package/dist/composition-root.d.ts +21 -23
  97. package/dist/composition-root.js +107 -41
  98. package/dist/config/diagnosis-cards.ts +318 -0
  99. package/dist/config/models.ts +12 -0
  100. package/dist/grader/agent-harness.d.ts +5 -10
  101. package/dist/grader/agent-harness.js +5 -13
  102. package/dist/grader/common.d.ts +5 -13
  103. package/dist/grader/common.js +5 -17
  104. package/dist/grader/index.d.ts +15 -29
  105. package/dist/grader/index.js +15 -66
  106. package/dist/grader/knowledge-probe.d.ts +5 -10
  107. package/dist/grader/knowledge-probe.js +5 -14
  108. package/dist/grader/literacy.d.ts +5 -9
  109. package/dist/grader/literacy.js +5 -13
  110. package/dist/grader/mcp.d.ts +5 -10
  111. package/dist/grader/mcp.js +5 -14
  112. package/dist/orchestration/pipeline-orchestrator.js +3 -0
  113. package/dist/report-store.d.ts +26 -0
  114. package/dist/report-store.js +63 -0
  115. package/package.json +2 -2
@@ -0,0 +1,230 @@
1
+ /**
2
+ * failure-mode-summary.test.ts — Tests 5-9 (plus cross-cutting mode-resolution cases) for the deterministic failure-mode-summary card.
3
+ *
4
+ * D0042: vitest only.
5
+ * D-05: schema must reject cross-dimension (dimension, failureMode) pairs.
6
+ */
7
+ import { describe, expect, it } from "vitest";
8
+ import { FailureModeSummaryBodySchema, generateFailureModeSummary, } from "../failure-mode-summary.js";
9
+ import { failureModesForDimension } from "../../../../grader/failure-modes/index.js";
10
+ // Compile-time gate
11
+ const _satisfiesCheck = FailureModeSummaryBodySchema;
12
+ const silentLogger = {
13
+ debug: () => { },
14
+ info: () => { },
15
+ warn: () => { },
16
+ error: () => { },
17
+ step: () => { },
18
+ section: () => { },
19
+ table: () => { },
20
+ };
21
+ const noopProgress = {
22
+ phaseStart: () => { },
23
+ phaseProgress: () => { },
24
+ phaseComplete: () => { },
25
+ };
26
+ function makeCtx() {
27
+ return {
28
+ llm: undefined,
29
+ model: "anthropic:claude-sonnet-4-6",
30
+ logger: silentLogger,
31
+ progress: noopProgress,
32
+ versions: {
33
+ graderJudgmentsVersion: "1.0.0",
34
+ ensembleVersion: "1.0.0",
35
+ diagnosisVersion: "0.1.0",
36
+ cardVersion: "1.0.0",
37
+ },
38
+ runId: "run-001",
39
+ reportId: "report-001",
40
+ };
41
+ }
42
+ function makeReport(failureModes) {
43
+ return {
44
+ id: "report-001",
45
+ completedAt: "2026-01-01T00:00:00Z",
46
+ durationMs: 1000,
47
+ provenance: {
48
+ runId: "run-001",
49
+ mode: "standard",
50
+ areas: [],
51
+ taskIds: [],
52
+ models: [],
53
+ graderModel: "gpt-4o",
54
+ source: "local",
55
+ evalFingerprint: "",
56
+ trigger: "manual",
57
+ git: undefined,
58
+ },
59
+ summary: {
60
+ belowCritical: [],
61
+ lowestArea: "groq",
62
+ lowestScore: 50,
63
+ overall: {
64
+ avgCeilingScore: 80,
65
+ avgScore: 70,
66
+ avgDocLift: 5,
67
+ avgDocQualityGap: 20,
68
+ avgFloorScore: 65,
69
+ negativeDocLiftCount: 0,
70
+ },
71
+ scores: [
72
+ {
73
+ feature: "groq",
74
+ ceilingScore: 80,
75
+ codeCorrectness: 70,
76
+ docCoverage: 65,
77
+ docLift: 5,
78
+ docQualityGap: 20,
79
+ floorScore: 65,
80
+ negativeDocLift: false,
81
+ taskCompletion: 70,
82
+ testCount: 10,
83
+ totalCost: 0.05,
84
+ totalScore: 70,
85
+ },
86
+ ],
87
+ failureModes,
88
+ },
89
+ };
90
+ }
91
+ describe("FailureModeSummaryBodySchema — field structure (Test 5)", () => {
92
+ it("includes dimension, failureMode, count, sampleSize, summary fields", () => {
93
+ const canonicalMode = failureModesForDimension("task-completion")[0];
94
+ const result = FailureModeSummaryBodySchema.safeParse({
95
+ summary: "test summary",
96
+ dimension: "task-completion",
97
+ failureMode: canonicalMode,
98
+ count: 5,
99
+ sampleSize: 20,
100
+ });
101
+ expect(result.success).toBe(true);
102
+ // compile-time gate
103
+ expect(_satisfiesCheck).toBeDefined();
104
+ });
105
+ });
106
+ describe("FailureModeSummaryBodySchema — D-05 refine rejects cross-dimension (Test 6)", () => {
107
+ it("rejects mcp-tool-not-found for task-completion dimension", () => {
108
+ // "mcp-tool-not-found" (named in the test title) doesn't exist in the taxonomy, so to test
109
+ // cross-dimension hallucination we use a real MCP-only mode against a literacy dimension.
110
+ const mcpOnlyMode = "spec-mismatch"; // valid for mcp-behavior, not task-completion
111
+ const result = FailureModeSummaryBodySchema.safeParse({
112
+ summary: "test",
113
+ dimension: "task-completion",
114
+ failureMode: mcpOnlyMode,
115
+ count: 1,
116
+ sampleSize: 5,
117
+ });
118
+ expect(result.success).toBe(false);
119
+ });
120
+ });
121
+ describe("FailureModeSummaryBodySchema — accepts canonical pair (Test 7)", () => {
122
+ it("accepts a valid (dimension, failureMode) pair from the taxonomy", () => {
123
+ const canonicalMode = failureModesForDimension("task-completion")[0];
124
+ const result = FailureModeSummaryBodySchema.safeParse({
125
+ summary: "test summary about missing docs",
126
+ dimension: "task-completion",
127
+ failureMode: canonicalMode,
128
+ count: 3,
129
+ sampleSize: 15,
130
+ });
131
+ expect(result.success).toBe(true);
132
+ });
133
+ });
134
+ describe("generateFailureModeSummary — happy path (Test 8)", () => {
135
+ it("returns a ready card with the dominant failure mode for a report with failure modes", async () => {
136
+ const canonicalMode = failureModesForDimension("task-completion")[0];
137
+ const report = makeReport({
138
+ counts: { "task-completion": 7 },
139
+ topTitles: [
140
+ {
141
+ id: `task-completion::${canonicalMode}`,
142
+ category: canonicalMode,
143
+ severity: "high",
144
+ title: canonicalMode,
145
+ count: 7,
146
+ },
147
+ ],
148
+ totalJudgments: 20,
149
+ classificationRate: 0.35,
150
+ });
151
+ const card = await generateFailureModeSummary(report, makeCtx());
152
+ expect(card.status).toBe("ready");
153
+ if (card.status === "ready") {
154
+ expect(card.cardType).toBe("failure-mode-summary");
155
+ expect(card.meta.cardVersion).toBe("failure-mode-summary@0.1.0");
156
+ const body = card.body;
157
+ expect(body.count).toBeGreaterThan(0);
158
+ }
159
+ });
160
+ });
161
+ describe("generateFailureModeSummary — empty failure modes (Test 9)", () => {
162
+ it("returns missing card when report has no failure modes", async () => {
163
+ const report = makeReport(undefined);
164
+ const card = await generateFailureModeSummary(report, makeCtx());
165
+ expect(card.status).toBe("missing");
166
+ if (card.status === "missing") {
167
+ expect(card.cardType).toBe("failure-mode-summary");
168
+ expect(card.reason).toContain("no failure modes");
169
+ }
170
+ });
171
+ });
172
+ describe("generateFailureModeSummary — cross-cutting mode resolution", () => {
173
+ // `missing-docs` appears in both LITERACY_FAILURE_MODES and MCP_FAILURE_MODES.
174
+ // Before the per-EvalMode preference table, linear scan of CANONICAL_DIMENSIONS
175
+ // always resolved it to task-completion, mislabelling MCP-only runs.
176
+ function withMode(report, mode) {
177
+ return {
178
+ ...report,
179
+ provenance: { ...report.provenance, mode },
180
+ };
181
+ }
182
+ it("resolves 'missing-docs' to mcp-behavior on an mcp-server report", async () => {
183
+ const base = makeReport({
184
+ counts: { "mcp-behavior": 7 },
185
+ topTitles: [
186
+ {
187
+ id: "mcp-behavior::missing-docs",
188
+ category: "missing-docs",
189
+ severity: "high",
190
+ title: "missing-docs",
191
+ count: 7,
192
+ },
193
+ ],
194
+ totalJudgments: 20,
195
+ classificationRate: 0.35,
196
+ });
197
+ const report = withMode(base, "mcp-server");
198
+ const card = await generateFailureModeSummary(report, makeCtx());
199
+ expect(card.status).toBe("ready");
200
+ if (card.status === "ready") {
201
+ const body = card.body;
202
+ expect(body.dimension).toBe("mcp-behavior");
203
+ expect(body.failureMode).toBe("missing-docs");
204
+ }
205
+ });
206
+ it("resolves 'missing-docs' to task-completion on a literacy report", async () => {
207
+ const base = makeReport({
208
+ counts: { "task-completion": 7 },
209
+ topTitles: [
210
+ {
211
+ id: "task-completion::missing-docs",
212
+ category: "missing-docs",
213
+ severity: "high",
214
+ title: "missing-docs",
215
+ count: 7,
216
+ },
217
+ ],
218
+ totalJudgments: 20,
219
+ classificationRate: 0.35,
220
+ });
221
+ const report = withMode(base, "literacy");
222
+ const card = await generateFailureModeSummary(report, makeCtx());
223
+ expect(card.status).toBe("ready");
224
+ if (card.status === "ready") {
225
+ const body = card.body;
226
+ expect(body.dimension).toBe("task-completion");
227
+ expect(body.failureMode).toBe("missing-docs");
228
+ }
229
+ });
230
+ });
@@ -0,0 +1,7 @@
1
+ /**
2
+ * no-issues.test.ts — Tests 10-12 for the deterministic no-issues card.
3
+ *
4
+ * D0042: vitest only.
5
+ * AI-SPEC §1b failure-mode #7: sycophancy guard — firing rate ≤30%.
6
+ */
7
+ export {};
@@ -0,0 +1,155 @@
1
+ /**
2
+ * no-issues.test.ts — Tests 10-12 for the deterministic no-issues card.
3
+ *
4
+ * D0042: vitest only.
5
+ * AI-SPEC §1b failure-mode #7: sycophancy guard — firing rate ≤30%.
6
+ */
7
+ import { describe, expect, it } from "vitest";
8
+ import { NO_ISSUES_THRESHOLD, NoIssuesBodySchema, generateNoIssues, } from "../no-issues.js";
9
+ // Compile-time gate
10
+ const _satisfiesCheck = NoIssuesBodySchema;
11
+ const silentLogger = {
12
+ debug: () => { },
13
+ info: () => { },
14
+ warn: () => { },
15
+ error: () => { },
16
+ step: () => { },
17
+ section: () => { },
18
+ table: () => { },
19
+ };
20
+ const noopProgress = {
21
+ phaseStart: () => { },
22
+ phaseProgress: () => { },
23
+ phaseComplete: () => { },
24
+ };
25
+ function makeCtx() {
26
+ return {
27
+ llm: undefined,
28
+ model: "anthropic:claude-sonnet-4-6",
29
+ logger: silentLogger,
30
+ progress: noopProgress,
31
+ versions: {
32
+ graderJudgmentsVersion: "1.0.0",
33
+ ensembleVersion: "1.0.0",
34
+ diagnosisVersion: "0.1.0",
35
+ cardVersion: "1.0.0",
36
+ },
37
+ runId: "run-001",
38
+ reportId: "report-001",
39
+ };
40
+ }
41
+ function makeReportWithScores(scores) {
42
+ return {
43
+ id: "report-001",
44
+ completedAt: "2026-01-01T00:00:00Z",
45
+ durationMs: 1000,
46
+ provenance: {
47
+ runId: "run-001",
48
+ mode: "standard",
49
+ areas: [],
50
+ taskIds: [],
51
+ models: [],
52
+ graderModel: "gpt-4o",
53
+ source: "local",
54
+ evalFingerprint: "",
55
+ trigger: "manual",
56
+ git: undefined,
57
+ },
58
+ summary: {
59
+ belowCritical: [],
60
+ lowestArea: scores.length ? `area-0` : "",
61
+ lowestScore: scores.length ? Math.min(...scores) : 0,
62
+ overall: {
63
+ avgCeilingScore: 90,
64
+ avgScore: scores.length
65
+ ? scores.reduce((a, b) => a + b, 0) / scores.length
66
+ : 0,
67
+ avgDocLift: 5,
68
+ avgDocQualityGap: 10,
69
+ avgFloorScore: 80,
70
+ negativeDocLiftCount: 0,
71
+ },
72
+ scores: scores.map((s, i) => ({
73
+ feature: `area-${i}`,
74
+ ceilingScore: 95,
75
+ codeCorrectness: s,
76
+ docCoverage: s,
77
+ docLift: 5,
78
+ docQualityGap: 5,
79
+ floorScore: s - 5,
80
+ negativeDocLift: false,
81
+ taskCompletion: s,
82
+ testCount: 10,
83
+ totalCost: 0.05,
84
+ totalScore: s,
85
+ })),
86
+ },
87
+ };
88
+ }
89
+ describe("NO_ISSUES_THRESHOLD (Test 10)", () => {
90
+ it("is exported as a const with a numeric value", () => {
91
+ expect(typeof NO_ISSUES_THRESHOLD).toBe("number");
92
+ expect(NO_ISSUES_THRESHOLD).toBeGreaterThan(0);
93
+ expect(NO_ISSUES_THRESHOLD).toBeLessThanOrEqual(100);
94
+ // compile-time gate
95
+ expect(_satisfiesCheck).toBeDefined();
96
+ });
97
+ });
98
+ describe("generateNoIssues — ready when all areas ≥ threshold (Test 11a)", () => {
99
+ it("returns ready card with thresholdScore when all areas score ≥ threshold", async () => {
100
+ const scores = [
101
+ NO_ISSUES_THRESHOLD,
102
+ NO_ISSUES_THRESHOLD + 5,
103
+ NO_ISSUES_THRESHOLD + 10,
104
+ ];
105
+ const report = makeReportWithScores(scores);
106
+ const card = await generateNoIssues(report, makeCtx());
107
+ expect(card.status).toBe("ready");
108
+ if (card.status === "ready") {
109
+ expect(card.cardType).toBe("no-issues");
110
+ expect(card.meta.cardVersion).toBe("no-issues@0.1.0");
111
+ const body = card.body;
112
+ expect(body.thresholdScore).toBe(NO_ISSUES_THRESHOLD);
113
+ expect(body.summary).toBeTruthy();
114
+ }
115
+ });
116
+ });
117
+ describe("generateNoIssues — missing when any area below threshold (Test 11b)", () => {
118
+ it("returns missing when at least one area scores below the threshold", async () => {
119
+ const scores = [NO_ISSUES_THRESHOLD - 1, NO_ISSUES_THRESHOLD + 10];
120
+ const report = makeReportWithScores(scores);
121
+ const card = await generateNoIssues(report, makeCtx());
122
+ expect(card.status).toBe("missing");
123
+ if (card.status === "missing") {
124
+ expect(card.cardType).toBe("no-issues");
125
+ expect(card.reason).toContain("below threshold");
126
+ }
127
+ });
128
+ });
129
+ describe("generateNoIssues — calibration firing rate ≤30% (Test 12)", () => {
130
+ it("fires ≤30% on a 10-report fixture set spanning low to high scores", async () => {
131
+ // 10 two-area reports whose scores range from 0 to 99 (low to high)
132
+ // Only reports where ALL areas are ≥ NO_ISSUES_THRESHOLD should fire
133
+ const fixtureSet = [
134
+ makeReportWithScores([0, 10]), // far below threshold
135
+ makeReportWithScores([20, 30]), // below threshold
136
+ makeReportWithScores([40, 50]), // below threshold
137
+ makeReportWithScores([60, 65]), // likely below threshold (≤85)
138
+ makeReportWithScores([70, 75]), // below threshold if threshold=85
139
+ makeReportWithScores([75, 80]), // below threshold if threshold=85
140
+ makeReportWithScores([80, 82]), // below threshold if threshold=85
141
+ makeReportWithScores([90, 92]), // above threshold
142
+ makeReportWithScores([95, 97]), // above threshold
143
+ makeReportWithScores([98, 99]), // above threshold
144
+ ];
145
+ const ctx = makeCtx();
146
+ let firingCount = 0;
147
+ for (const report of fixtureSet) {
148
+ const card = await generateNoIssues(report, ctx);
149
+ if (card.status === "ready")
150
+ firingCount++;
151
+ }
152
+ const firingRate = firingCount / fixtureSet.length;
153
+ expect(firingRate).toBeLessThanOrEqual(0.3);
154
+ });
155
+ });
@@ -0,0 +1,17 @@
1
+ /**
2
+ * area-summary card — deterministic projection over Report.summary.scores.
3
+ *
4
+ * Pure computation, no LLM call. Produces a human-readable sentence
5
+ * summarizing the number of evaluated areas and their mean composite score.
6
+ *
7
+ * Schema is in the D0045 trust-boundary scan root; `satisfies` clause is
8
+ * mandatory (cards/ is a SCAN_ROOT in check-trust-boundary-satisfies.ts).
9
+ *
10
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-AI-SPEC.md §3 lines 569-587
11
+ */
12
+ import { z } from "zod";
13
+ import type { CardGenerator } from "../../diagnosis-runner.js";
14
+ export declare const AreaSummaryBodySchema: z.ZodObject<{
15
+ summary: z.ZodString;
16
+ }, z.core.$strip>;
17
+ export declare const generateAreaSummary: CardGenerator;
@@ -0,0 +1,43 @@
1
+ /**
2
+ * area-summary card — deterministic projection over Report.summary.scores.
3
+ *
4
+ * Pure computation, no LLM call. Produces a human-readable sentence
5
+ * summarizing the number of evaluated areas and their mean composite score.
6
+ *
7
+ * Schema is in the D0045 trust-boundary scan root; `satisfies` clause is
8
+ * mandatory (cards/ is a SCAN_ROOT in check-trust-boundary-satisfies.ts).
9
+ *
10
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-AI-SPEC.md §3 lines 569-587
11
+ */
12
+ import { z } from "zod";
13
+ // ---------------------------------------------------------------------------
14
+ // Body schema (D0045 trust boundary — satisfies required)
15
+ // ---------------------------------------------------------------------------
16
+ export const AreaSummaryBodySchema = z.object({
17
+ summary: z.string().min(1).max(500),
18
+ });
19
+ // ---------------------------------------------------------------------------
20
+ // Generator
21
+ // ---------------------------------------------------------------------------
22
+ export const generateAreaSummary = async (report) => {
23
+ const scores = report.summary.scores;
24
+ if (!scores || scores.length === 0) {
25
+ return {
26
+ status: "missing",
27
+ cardType: "area-summary",
28
+ reason: "report has no areas",
29
+ };
30
+ }
31
+ const meanScore = scores.reduce((sum, s) => sum + s.totalScore, 0) / scores.length;
32
+ const summary = `Across ${scores.length} areas, mean score ${meanScore.toFixed(1)}.`;
33
+ const body = AreaSummaryBodySchema.parse({ summary });
34
+ return {
35
+ status: "ready",
36
+ cardType: "area-summary",
37
+ body,
38
+ meta: {
39
+ cardVersion: "area-summary@0.1.0",
40
+ generatedAt: new Date().toISOString(),
41
+ },
42
+ };
43
+ };
@@ -0,0 +1,46 @@
1
+ /**
2
+ * doc-attribution-spotlight card — LLM-driven doc-citation spotlight.
3
+ *
4
+ * Model: claude-sonnet-4-6 (routine per AI-SPEC §4 model routing)
5
+ * Version: doc-attribution-spotlight@0.1.0
6
+ *
7
+ * Landmine 11: reads `ctx.judgmentAttributions` (NOT Report.summary).
8
+ * Returns `status: "missing"` when attributions are undefined or empty.
9
+ *
10
+ * Mitigations:
11
+ * - failure-mode #5: docCitations[].docSlug refined against the manifest
12
+ * allow-list so hallucinated slugs fail Zod parse
13
+ *
14
+ * Schema is in the D0045 trust-boundary scan root; `satisfies` clause is
15
+ * mandatory.
16
+ *
17
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-RESEARCH.md (Landmine 11)
18
+ * @see docs/decisions/D0052-judgment-ref-granularity.md
19
+ */
20
+ import { z } from "zod";
21
+ import type { CardGenerator } from "../../diagnosis-runner.js";
22
+ /**
23
+ * Module-level static shape. Per-call adds the allow-list refine on docSlug.
24
+ */
25
+ export declare const DocAttributionSpotlightBodySchema: z.ZodObject<{
26
+ summary: z.ZodString;
27
+ docCitations: z.ZodArray<z.ZodObject<{
28
+ docSlug: z.ZodString;
29
+ confidence: z.ZodObject<{
30
+ level: z.ZodEnum<{
31
+ low: "low";
32
+ medium: "medium";
33
+ high: "high";
34
+ }>;
35
+ signalsPresent: z.ZodNumber;
36
+ derivation: z.ZodString;
37
+ }, z.core.$strip>;
38
+ role: z.ZodEnum<{
39
+ missing: "missing";
40
+ supports: "supports";
41
+ contradicts: "contradicts";
42
+ irrelevant: "irrelevant";
43
+ }>;
44
+ }, z.core.$strip>>;
45
+ }, z.core.$strip>;
46
+ export declare const generateDocAttributionSpotlight: CardGenerator;
@@ -0,0 +1,108 @@
1
+ /**
2
+ * doc-attribution-spotlight card — LLM-driven doc-citation spotlight.
3
+ *
4
+ * Model: claude-sonnet-4-6 (routine per AI-SPEC §4 model routing)
5
+ * Version: doc-attribution-spotlight@0.1.0
6
+ *
7
+ * Landmine 11: reads `ctx.judgmentAttributions` (NOT Report.summary).
8
+ * Returns `status: "missing"` when attributions are undefined or empty.
9
+ *
10
+ * Mitigations:
11
+ * - failure-mode #5: docCitations[].docSlug refined against the manifest
12
+ * allow-list so hallucinated slugs fail Zod parse
13
+ *
14
+ * Schema is in the D0045 trust-boundary scan root; `satisfies` clause is
15
+ * mandatory.
16
+ *
17
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-RESEARCH.md (Landmine 11)
18
+ * @see docs/decisions/D0052-judgment-ref-granularity.md
19
+ */
20
+ import { z } from "zod";
21
+ import { ConfidenceSchema } from "../../../schemas/confidence-schema.js";
22
+ import { modelId as mkModelId } from "../../../ports/llm-client.js";
23
+ import { buildDocAttributionSpotlightPrompt, buildDocSlugAllowList, } from "../prompt-builders.js";
24
+ // ---------------------------------------------------------------------------
25
+ // Body schema (D0045 trust boundary — satisfies required)
26
+ // ---------------------------------------------------------------------------
27
+ /**
28
+ * Module-level static shape. Per-call adds the allow-list refine on docSlug.
29
+ */
30
+ export const DocAttributionSpotlightBodySchema = z.object({
31
+ summary: z.string().min(1).max(800),
32
+ docCitations: z
33
+ .array(z.object({
34
+ docSlug: z.string().min(1),
35
+ confidence: ConfidenceSchema,
36
+ role: z.enum(["supports", "contradicts", "missing", "irrelevant"]),
37
+ }))
38
+ .min(1)
39
+ .max(5),
40
+ });
41
+ // ---------------------------------------------------------------------------
42
+ // Generator
43
+ // ---------------------------------------------------------------------------
44
+ const CARD_MODEL = mkModelId("anthropic:claude-sonnet-4-6");
45
+ export const generateDocAttributionSpotlight = async (report, ctx) => {
46
+ // C1: no LLM → missing
47
+ if (!ctx.llm) {
48
+ return {
49
+ status: "missing",
50
+ cardType: "doc-attribution-spotlight",
51
+ reason: "no LLMClient wired",
52
+ };
53
+ }
54
+ // D1: Landmine 11 — short-circuit BEFORE calling LLM when no attribution data
55
+ if (!ctx.judgmentAttributions || ctx.judgmentAttributions.length === 0) {
56
+ return {
57
+ status: "missing",
58
+ cardType: "doc-attribution-spotlight",
59
+ reason: "no attribution data for this run",
60
+ };
61
+ }
62
+ // Build allow-list from the runtime report
63
+ const allowList = buildDocSlugAllowList(report);
64
+ // Per-call schema with docSlug allow-list refine (AI-SPEC §3 Pitfall 1)
65
+ const PerCallSchema = z.object({
66
+ summary: z.string().min(1).max(800),
67
+ docCitations: z
68
+ .array(z.object({
69
+ docSlug: z
70
+ .string()
71
+ .min(1)
72
+ .refine((slug) => allowList.has(slug), {
73
+ message: "docCitations[].docSlug is not in the report document manifest allow-list",
74
+ }),
75
+ confidence: ConfidenceSchema,
76
+ role: z.enum(["supports", "contradicts", "missing", "irrelevant"]),
77
+ }))
78
+ .min(1)
79
+ .max(5),
80
+ });
81
+ const prompt = buildDocAttributionSpotlightPrompt(report, ctx.judgmentAttributions);
82
+ // Destructure `cost` and `model` from the LLMClient return —
83
+ // already provided per llm-client.ts:139-144, previously discarded.
84
+ const { value, usage, cost, model } = await ctx.llm.completeStructured({
85
+ model: CARD_MODEL,
86
+ prompt: `${prompt.system}\n\n${prompt.user}`,
87
+ schema: PerCallSchema,
88
+ temperature: 0.1,
89
+ maxTokens: 2000,
90
+ context: {
91
+ feature: "diagnosis",
92
+ runId: ctx.runId,
93
+ cardId: "doc-attribution-spotlight",
94
+ },
95
+ });
96
+ return {
97
+ status: "ready",
98
+ cardType: "doc-attribution-spotlight",
99
+ body: value,
100
+ meta: {
101
+ cardVersion: "doc-attribution-spotlight@0.1.0",
102
+ tokenUsage: { input: usage.promptTokens, output: usage.completionTokens },
103
+ generatedAt: new Date().toISOString(),
104
+ cost,
105
+ model,
106
+ },
107
+ };
108
+ };
@@ -0,0 +1,28 @@
1
+ /**
2
+ * failure-mode-summary card — deterministic projection over Report.summary.failureModes.
3
+ *
4
+ * Pure computation, no LLM call. Identifies the dominant failure mode
5
+ * across all dimensions in the report's slim failure-mode summary.
6
+ *
7
+ * D-05: `.refine(buildFailureModeRefinement())` rejects cross-dimension
8
+ * (dimension, failureMode) pairs that the schema otherwise would accept —
9
+ * turning a "Zod-passes, semantically wrong" LLM output into a
10
+ * `parseFailed: true` degraded card. For this deterministic card, the
11
+ * refinement also defends against bad Report data.
12
+ *
13
+ * Schema is in the D0045 trust-boundary scan root; `satisfies` clause is
14
+ * mandatory (cards/ is a SCAN_ROOT in check-trust-boundary-satisfies.ts).
15
+ *
16
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-CONTEXT.md (D-05)
17
+ * @see packages/core/src/services/diagnosis/card-validators.ts
18
+ */
19
+ import { z } from "zod";
20
+ import type { CardGenerator } from "../../diagnosis-runner.js";
21
+ export declare const FailureModeSummaryBodySchema: z.ZodObject<{
22
+ summary: z.ZodString;
23
+ dimension: z.ZodString;
24
+ failureMode: z.ZodString;
25
+ count: z.ZodNumber;
26
+ sampleSize: z.ZodNumber;
27
+ }, z.core.$strip>;
28
+ export declare const generateFailureModeSummary: CardGenerator;