@sanity/ailf 4.6.0 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. package/canonical/grader-references/agent-harness-tools.yaml +42 -0
  2. package/canonical/grader-references/knowledge-probe-recall.yaml +36 -0
  3. package/canonical/grader-references/mcp-server-spec.yaml +51 -0
  4. package/canonical/grader-references/portable-text.yaml +48 -0
  5. package/config/diagnosis-cards.ts +318 -0
  6. package/config/models.ts +12 -0
  7. package/config/rubrics.ts +38 -2
  8. package/dist/_vendor/ailf-core/artifact-registry.d.ts +60 -2
  9. package/dist/_vendor/ailf-core/artifact-registry.js +288 -7
  10. package/dist/_vendor/ailf-core/examples/index.d.ts +125 -26
  11. package/dist/_vendor/ailf-core/examples/index.js +146 -47
  12. package/dist/_vendor/ailf-core/grader/failure-modes/agent-harness.d.ts +13 -0
  13. package/dist/_vendor/ailf-core/grader/failure-modes/agent-harness.js +16 -0
  14. package/dist/_vendor/ailf-core/grader/failure-modes/common.d.ts +14 -0
  15. package/dist/_vendor/ailf-core/grader/failure-modes/common.js +18 -0
  16. package/dist/_vendor/ailf-core/grader/failure-modes/index.d.ts +45 -0
  17. package/dist/_vendor/ailf-core/grader/failure-modes/index.js +109 -0
  18. package/dist/_vendor/ailf-core/grader/failure-modes/knowledge-probe.d.ts +13 -0
  19. package/dist/_vendor/ailf-core/grader/failure-modes/knowledge-probe.js +17 -0
  20. package/dist/_vendor/ailf-core/grader/failure-modes/literacy.d.ts +13 -0
  21. package/dist/_vendor/ailf-core/grader/failure-modes/literacy.js +17 -0
  22. package/dist/_vendor/ailf-core/grader/failure-modes/mcp.d.ts +13 -0
  23. package/dist/_vendor/ailf-core/grader/failure-modes/mcp.js +17 -0
  24. package/dist/_vendor/ailf-core/index.d.ts +1 -0
  25. package/dist/_vendor/ailf-core/index.js +4 -0
  26. package/dist/_vendor/ailf-core/ports/context.d.ts +8 -0
  27. package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +15 -0
  28. package/dist/_vendor/ailf-core/schemas/branded-string.d.ts +40 -0
  29. package/dist/_vendor/ailf-core/schemas/branded-string.js +45 -0
  30. package/dist/_vendor/ailf-core/schemas/confidence-schema.d.ts +36 -0
  31. package/dist/_vendor/ailf-core/schemas/confidence-schema.js +32 -0
  32. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
  33. package/dist/_vendor/ailf-core/schemas/eval-config.js +8 -4
  34. package/dist/_vendor/ailf-core/schemas/index.d.ts +2 -0
  35. package/dist/_vendor/ailf-core/schemas/index.js +9 -0
  36. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
  37. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -0
  38. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +34 -8
  39. package/dist/_vendor/ailf-core/schemas/pipeline.js +23 -1
  40. package/dist/_vendor/ailf-core/services/diagnosis/card-validators.d.ts +41 -0
  41. package/dist/_vendor/ailf-core/services/diagnosis/card-validators.js +40 -0
  42. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/area-summary.test.d.ts +7 -0
  43. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/area-summary.test.js +131 -0
  44. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/failure-mode-summary.test.d.ts +7 -0
  45. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/failure-mode-summary.test.js +171 -0
  46. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/no-issues.test.d.ts +7 -0
  47. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/no-issues.test.js +155 -0
  48. package/dist/_vendor/ailf-core/services/diagnosis/cards/area-summary.d.ts +17 -0
  49. package/dist/_vendor/ailf-core/services/diagnosis/cards/area-summary.js +43 -0
  50. package/dist/_vendor/ailf-core/services/diagnosis/cards/doc-attribution-spotlight.d.ts +46 -0
  51. package/dist/_vendor/ailf-core/services/diagnosis/cards/doc-attribution-spotlight.js +104 -0
  52. package/dist/_vendor/ailf-core/services/diagnosis/cards/failure-mode-summary.d.ts +28 -0
  53. package/dist/_vendor/ailf-core/services/diagnosis/cards/failure-mode-summary.js +96 -0
  54. package/dist/_vendor/ailf-core/services/diagnosis/cards/index.d.ts +39 -0
  55. package/dist/_vendor/ailf-core/services/diagnosis/cards/index.js +52 -0
  56. package/dist/_vendor/ailf-core/services/diagnosis/cards/low-confidence-attribution.d.ts +27 -0
  57. package/dist/_vendor/ailf-core/services/diagnosis/cards/low-confidence-attribution.js +77 -0
  58. package/dist/_vendor/ailf-core/services/diagnosis/cards/no-issues.d.ts +32 -0
  59. package/dist/_vendor/ailf-core/services/diagnosis/cards/no-issues.js +71 -0
  60. package/dist/_vendor/ailf-core/services/diagnosis/cards/regression-vs-baseline.d.ts +44 -0
  61. package/dist/_vendor/ailf-core/services/diagnosis/cards/regression-vs-baseline.js +126 -0
  62. package/dist/_vendor/ailf-core/services/diagnosis/cards/top-recommendations.d.ts +41 -0
  63. package/dist/_vendor/ailf-core/services/diagnosis/cards/top-recommendations.js +107 -0
  64. package/dist/_vendor/ailf-core/services/diagnosis/cards/weakest-area.d.ts +43 -0
  65. package/dist/_vendor/ailf-core/services/diagnosis/cards/weakest-area.js +114 -0
  66. package/dist/_vendor/ailf-core/services/diagnosis/prompt-builders.d.ts +72 -0
  67. package/dist/_vendor/ailf-core/services/diagnosis/prompt-builders.js +273 -0
  68. package/dist/_vendor/ailf-core/services/diagnosis/prompts/doc-attribution-spotlight.system.d.ts +17 -0
  69. package/dist/_vendor/ailf-core/services/diagnosis/prompts/doc-attribution-spotlight.system.js +58 -0
  70. package/dist/_vendor/ailf-core/services/diagnosis/prompts/index.d.ts +10 -0
  71. package/dist/_vendor/ailf-core/services/diagnosis/prompts/index.js +10 -0
  72. package/dist/_vendor/ailf-core/services/diagnosis/prompts/low-confidence-attribution.system.d.ts +15 -0
  73. package/dist/_vendor/ailf-core/services/diagnosis/prompts/low-confidence-attribution.system.js +53 -0
  74. package/dist/_vendor/ailf-core/services/diagnosis/prompts/regression-vs-baseline.system.d.ts +14 -0
  75. package/dist/_vendor/ailf-core/services/diagnosis/prompts/regression-vs-baseline.system.js +63 -0
  76. package/dist/_vendor/ailf-core/services/diagnosis/prompts/top-recommendations.system.d.ts +16 -0
  77. package/dist/_vendor/ailf-core/services/diagnosis/prompts/top-recommendations.system.js +78 -0
  78. package/dist/_vendor/ailf-core/services/diagnosis/prompts/weakest-area.system.d.ts +16 -0
  79. package/dist/_vendor/ailf-core/services/diagnosis/prompts/weakest-area.system.js +86 -0
  80. package/dist/_vendor/ailf-core/services/diagnosis/registry.d.ts +50 -0
  81. package/dist/_vendor/ailf-core/services/diagnosis/registry.js +35 -0
  82. package/dist/_vendor/ailf-core/services/diagnosis-runner.d.ts +136 -0
  83. package/dist/_vendor/ailf-core/services/diagnosis-runner.js +153 -0
  84. package/dist/_vendor/ailf-core/services/index.d.ts +6 -0
  85. package/dist/_vendor/ailf-core/services/index.js +18 -0
  86. package/dist/_vendor/ailf-core/services/llm-client-factory.d.ts +64 -0
  87. package/dist/_vendor/ailf-core/services/llm-client-factory.js +54 -0
  88. package/dist/_vendor/ailf-core/services/report-to-markdown.js +3 -2
  89. package/dist/_vendor/ailf-core/types/attribution.d.ts +82 -0
  90. package/dist/_vendor/ailf-core/types/attribution.js +18 -0
  91. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +26 -1
  92. package/dist/_vendor/ailf-core/types/branded-ids.js +80 -4
  93. package/dist/_vendor/ailf-core/types/confidence.d.ts +1 -1
  94. package/dist/_vendor/ailf-core/types/confidence.js +7 -0
  95. package/dist/_vendor/ailf-core/types/diagnosis.d.ts +271 -0
  96. package/dist/_vendor/ailf-core/types/diagnosis.js +19 -0
  97. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +16 -1
  98. package/dist/_vendor/ailf-core/types/grader-judgment.d.ts +125 -0
  99. package/dist/_vendor/ailf-core/types/grader-judgment.js +30 -0
  100. package/dist/_vendor/ailf-core/types/index.d.ts +80 -29
  101. package/dist/_vendor/ailf-core/types/index.js +15 -1
  102. package/dist/_vendor/ailf-core/types/legacy-grader-judgment.d.ts +55 -0
  103. package/dist/_vendor/ailf-core/types/legacy-grader-judgment.js +30 -0
  104. package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +1 -0
  105. package/dist/_vendor/ailf-core/types/repo-config.d.ts +8 -0
  106. package/dist/_vendor/ailf-shared/document-ref.d.ts +1 -1
  107. package/dist/adapters/api-client/build-request.d.ts +1 -0
  108. package/dist/adapters/api-client/build-request.js +3 -0
  109. package/dist/adapters/attribution/attribution-meta-writer.d.ts +35 -0
  110. package/dist/adapters/attribution/attribution-meta-writer.js +34 -0
  111. package/dist/adapters/attribution/index.d.ts +9 -0
  112. package/dist/adapters/attribution/index.js +8 -0
  113. package/dist/adapters/attribution/per-entry-attribution-writer.d.ts +56 -0
  114. package/dist/adapters/attribution/per-entry-attribution-writer.js +49 -0
  115. package/dist/adapters/config-sources/file-config-adapter.js +1 -0
  116. package/dist/adapters/grader-outputs/index.d.ts +10 -0
  117. package/dist/adapters/grader-outputs/index.js +8 -0
  118. package/dist/adapters/grader-outputs/legacy/index.d.ts +11 -0
  119. package/dist/adapters/grader-outputs/legacy/index.js +10 -0
  120. package/dist/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.d.ts +49 -0
  121. package/dist/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.js +48 -0
  122. package/dist/adapters/grader-outputs/promptfoo-grader-output.d.ts +102 -0
  123. package/dist/adapters/grader-outputs/promptfoo-grader-output.js +93 -0
  124. package/dist/adapters/index.d.ts +3 -0
  125. package/dist/adapters/index.js +4 -0
  126. package/dist/adapters/llm/fake-llm-client.d.ts +20 -0
  127. package/dist/adapters/llm/fake-llm-client.js +38 -1
  128. package/dist/adapters/llm/openai-llm-client.js +52 -3
  129. package/dist/adapters/task-sources/content-lake-task-source.d.ts +5 -1
  130. package/dist/adapters/task-sources/content-lake-task-source.js +28 -2
  131. package/dist/adapters/task-sources/repo-schemas.d.ts +79 -11
  132. package/dist/adapters/task-sources/repo-schemas.js +19 -2
  133. package/dist/cli-program.js +3 -0
  134. package/dist/commands/calculate-scores.js +1 -1
  135. package/dist/commands/explain-handler.js +1 -1
  136. package/dist/commands/interpret.d.ts +50 -0
  137. package/dist/commands/interpret.js +212 -0
  138. package/dist/commands/lookup-doc.d.ts +1 -1
  139. package/dist/commands/lookup-doc.js +3 -3
  140. package/dist/commands/pipeline-action.d.ts +6 -0
  141. package/dist/commands/pipeline-action.js +2 -0
  142. package/dist/commands/remote-pipeline.js +1 -0
  143. package/dist/composition-root.d.ts +57 -23
  144. package/dist/composition-root.js +155 -41
  145. package/dist/config/diagnosis-cards.ts +318 -0
  146. package/dist/config/models.ts +12 -0
  147. package/dist/config/rubrics.ts +38 -2
  148. package/dist/grader/agent-harness.d.ts +9 -0
  149. package/dist/grader/agent-harness.js +9 -0
  150. package/dist/grader/common.d.ts +9 -0
  151. package/dist/grader/common.js +9 -0
  152. package/dist/grader/index.d.ts +24 -0
  153. package/dist/grader/index.js +24 -0
  154. package/dist/grader/knowledge-probe.d.ts +9 -0
  155. package/dist/grader/knowledge-probe.js +9 -0
  156. package/dist/grader/literacy.d.ts +9 -0
  157. package/dist/grader/literacy.js +9 -0
  158. package/dist/grader/mcp.d.ts +9 -0
  159. package/dist/grader/mcp.js +9 -0
  160. package/dist/orchestration/build-app-context.js +1 -0
  161. package/dist/orchestration/build-step-sequence.js +5 -0
  162. package/dist/orchestration/steps/calculate-scores-step.js +23 -1
  163. package/dist/orchestration/steps/compute-attribution-step.d.ts +44 -0
  164. package/dist/orchestration/steps/compute-attribution-step.js +279 -0
  165. package/dist/orchestration/steps/gap-analysis-step.js +35 -7
  166. package/dist/orchestration/steps/index.d.ts +1 -0
  167. package/dist/orchestration/steps/index.js +1 -0
  168. package/dist/pipeline/attribution.d.ts +15 -0
  169. package/dist/pipeline/attribution.js +18 -9
  170. package/dist/pipeline/borderline-consensus-runner.d.ts +63 -0
  171. package/dist/pipeline/borderline-consensus-runner.js +124 -0
  172. package/dist/pipeline/borderline-detector.d.ts +24 -0
  173. package/dist/pipeline/borderline-detector.js +26 -0
  174. package/dist/pipeline/calculate-scores.d.ts +114 -3
  175. package/dist/pipeline/calculate-scores.js +426 -24
  176. package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
  177. package/dist/pipeline/compiler/literacy-bridge.js +35 -17
  178. package/dist/pipeline/compiler/rubric-resolution.d.ts +15 -0
  179. package/dist/pipeline/compiler/rubric-resolution.js +9 -1
  180. package/dist/pipeline/compute-attribution.d.ts +80 -0
  181. package/dist/pipeline/compute-attribution.js +196 -0
  182. package/dist/pipeline/failure-modes.d.ts +52 -17
  183. package/dist/pipeline/failure-modes.js +178 -117
  184. package/dist/pipeline/map-request-to-config.js +1 -0
  185. package/package.json +7 -5
@@ -0,0 +1,126 @@
1
+ /**
2
+ * regression-vs-baseline card — LLM-driven run comparison card.
3
+ *
4
+ * Model: claude-opus-4-6 (high-stakes per AI-SPEC §4 model routing)
5
+ * Version: regression-vs-baseline@0.1.0
6
+ *
7
+ * DIAG-05: emits ONLY when `ctx.baseline` is set. When baseline is absent,
8
+ * returns `status: "missing", reason: "no --compare baseline supplied"`.
9
+ *
10
+ * Mitigations:
11
+ * - failure-mode #1: `buildRegressionVsBaselinePrompt` computes deltas in JS
12
+ * BEFORE the LLM call; schema refine asserts sign-consistency (R3)
13
+ *
14
+ * Schema is in the D0045 trust-boundary scan root; `satisfies` clause is
15
+ * mandatory.
16
+ *
17
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-AI-SPEC.md §3 lines 603-664
18
+ */
19
+ import { z } from "zod";
20
+ import { modelId as mkModelId } from "../../../ports/llm-client.js";
21
+ import { buildRegressionVsBaselinePrompt } from "../prompt-builders.js";
22
+ // ---------------------------------------------------------------------------
23
+ // Helper: direction sign check
24
+ // ---------------------------------------------------------------------------
25
/**
 * Translate a direction label into the sign its `pointsDelta` is expected
 * to carry.
 *
 * @param direction - Direction label reported for an area.
 * @returns `1` for `"improved"`, `-1` for `"regressed"`, and `0` for any
 *   other value (which includes `"unchanged"`).
 */
function directionSign(direction) {
    switch (direction) {
        case "improved":
            return 1;
        case "regressed":
            return -1;
        default:
            return 0;
    }
}
36
+ // ---------------------------------------------------------------------------
37
+ // Body schema (D0045 trust boundary — satisfies required)
38
+ // ---------------------------------------------------------------------------
39
/**
 * Shape of a single per-area entry inside `deltas`: the area name, its
 * direction label, the numeric point delta and a list of driver strings.
 */
const regressionDeltaEntrySchema = z.object({
    area: z.string().min(1),
    direction: z.enum(["improved", "regressed", "unchanged"]),
    pointsDelta: z.number(),
    drivers: z.array(z.string()),
});
/**
 * Static, module-level shape of the regression-vs-baseline card body.
 *
 * Only structural limits are expressed here (non-empty summary of at most
 * 800 characters, at most 10 deltas, a closed set of trend labels). The R3
 * sign-consistency check — Math.sign(pointsDelta) === directionSign(direction) —
 * is added per call inside the generator.
 */
export const RegressionVsBaselineBodySchema = z.object({
    summary: z.string().min(1).max(800),
    deltas: z.array(regressionDeltaEntrySchema).max(10),
    overallTrend: z.enum(["net-improved", "net-regressed", "mixed", "stable"]),
});
55
+ // ---------------------------------------------------------------------------
56
+ // Generator
57
+ // ---------------------------------------------------------------------------
58
/** Model identifier this card sends with its structured-completion request. */
const CARD_MODEL = mkModelId("anthropic:claude-opus-4-6");
/**
 * Produce the regression-vs-baseline diagnosis card.
 *
 * Returns a `"missing"` card when no LLM client is wired (C1) or when no
 * baseline report was supplied (R1 / DIAG-05). Otherwise it builds the prompt
 * from the current and baseline reports, requests a structured completion
 * validated by a per-call schema, and wraps the validated value in a
 * `"ready"` card.
 *
 * @param report - Report for the current run.
 * @param ctx - Generator context; `llm`, `baseline` and `runId` are read here.
 * @returns A promise for a `"missing"` card (with a `reason`) or a `"ready"`
 *   card carrying the validated body plus version, token-usage and timestamp
 *   metadata.
 */
export const generateRegressionVsBaseline = async (report, ctx) => {
    const cardType = "regression-vs-baseline";
    // Both early exits share the same "missing" card shape.
    const missingCard = (reason) => ({ status: "missing", cardType, reason });
    // C1: no LLM → missing
    if (!ctx.llm) {
        return missingCard("no LLMClient wired");
    }
    // R1: DIAG-05 no-auto-comparison — a baseline must be supplied explicitly
    if (!ctx.baseline) {
        return missingCard("no --compare baseline supplied");
    }
    // The prompt is built from both reports before the LLM is called
    // (failure-mode #1 mitigation).
    // NOTE(review): the builder's declaration also exposes a `deltas` array;
    // it is not read here — confirm whether the LLM-reported `pointsDelta`
    // values should additionally be checked against it.
    const { system: systemPrompt, user: userPrompt } = buildRegressionVsBaselinePrompt(report, ctx.baseline);
    // R3: a delta entry is consistent when the sign of its number agrees with
    // its direction label.
    const labelMatchesDelta = (delta) => Math.sign(delta.pointsDelta) === directionSign(delta.direction);
    // Per-call schema: the exported static shape plus the R3 guardrail. It is
    // created inside the generator (AI-SPEC §3 Pitfall 1), never at module scope.
    const perCallSchema = RegressionVsBaselineBodySchema.refine((body) => body.deltas.every(labelMatchesDelta), {
        message: "direction label must match sign of pointsDelta (improved=positive, regressed=negative, unchanged=zero)",
        path: ["deltas"],
    });
    const completion = await ctx.llm.completeStructured({
        model: CARD_MODEL,
        prompt: `${systemPrompt}\n\n${userPrompt}`,
        schema: perCallSchema,
        temperature: 0.1,
        maxTokens: 2000,
        context: {
            feature: "diagnosis",
            runId: ctx.runId,
            cardId: cardType,
        },
    });
    const { value, usage } = completion;
    const tokenUsage = { input: usage.promptTokens, output: usage.completionTokens };
    return {
        status: "ready",
        cardType,
        body: value,
        meta: {
            cardVersion: "regression-vs-baseline@0.1.0",
            tokenUsage,
            generatedAt: new Date().toISOString(),
        },
    };
};
@@ -0,0 +1,41 @@
1
+ /**
2
+ * top-recommendations card — LLM-driven actionable suggestion generator.
3
+ *
4
+ * Model: claude-opus-4-6 (high-stakes per AI-SPEC §4 model routing)
5
+ * Version: top-recommendations@0.1.0
6
+ *
7
+ * Mitigations:
8
+ * - failure-mode #2: few-shot examples in system prompt; body ≥40 chars
9
+ * - failure-mode #5: docSlug refined against report manifest allow-list
10
+ *
11
+ * Schema is in the D0045 trust-boundary scan root; `satisfies` clause is
12
+ * mandatory (cards/ is a SCAN_ROOT in check-trust-boundary-satisfies.ts).
13
+ *
14
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-AI-SPEC.md §3
15
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-AI-SPEC.md §4
16
+ */
17
+ import { z } from "zod";
18
+ import type { CardGenerator } from "../../diagnosis-runner.js";
19
/**
 * Static shape of the top-recommendations card body.
 *
 * This module-level schema checks structure only. The `docSlug` allow-list
 * check needs the runtime `report`, so it is added to a schema built inside
 * the generator rather than at module scope (AI-SPEC §3 Pitfall 1).
 */
export declare const TopRecommendationsBodySchema: z.ZodObject<{
    summary: z.ZodString;
    suggestions: z.ZodArray<z.ZodObject<{
        title: z.ZodString;
        body: z.ZodString;
        priority: z.ZodEnum<{
            low: "low";
            medium: "medium";
            high: "high";
        }>;
        docSlug: z.ZodString;
        sectionHeading: z.ZodNullable<z.ZodString>;
    }, z.core.$strip>>;
}, z.core.$strip>;
/** Card generator for the `top-recommendations` diagnosis card. */
export declare const generateTopRecommendations: CardGenerator;
@@ -0,0 +1,107 @@
1
+ /**
2
+ * top-recommendations card — LLM-driven actionable suggestion generator.
3
+ *
4
+ * Model: claude-opus-4-6 (high-stakes per AI-SPEC §4 model routing)
5
+ * Version: top-recommendations@0.1.0
6
+ *
7
+ * Mitigations:
8
+ * - failure-mode #2: few-shot examples in system prompt; body ≥40 chars
9
+ * - failure-mode #5: docSlug refined against report manifest allow-list
10
+ *
11
+ * Schema is in the D0045 trust-boundary scan root; `satisfies` clause is
12
+ * mandatory (cards/ is a SCAN_ROOT in check-trust-boundary-satisfies.ts).
13
+ *
14
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-AI-SPEC.md §3
15
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-AI-SPEC.md §4
16
+ */
17
+ import { z } from "zod";
18
+ import { modelId as mkModelId } from "../../../ports/llm-client.js";
19
+ import { buildTopRecommendationsPrompt, buildDocSlugAllowList, } from "../prompt-builders.js";
20
+ // ---------------------------------------------------------------------------
21
+ // Body schema (D0045 trust boundary — satisfies required)
22
+ // ---------------------------------------------------------------------------
23
+ /**
24
+ * Module-level schema: static shape check only.
25
+ * Per-call: an additive `.refine()` over the allow-list (built inside the
26
+ * generator so it closes over the runtime `report`).
27
+ *
28
+ * AI-SPEC §3 Pitfall 1: per-call schemas are LOCAL to the generator,
29
+ * NOT at module scope.
30
+ */
31
+ export const TopRecommendationsBodySchema = z.object({
32
+ summary: z.string().min(1).max(500),
33
+ suggestions: z
34
+ .array(z.object({
35
+ title: z.string().min(1).max(200),
36
+ body: z
37
+ .string()
38
+ .min(40, "suggestion.body must be ≥40 characters")
39
+ .refine((b) => /`[^`]+`/.test(b), "suggestion.body must contain at least one backtick-delimited artifact"),
40
+ priority: z.enum(["high", "medium", "low"]),
41
+ docSlug: z.string().min(1),
42
+ sectionHeading: z.string().nullable(),
43
+ }))
44
+ .min(1)
45
+ .max(5),
46
+ });
47
+ // ---------------------------------------------------------------------------
48
+ // Generator
49
+ // ---------------------------------------------------------------------------
50
/** Model identifier this card sends with its structured-completion request. */
const CARD_MODEL = mkModelId("anthropic:claude-opus-4-6");
/**
 * Produce the top-recommendations diagnosis card.
 *
 * Returns a `"missing"` card when no LLM client is wired (C1). Otherwise it
 * derives the doc-slug allow-list from the report, builds a per-call schema
 * that rejects any `docSlug` outside that list, requests a structured
 * completion, and wraps the validated value in a `"ready"` card.
 *
 * @param report - Report for the current run.
 * @param ctx - Generator context; `llm` and `runId` are read here.
 * @returns A promise for a `"missing"` card (with a `reason`) or a `"ready"`
 *   card carrying the validated body plus version, token-usage and timestamp
 *   metadata.
 */
export const generateTopRecommendations = async (report, ctx) => {
    const cardType = "top-recommendations";
    // C1: no LLM → missing
    if (!ctx.llm) {
        return {
            status: "missing",
            cardType,
            reason: "no LLMClient wired",
        };
    }
    // Slugs the model is allowed to reference, taken from the runtime report.
    const allowList = buildDocSlugAllowList(report);
    // Per-call suggestion schema: same static constraints as the module-level
    // schema, plus the allow-list check on `docSlug` (AI-SPEC §3 Pitfall 1).
    const allowListedDocSlug = z
        .string()
        .min(1)
        .refine((slug) => allowList.has(slug), {
        message: "suggestion.docSlug is not in the report document manifest allow-list",
    });
    const suggestionSchema = z.object({
        title: z.string().min(1).max(200),
        body: z
            .string()
            .min(40, "suggestion.body must be ≥40 characters")
            .refine((text) => /`[^`]+`/.test(text), "suggestion.body must contain at least one backtick-delimited artifact"),
        priority: z.enum(["high", "medium", "low"]),
        docSlug: allowListedDocSlug,
        sectionHeading: z.string().nullable(),
    });
    const perCallSchema = z.object({
        summary: z.string().min(1).max(500),
        suggestions: z.array(suggestionSchema).min(1).max(5),
    });
    const { system: systemPrompt, user: userPrompt } = buildTopRecommendationsPrompt(report, allowList);
    const completion = await ctx.llm.completeStructured({
        model: CARD_MODEL,
        prompt: `${systemPrompt}\n\n${userPrompt}`,
        schema: perCallSchema,
        temperature: 0.1,
        maxTokens: 2000,
        context: {
            feature: "diagnosis",
            runId: ctx.runId,
            cardId: cardType,
        },
    });
    const { value, usage } = completion;
    const tokenUsage = { input: usage.promptTokens, output: usage.completionTokens };
    return {
        status: "ready",
        cardType,
        body: value,
        meta: {
            cardVersion: "top-recommendations@0.1.0",
            tokenUsage,
            generatedAt: new Date().toISOString(),
        },
    };
};
@@ -0,0 +1,43 @@
1
+ /**
2
+ * weakest-area card — LLM-driven weakest area identification.
3
+ *
4
+ * Model: claude-sonnet-4-6 (routine per AI-SPEC §4 model routing)
5
+ * Version: weakest-area@0.1.0
6
+ *
7
+ * Mitigations:
8
+ * - failure-mode #3: sampleSize = report.areas[area].judgmentCount (per-call refine W2)
9
+ * - failure-mode #3: sampleSize < 10 → confidence.level must be "low" (per-call refine W3)
10
+ * - failure-mode #4: taxonomy drift → buildFailureModeRefinement() Zod predicate
11
+ *
12
+ * Per-call schemas live INSIDE the generator (AI-SPEC §3 Pitfall 1 — no
13
+ * module-scope mutables that change across calls).
14
+ *
15
+ * Schema is in the D0045 trust-boundary scan root; `satisfies` clause is
16
+ * mandatory (cards/ is a SCAN_ROOT in check-trust-boundary-satisfies.ts).
17
+ *
18
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-AI-SPEC.md §3
19
+ */
20
+ import { z } from "zod";
21
+ import type { CardGenerator } from "../../diagnosis-runner.js";
22
/**
 * Static shape of the weakest-area card body, including the taxonomy
 * constraint on `failureMode` (D-05).
 *
 * Refinements that need runtime `report` data are added to a per-call schema
 * inside the generator instead of being declared here.
 */
export declare const WeakestAreaBodySchema: z.ZodObject<{
    summary: z.ZodString;
    area: z.ZodString;
    dimension: z.ZodString;
    failureMode: z.ZodString;
    sampleSize: z.ZodNumber;
    confidence: z.ZodObject<{
        level: z.ZodEnum<{
            low: "low";
            medium: "medium";
            high: "high";
        }>;
        signalsPresent: z.ZodNumber;
        derivation: z.ZodString;
    }, z.core.$strip>;
}, z.core.$strip>;
/** Card generator for the `weakest-area` diagnosis card. */
export declare const generateWeakestArea: CardGenerator;
@@ -0,0 +1,114 @@
1
+ /**
2
+ * weakest-area card — LLM-driven weakest area identification.
3
+ *
4
+ * Model: claude-sonnet-4-6 (routine per AI-SPEC §4 model routing)
5
+ * Version: weakest-area@0.1.0
6
+ *
7
+ * Mitigations:
8
+ * - failure-mode #3: sampleSize = report.areas[area].judgmentCount (per-call refine W2)
9
+ * - failure-mode #3: sampleSize < 10 → confidence.level must be "low" (per-call refine W3)
10
+ * - failure-mode #4: taxonomy drift → buildFailureModeRefinement() Zod predicate
11
+ *
12
+ * Per-call schemas live INSIDE the generator (AI-SPEC §3 Pitfall 1 — no
13
+ * module-scope mutables that change across calls).
14
+ *
15
+ * Schema is in the D0045 trust-boundary scan root; `satisfies` clause is
16
+ * mandatory (cards/ is a SCAN_ROOT in check-trust-boundary-satisfies.ts).
17
+ *
18
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-AI-SPEC.md §3
19
+ */
20
+ import { z } from "zod";
21
+ import { ConfidenceSchema } from "../../../schemas/confidence-schema.js";
22
+ import { modelId as mkModelId } from "../../../ports/llm-client.js";
23
+ import { buildFailureModeRefinement } from "../card-validators.js";
24
+ import { buildWeakestAreaPrompt } from "../prompt-builders.js";
25
+ // ---------------------------------------------------------------------------
26
+ // Module-level schema: static shape + D-05 taxonomy refine
27
+ // ---------------------------------------------------------------------------
28
+ /**
29
+ * Module-level schema asserts the static shape + taxonomy constraint (D-05).
30
+ * Per-call schemas add the sampleSize (W2) and small-sample confidence (W3)
31
+ * refinements inside the generator, since they need runtime `report` data.
32
+ */
33
+ export const WeakestAreaBodySchema = z
34
+ .object({
35
+ summary: z.string().min(1).max(800),
36
+ area: z.string().min(1),
37
+ dimension: z.string().min(1),
38
+ failureMode: z.string().min(1),
39
+ sampleSize: z.number().int().nonnegative(),
40
+ confidence: ConfidenceSchema,
41
+ })
42
+ .refine(buildFailureModeRefinement(), {
43
+ message: "failureMode is not in the canonical taxonomy for this dimension",
44
+ path: ["failureMode"],
45
+ });
46
+ // ---------------------------------------------------------------------------
47
+ // Generator
48
+ // ---------------------------------------------------------------------------
49
/** Model identifier this card sends with its structured-completion request. */
const CARD_MODEL = mkModelId("anthropic:claude-sonnet-4-6");
/**
 * Produce the weakest-area diagnosis card.
 *
 * Returns a `"missing"` card when no LLM client is wired (C1) or when
 * `report.summary.scores` is absent or empty. Otherwise it builds a per-call
 * schema (static shape, taxonomy refine, W3 small-sample rule), requests a
 * structured completion, and wraps the validated value in a `"ready"` card.
 *
 * @param report - Report for the current run.
 * @param ctx - Generator context; `llm` and `runId` are read here.
 * @returns A promise for a `"missing"` card (with a `reason`) or a `"ready"`
 *   card carrying the validated body plus version, token-usage and timestamp
 *   metadata.
 */
export const generateWeakestArea = async (report, ctx) => {
    const cardType = "weakest-area";
    // Both early exits share the same "missing" card shape.
    const missingCard = (reason) => ({ status: "missing", cardType, reason });
    // C1: no LLM → missing
    if (!ctx.llm) {
        return missingCard("no LLMClient wired");
    }
    // Without any scores there is no area to single out.
    const areaScores = report.summary.scores ?? [];
    if (areaScores.length === 0) {
        return missingCard("report has no areas");
    }
    // W3: fewer than 10 samples forces confidence.level to be "low".
    const smallSampleIsLowConfidence = (body) => !(body.sampleSize < 10) || body.confidence.level === "low";
    // Per-call schema, built inside the generator (AI-SPEC §3 Pitfall 1 —
    // never hoisted to module scope).
    // NOTE(review): the file header also lists a W2 refine tying `sampleSize`
    // to the report's judgment count; no such check is present in this
    // schema — confirm whether it is enforced elsewhere.
    const shape = z.object({
        summary: z.string().min(1).max(800),
        area: z.string().min(1),
        dimension: z.string().min(1),
        failureMode: z.string().min(1),
        sampleSize: z.number().int().nonnegative(),
        confidence: ConfidenceSchema,
    });
    const perCallSchema = shape
        .refine(buildFailureModeRefinement(), {
        message: "failureMode is not in the canonical taxonomy for this dimension",
        path: ["failureMode"],
    })
        .refine(smallSampleIsLowConfidence, {
        message: 'When sampleSize < 10, confidence.level must be "low"',
        path: ["confidence", "level"],
    });
    const { system: systemPrompt, user: userPrompt } = buildWeakestAreaPrompt(report);
    const completion = await ctx.llm.completeStructured({
        model: CARD_MODEL,
        prompt: `${systemPrompt}\n\n${userPrompt}`,
        schema: perCallSchema,
        temperature: 0.1,
        maxTokens: 2000,
        context: {
            feature: "diagnosis",
            runId: ctx.runId,
            cardId: cardType,
        },
    });
    const { value, usage } = completion;
    const tokenUsage = { input: usage.promptTokens, output: usage.completionTokens };
    return {
        status: "ready",
        cardType,
        body: value,
        meta: {
            cardVersion: "weakest-area@0.1.0",
            tokenUsage,
            generatedAt: new Date().toISOString(),
        },
    };
};
@@ -0,0 +1,72 @@
1
+ /**
2
+ * Deterministic prompt-builder helpers for the 5 LLM diagnosis cards.
3
+ *
4
+ * Each function takes typed inputs and returns `{ system, user }` strings
5
+ * (+ a `deltas` side-channel for regression-vs-baseline). The same inputs
6
+ * always produce the same output — no randomness, no side effects.
7
+ *
8
+ * Per AI-SPEC §4: input tokens are bounded by truncation; the user message
9
+ * projects only the fields each card needs.
10
+ *
11
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-AI-SPEC.md §4
12
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-AI-SPEC.md §3 lines 603-664
13
+ */
14
+ import type { JudgmentAttribution } from "../../types/attribution.js";
15
+ import type { Report } from "../../types/index.js";
16
+ /**
17
+ * Build the allow-list of doc slugs for a report. This is the union of:
18
+ * - report.summary.documentManifest[].slug
19
+ * - per-score documents[].slug
20
+ *
21
+ * Only slugs (not documentIds) appear in the allow-list because the cards
22
+ * reference human-readable slugs. DocumentId-only entries are skipped.
23
+ */
24
+ export declare function buildDocSlugAllowList(report: Report): Set<string>;
25
/**
 * Build the prompt for the top-recommendations card.
 *
 * The user message projects the 3 weakest areas, the top failure modes and
 * the doc-slug allow-list (~2500 input tokens).
 *
 * @param report - Report for the current run.
 * @param allowList - Doc slugs the model may reference.
 * @returns The `system` and `user` prompt strings.
 */
export declare function buildTopRecommendationsPrompt(report: Report, allowList: Set<string>): { system: string; user: string };
33
/**
 * Build the prompt for the weakest-area card.
 *
 * The user message projects the single weakest area together with its full
 * failure-mode breakdown.
 *
 * @param report - Report for the current run.
 * @returns The `system` and `user` prompt strings.
 */
export declare function buildWeakestAreaPrompt(report: Report): { system: string; user: string };
40
/**
 * Build the prompt for the low-confidence-attribution card.
 *
 * Filters to low-confidence entries (or the top-N when none are low) and
 * renders them as a table for the LLM (~2000 input tokens).
 *
 * The caller is expected to short-circuit on an empty `judgmentAttributions`
 * array BEFORE calling this function.
 *
 * @param report - Report for the current run.
 * @param judgmentAttributions - Attribution entries to filter and tabulate.
 * @returns The `system` and `user` prompt strings.
 */
export declare function buildLowConfidenceAttributionPrompt(report: Report, judgmentAttributions: JudgmentAttribution[]): { system: string; user: string };
50
/**
 * Build the prompt for the doc-attribution-spotlight card.
 *
 * Aggregates attributions by documentId, keeps the top 5 by aggregate score
 * and renders them as a table for the LLM.
 *
 * The caller is expected to short-circuit on an empty `judgmentAttributions`
 * array BEFORE calling this function.
 *
 * @param report - Report for the current run.
 * @param judgmentAttributions - Attribution entries to aggregate.
 * @returns The `system` and `user` prompt strings.
 */
export declare function buildDocAttributionSpotlightPrompt(report: Report, judgmentAttributions: JudgmentAttribution[]): { system: string; user: string };
60
/**
 * Build the prompt for the regression-vs-baseline card.
 *
 * Per-area deltas are computed in JS (failure-mode #1 mitigation) before the
 * user message is built. The computed `deltas` are returned alongside the
 * prompt strings as a side-channel intended for a per-call schema
 * `.refine()`.
 *
 * @param report - Report for the current run.
 * @param baseline - Report to compare against.
 * @returns The `system` and `user` prompt strings plus one
 *   `{ area, pointsDelta }` entry per computed delta.
 */
export declare function buildRegressionVsBaselinePrompt(report: Report, baseline: Report): {
    system: string;
    user: string;
    deltas: Array<{ area: string; pointsDelta: number }>;
};