@sanity/ailf 5.0.0 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/config/diagnosis-cards.ts +318 -0
  2. package/config/models.ts +12 -0
  3. package/dist/_vendor/ailf-core/grader/failure-modes/agent-harness.d.ts +13 -0
  4. package/dist/_vendor/ailf-core/grader/failure-modes/agent-harness.js +16 -0
  5. package/dist/_vendor/ailf-core/grader/failure-modes/common.d.ts +14 -0
  6. package/dist/_vendor/ailf-core/grader/failure-modes/common.js +18 -0
  7. package/dist/_vendor/ailf-core/grader/failure-modes/index.d.ts +45 -0
  8. package/dist/_vendor/ailf-core/grader/failure-modes/index.js +109 -0
  9. package/dist/_vendor/ailf-core/grader/failure-modes/knowledge-probe.d.ts +13 -0
  10. package/dist/_vendor/ailf-core/grader/failure-modes/knowledge-probe.js +17 -0
  11. package/dist/_vendor/ailf-core/grader/failure-modes/literacy.d.ts +13 -0
  12. package/dist/_vendor/ailf-core/grader/failure-modes/literacy.js +17 -0
  13. package/dist/_vendor/ailf-core/grader/failure-modes/mcp.d.ts +13 -0
  14. package/dist/_vendor/ailf-core/grader/failure-modes/mcp.js +17 -0
  15. package/dist/_vendor/ailf-core/index.d.ts +1 -0
  16. package/dist/_vendor/ailf-core/index.js +4 -0
  17. package/dist/_vendor/ailf-core/services/diagnosis/card-validators.d.ts +41 -0
  18. package/dist/_vendor/ailf-core/services/diagnosis/card-validators.js +40 -0
  19. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/area-summary.test.d.ts +7 -0
  20. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/area-summary.test.js +131 -0
  21. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/failure-mode-summary.test.d.ts +7 -0
  22. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/failure-mode-summary.test.js +171 -0
  23. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/no-issues.test.d.ts +7 -0
  24. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/no-issues.test.js +155 -0
  25. package/dist/_vendor/ailf-core/services/diagnosis/cards/area-summary.d.ts +17 -0
  26. package/dist/_vendor/ailf-core/services/diagnosis/cards/area-summary.js +43 -0
  27. package/dist/_vendor/ailf-core/services/diagnosis/cards/doc-attribution-spotlight.d.ts +46 -0
  28. package/dist/_vendor/ailf-core/services/diagnosis/cards/doc-attribution-spotlight.js +104 -0
  29. package/dist/_vendor/ailf-core/services/diagnosis/cards/failure-mode-summary.d.ts +28 -0
  30. package/dist/_vendor/ailf-core/services/diagnosis/cards/failure-mode-summary.js +96 -0
  31. package/dist/_vendor/ailf-core/services/diagnosis/cards/index.d.ts +39 -0
  32. package/dist/_vendor/ailf-core/services/diagnosis/cards/index.js +52 -0
  33. package/dist/_vendor/ailf-core/services/diagnosis/cards/low-confidence-attribution.d.ts +27 -0
  34. package/dist/_vendor/ailf-core/services/diagnosis/cards/low-confidence-attribution.js +77 -0
  35. package/dist/_vendor/ailf-core/services/diagnosis/cards/no-issues.d.ts +32 -0
  36. package/dist/_vendor/ailf-core/services/diagnosis/cards/no-issues.js +71 -0
  37. package/dist/_vendor/ailf-core/services/diagnosis/cards/regression-vs-baseline.d.ts +44 -0
  38. package/dist/_vendor/ailf-core/services/diagnosis/cards/regression-vs-baseline.js +126 -0
  39. package/dist/_vendor/ailf-core/services/diagnosis/cards/top-recommendations.d.ts +41 -0
  40. package/dist/_vendor/ailf-core/services/diagnosis/cards/top-recommendations.js +107 -0
  41. package/dist/_vendor/ailf-core/services/diagnosis/cards/weakest-area.d.ts +43 -0
  42. package/dist/_vendor/ailf-core/services/diagnosis/cards/weakest-area.js +114 -0
  43. package/dist/_vendor/ailf-core/services/diagnosis/prompt-builders.d.ts +72 -0
  44. package/dist/_vendor/ailf-core/services/diagnosis/prompt-builders.js +273 -0
  45. package/dist/_vendor/ailf-core/services/diagnosis/prompts/doc-attribution-spotlight.system.d.ts +17 -0
  46. package/dist/_vendor/ailf-core/services/diagnosis/prompts/doc-attribution-spotlight.system.js +58 -0
  47. package/dist/_vendor/ailf-core/services/diagnosis/prompts/index.d.ts +10 -0
  48. package/dist/_vendor/ailf-core/services/diagnosis/prompts/index.js +10 -0
  49. package/dist/_vendor/ailf-core/services/diagnosis/prompts/low-confidence-attribution.system.d.ts +15 -0
  50. package/dist/_vendor/ailf-core/services/diagnosis/prompts/low-confidence-attribution.system.js +53 -0
  51. package/dist/_vendor/ailf-core/services/diagnosis/prompts/regression-vs-baseline.system.d.ts +14 -0
  52. package/dist/_vendor/ailf-core/services/diagnosis/prompts/regression-vs-baseline.system.js +63 -0
  53. package/dist/_vendor/ailf-core/services/diagnosis/prompts/top-recommendations.system.d.ts +16 -0
  54. package/dist/_vendor/ailf-core/services/diagnosis/prompts/top-recommendations.system.js +78 -0
  55. package/dist/_vendor/ailf-core/services/diagnosis/prompts/weakest-area.system.d.ts +16 -0
  56. package/dist/_vendor/ailf-core/services/diagnosis/prompts/weakest-area.system.js +86 -0
  57. package/dist/_vendor/ailf-core/services/diagnosis/registry.d.ts +10 -0
  58. package/dist/_vendor/ailf-core/services/diagnosis/registry.js +10 -0
  59. package/dist/_vendor/ailf-core/services/diagnosis-runner.d.ts +119 -2
  60. package/dist/_vendor/ailf-core/services/diagnosis-runner.js +136 -2
  61. package/dist/_vendor/ailf-core/services/index.d.ts +5 -1
  62. package/dist/_vendor/ailf-core/services/index.js +15 -2
  63. package/dist/_vendor/ailf-core/services/llm-client-factory.d.ts +64 -0
  64. package/dist/_vendor/ailf-core/services/llm-client-factory.js +54 -0
  65. package/dist/_vendor/ailf-core/types/diagnosis.d.ts +112 -10
  66. package/dist/_vendor/ailf-core/types/diagnosis.js +3 -1
  67. package/dist/_vendor/ailf-core/types/index.d.ts +1 -1
  68. package/dist/adapters/llm/fake-llm-client.d.ts +20 -0
  69. package/dist/adapters/llm/fake-llm-client.js +38 -1
  70. package/dist/adapters/llm/openai-llm-client.js +52 -3
  71. package/dist/cli-program.js +3 -0
  72. package/dist/commands/interpret.d.ts +50 -0
  73. package/dist/commands/interpret.js +212 -0
  74. package/dist/composition-root.d.ts +21 -23
  75. package/dist/composition-root.js +107 -41
  76. package/dist/config/diagnosis-cards.ts +318 -0
  77. package/dist/config/models.ts +12 -0
  78. package/dist/grader/agent-harness.d.ts +5 -10
  79. package/dist/grader/agent-harness.js +5 -13
  80. package/dist/grader/common.d.ts +5 -13
  81. package/dist/grader/common.js +5 -17
  82. package/dist/grader/index.d.ts +15 -29
  83. package/dist/grader/index.js +15 -66
  84. package/dist/grader/knowledge-probe.d.ts +5 -10
  85. package/dist/grader/knowledge-probe.js +5 -14
  86. package/dist/grader/literacy.d.ts +5 -9
  87. package/dist/grader/literacy.js +5 -13
  88. package/dist/grader/mcp.d.ts +5 -10
  89. package/dist/grader/mcp.js +5 -14
  90. package/package.json +2 -2
@@ -0,0 +1,46 @@
1
+ /**
2
+ * doc-attribution-spotlight card — LLM-driven doc-citation spotlight.
3
+ *
4
+ * Model: claude-sonnet-4-6 (routine per AI-SPEC §4 model routing)
5
+ * Version: doc-attribution-spotlight@0.1.0
6
+ *
7
+ * Landmine 11: reads `ctx.judgmentAttributions` (NOT Report.summary).
8
+ * Returns `status: "missing"` when attributions are undefined or empty.
9
+ *
10
+ * Mitigations:
11
+ * - failure-mode #5: docCitations[].docSlug refined against the manifest
12
+ * allow-list so hallucinated slugs fail Zod parse
13
+ *
14
+ * Schema is in the D0045 trust-boundary scan root; `satisfies` clause is
15
+ * mandatory.
16
+ *
17
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-RESEARCH.md (Landmine 11)
18
+ * @see docs/decisions/D0052-judgment-ref-granularity.md
19
+ */
20
+ import { z } from "zod";
21
+ import type { CardGenerator } from "../../diagnosis-runner.js";
22
+ /**
23
+ * Module-level static shape. Per-call adds the allow-list refine on docSlug.
24
+ */
25
+ export declare const DocAttributionSpotlightBodySchema: z.ZodObject<{
26
+ summary: z.ZodString;
27
+ docCitations: z.ZodArray<z.ZodObject<{
28
+ docSlug: z.ZodString;
29
+ confidence: z.ZodObject<{
30
+ level: z.ZodEnum<{
31
+ low: "low";
32
+ medium: "medium";
33
+ high: "high";
34
+ }>;
35
+ signalsPresent: z.ZodNumber;
36
+ derivation: z.ZodString;
37
+ }, z.core.$strip>;
38
+ role: z.ZodEnum<{
39
+ missing: "missing";
40
+ supports: "supports";
41
+ contradicts: "contradicts";
42
+ irrelevant: "irrelevant";
43
+ }>;
44
+ }, z.core.$strip>>;
45
+ }, z.core.$strip>;
46
+ export declare const generateDocAttributionSpotlight: CardGenerator;
@@ -0,0 +1,104 @@
1
+ /**
2
+ * doc-attribution-spotlight card — LLM-driven doc-citation spotlight.
3
+ *
4
+ * Model: claude-sonnet-4-6 (routine per AI-SPEC §4 model routing)
5
+ * Version: doc-attribution-spotlight@0.1.0
6
+ *
7
+ * Landmine 11: reads `ctx.judgmentAttributions` (NOT Report.summary).
8
+ * Returns `status: "missing"` when attributions are undefined or empty.
9
+ *
10
+ * Mitigations:
11
+ * - failure-mode #5: docCitations[].docSlug refined against the manifest
12
+ * allow-list so hallucinated slugs fail Zod parse
13
+ *
14
+ * Schema is in the D0045 trust-boundary scan root; `satisfies` clause is
15
+ * mandatory.
16
+ *
17
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-RESEARCH.md (Landmine 11)
18
+ * @see docs/decisions/D0052-judgment-ref-granularity.md
19
+ */
20
+ import { z } from "zod";
21
+ import { ConfidenceSchema } from "../../../schemas/confidence-schema.js";
22
+ import { modelId as mkModelId } from "../../../ports/llm-client.js";
23
+ import { buildDocAttributionSpotlightPrompt, buildDocSlugAllowList, } from "../prompt-builders.js";
24
+ // ---------------------------------------------------------------------------
25
+ // Body schema (D0045 trust boundary — satisfies required)
26
+ // ---------------------------------------------------------------------------
27
+ /**
28
+ * Module-level static shape. Per-call adds the allow-list refine on docSlug.
29
+ */
30
+ export const DocAttributionSpotlightBodySchema = z.object({
31
+ summary: z.string().min(1).max(800),
32
+ docCitations: z
33
+ .array(z.object({
34
+ docSlug: z.string().min(1),
35
+ confidence: ConfidenceSchema,
36
+ role: z.enum(["supports", "contradicts", "missing", "irrelevant"]),
37
+ }))
38
+ .min(1)
39
+ .max(5),
40
+ });
41
+ // ---------------------------------------------------------------------------
42
+ // Generator
43
+ // ---------------------------------------------------------------------------
44
+ const CARD_MODEL = mkModelId("anthropic:claude-sonnet-4-6");
45
+ export const generateDocAttributionSpotlight = async (report, ctx) => {
46
+ // C1: no LLM → missing
47
+ if (!ctx.llm) {
48
+ return {
49
+ status: "missing",
50
+ cardType: "doc-attribution-spotlight",
51
+ reason: "no LLMClient wired",
52
+ };
53
+ }
54
+ // D1: Landmine 11 — short-circuit BEFORE calling LLM when no attribution data
55
+ if (!ctx.judgmentAttributions || ctx.judgmentAttributions.length === 0) {
56
+ return {
57
+ status: "missing",
58
+ cardType: "doc-attribution-spotlight",
59
+ reason: "no attribution data for this run",
60
+ };
61
+ }
62
+ // Build allow-list from the runtime report
63
+ const allowList = buildDocSlugAllowList(report);
64
+ // Per-call schema with docSlug allow-list refine (AI-SPEC §3 Pitfall 1)
65
+ const PerCallSchema = z.object({
66
+ summary: z.string().min(1).max(800),
67
+ docCitations: z
68
+ .array(z.object({
69
+ docSlug: z
70
+ .string()
71
+ .min(1)
72
+ .refine((slug) => allowList.has(slug), {
73
+ message: "docCitations[].docSlug is not in the report document manifest allow-list",
74
+ }),
75
+ confidence: ConfidenceSchema,
76
+ role: z.enum(["supports", "contradicts", "missing", "irrelevant"]),
77
+ }))
78
+ .min(1)
79
+ .max(5),
80
+ });
81
+ const prompt = buildDocAttributionSpotlightPrompt(report, ctx.judgmentAttributions);
82
+ const { value, usage } = await ctx.llm.completeStructured({
83
+ model: CARD_MODEL,
84
+ prompt: `${prompt.system}\n\n${prompt.user}`,
85
+ schema: PerCallSchema,
86
+ temperature: 0.1,
87
+ maxTokens: 2000,
88
+ context: {
89
+ feature: "diagnosis",
90
+ runId: ctx.runId,
91
+ cardId: "doc-attribution-spotlight",
92
+ },
93
+ });
94
+ return {
95
+ status: "ready",
96
+ cardType: "doc-attribution-spotlight",
97
+ body: value,
98
+ meta: {
99
+ cardVersion: "doc-attribution-spotlight@0.1.0",
100
+ tokenUsage: { input: usage.promptTokens, output: usage.completionTokens },
101
+ generatedAt: new Date().toISOString(),
102
+ },
103
+ };
104
+ };
@@ -0,0 +1,28 @@
1
+ /**
2
+ * failure-mode-summary card — deterministic projection over Report.summary.failureModes.
3
+ *
4
+ * Pure computation, no LLM call. Identifies the dominant failure mode
5
+ * across all dimensions in the report's slim failure-mode summary.
6
+ *
7
+ * D-05: `.refine(buildFailureModeRefinement())` rejects cross-dimension
8
+ * (dimension, failureMode) pairs that the schema otherwise would accept —
9
+ * turning a "Zod-passes, semantically wrong" LLM output into a
10
+ * `parseFailed: true` degraded card. For this deterministic card, the
11
+ * refinement also defends against bad Report data.
12
+ *
13
+ * Schema is in the D0045 trust-boundary scan root; `satisfies` clause is
14
+ * mandatory (cards/ is a SCAN_ROOT in check-trust-boundary-satisfies.ts).
15
+ *
16
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-CONTEXT.md (D-05)
17
+ * @see packages/core/src/services/diagnosis/card-validators.ts
18
+ */
19
+ import { z } from "zod";
20
+ import type { CardGenerator } from "../../diagnosis-runner.js";
21
+ export declare const FailureModeSummaryBodySchema: z.ZodObject<{
22
+ summary: z.ZodString;
23
+ dimension: z.ZodString;
24
+ failureMode: z.ZodString;
25
+ count: z.ZodNumber;
26
+ sampleSize: z.ZodNumber;
27
+ }, z.core.$strip>;
28
+ export declare const generateFailureModeSummary: CardGenerator;
@@ -0,0 +1,96 @@
1
+ /**
2
+ * failure-mode-summary card — deterministic projection over Report.summary.failureModes.
3
+ *
4
+ * Pure computation, no LLM call. Identifies the dominant failure mode
5
+ * across all dimensions in the report's slim failure-mode summary.
6
+ *
7
+ * D-05: `.refine(buildFailureModeRefinement())` rejects cross-dimension
8
+ * (dimension, failureMode) pairs that the schema otherwise would accept —
9
+ * turning a "Zod-passes, semantically wrong" LLM output into a
10
+ * `parseFailed: true` degraded card. For this deterministic card, the
11
+ * refinement also defends against bad Report data.
12
+ *
13
+ * Schema is in the D0045 trust-boundary scan root; `satisfies` clause is
14
+ * mandatory (cards/ is a SCAN_ROOT in check-trust-boundary-satisfies.ts).
15
+ *
16
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-CONTEXT.md (D-05)
17
+ * @see packages/core/src/services/diagnosis/card-validators.ts
18
+ */
19
+ import { z } from "zod";
20
+ import { CANONICAL_DIMENSIONS, failureModesForDimension, } from "../../../grader/failure-modes/index.js";
21
+ import { buildFailureModeRefinement } from "../card-validators.js";
22
+ // ---------------------------------------------------------------------------
23
+ // Body schema (D0045 trust boundary — satisfies required; D-05 refine)
24
+ // ---------------------------------------------------------------------------
25
+ export const FailureModeSummaryBodySchema = z
26
+ .object({
27
+ summary: z.string().min(1).max(800),
28
+ dimension: z.string().min(1),
29
+ failureMode: z.string().min(1),
30
+ count: z.number().int().nonnegative(),
31
+ sampleSize: z.number().int().nonnegative(),
32
+ })
33
+ .refine(buildFailureModeRefinement(), {
34
+ message: "failureMode is not in the canonical taxonomy for this dimension",
35
+ path: ["failureMode"],
36
+ });
37
+ // ---------------------------------------------------------------------------
38
+ // Private helper — find the dimension a failure mode belongs to
39
+ // ---------------------------------------------------------------------------
40
+ /**
41
+ * Find the first canonical dimension whose taxonomy includes `mode`.
42
+ * Returns `undefined` if the mode is not in any dimension's taxonomy.
43
+ */
44
+ function findDimensionForMode(mode) {
45
+ for (const dim of CANONICAL_DIMENSIONS) {
46
+ if (failureModesForDimension(dim).includes(mode)) {
47
+ return dim;
48
+ }
49
+ }
50
+ return undefined;
51
+ }
52
+ // ---------------------------------------------------------------------------
53
+ // Generator
54
+ // ---------------------------------------------------------------------------
55
+ export const generateFailureModeSummary = async (report) => {
56
+ const slimFm = report.summary.failureModes;
57
+ if (!slimFm ||
58
+ !slimFm.topTitles ||
59
+ slimFm.topTitles.length === 0 ||
60
+ slimFm.totalJudgments === 0) {
61
+ return {
62
+ status: "missing",
63
+ cardType: "failure-mode-summary",
64
+ reason: "report has no failure modes",
65
+ };
66
+ }
67
+ // Find the entry with the highest count — topTitles is expected to arrive sorted by count descending, but the max-scan below does not rely on that ordering
68
+ const topEntry = slimFm.topTitles.reduce((best, entry) => (entry.count > best.count ? entry : best), slimFm.topTitles[0]);
69
+ const failureMode = topEntry.category;
70
+ const dimension = findDimensionForMode(failureMode);
71
+ if (!dimension) {
72
+ return {
73
+ status: "missing",
74
+ cardType: "failure-mode-summary",
75
+ reason: `failure mode "${failureMode}" is not in the canonical taxonomy`,
76
+ };
77
+ }
78
+ const sampleSize = slimFm.totalJudgments;
79
+ const summary = `The most frequent failure mode is "${failureMode}" (${topEntry.count} of ${sampleSize} judgments in dimension "${dimension}").`;
80
+ const body = FailureModeSummaryBodySchema.parse({
81
+ summary,
82
+ dimension,
83
+ failureMode,
84
+ count: topEntry.count,
85
+ sampleSize,
86
+ });
87
+ return {
88
+ status: "ready",
89
+ cardType: "failure-mode-summary",
90
+ body,
91
+ meta: {
92
+ cardVersion: "failure-mode-summary@0.1.0",
93
+ generatedAt: new Date().toISOString(),
94
+ },
95
+ };
96
+ };
@@ -0,0 +1,39 @@
1
+ /**
2
+ * Card-generator barrel — exports all 8 generators + DIAGNOSIS_CARD_GENERATORS.
3
+ *
4
+ * This barrel lives in @sanity/ailf-core so the Plan-06 API route can import
5
+ * `DIAGNOSIS_CARD_GENERATORS` via `import { DIAGNOSIS_CARD_GENERATORS } from
6
+ * "@sanity/ailf-core"` without depending on `@sanity/ailf` (D-01 boundary).
7
+ *
8
+ * This is a CARD-GENERATOR barrel only — no vendor SDK classes, no adapter
9
+ * implementations. Cards speak the `LLMClient` port exclusively (D0051).
10
+ *
11
+ * TypeScript exhaustiveness: `CardType` is an 8-element literal union; the
12
+ * `Record<CardType, CardGenerator>` annotation causes a build error if any
13
+ * key is missing or extra.
14
+ *
15
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-CONTEXT.md (D-01)
16
+ * @see docs/decisions/D0051-llm-client-port.md
17
+ */
18
+ import { generateAreaSummary } from "./area-summary.js";
19
+ import { generateFailureModeSummary } from "./failure-mode-summary.js";
20
+ import { generateNoIssues } from "./no-issues.js";
21
+ import { generateTopRecommendations } from "./top-recommendations.js";
22
+ import { generateWeakestArea } from "./weakest-area.js";
23
+ import { generateLowConfidenceAttribution } from "./low-confidence-attribution.js";
24
+ import { generateDocAttributionSpotlight } from "./doc-attribution-spotlight.js";
25
+ import { generateRegressionVsBaseline } from "./regression-vs-baseline.js";
26
+ import type { CardGenerator } from "../../diagnosis-runner.js";
27
+ import type { CardType } from "../../../types/diagnosis.js";
28
+ /**
29
+ * The canonical card-generator registry for the diagnosis engine.
30
+ *
31
+ * `Readonly<Record<CardType, CardGenerator>>` — TypeScript exhaustiveness
32
+ * ensures all 8 `CardType` strings appear (no rogue keys, no missing keys).
33
+ * The composition root (`packages/eval/src/composition-root.ts`) passes this
34
+ * directly into `createDiagnosisRunner(deps)`.
35
+ *
36
+ * Also consumed by the Plan-06 API route, which imports via `@sanity/ailf-core`.
37
+ */
38
+ export declare const DIAGNOSIS_CARD_GENERATORS: Readonly<Record<CardType, CardGenerator>>;
39
+ export { generateAreaSummary, generateFailureModeSummary, generateNoIssues, generateTopRecommendations, generateWeakestArea, generateLowConfidenceAttribution, generateDocAttributionSpotlight, generateRegressionVsBaseline, };
@@ -0,0 +1,52 @@
1
+ /**
2
+ * Card-generator barrel — exports all 8 generators + DIAGNOSIS_CARD_GENERATORS.
3
+ *
4
+ * This barrel lives in @sanity/ailf-core so the Plan-06 API route can import
5
+ * `DIAGNOSIS_CARD_GENERATORS` via `import { DIAGNOSIS_CARD_GENERATORS } from
6
+ * "@sanity/ailf-core"` without depending on `@sanity/ailf` (D-01 boundary).
7
+ *
8
+ * This is a CARD-GENERATOR barrel only — no vendor SDK classes, no adapter
9
+ * implementations. Cards speak the `LLMClient` port exclusively (D0051).
10
+ *
11
+ * TypeScript exhaustiveness: `CardType` is an 8-element literal union; the
12
+ * `Record<CardType, CardGenerator>` annotation causes a build error if any
13
+ * key is missing or extra.
14
+ *
15
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-CONTEXT.md (D-01)
16
+ * @see docs/decisions/D0051-llm-client-port.md
17
+ */
18
+ import { generateAreaSummary } from "./area-summary.js";
19
+ import { generateFailureModeSummary } from "./failure-mode-summary.js";
20
+ import { generateNoIssues } from "./no-issues.js";
21
+ import { generateTopRecommendations } from "./top-recommendations.js";
22
+ import { generateWeakestArea } from "./weakest-area.js";
23
+ import { generateLowConfidenceAttribution } from "./low-confidence-attribution.js";
24
+ import { generateDocAttributionSpotlight } from "./doc-attribution-spotlight.js";
25
+ import { generateRegressionVsBaseline } from "./regression-vs-baseline.js";
26
+ // ---------------------------------------------------------------------------
27
+ // DIAGNOSIS_CARD_GENERATORS — full 8-card registry literal
28
+ // ---------------------------------------------------------------------------
29
+ /**
30
+ * The canonical card-generator registry for the diagnosis engine.
31
+ *
32
+ * `Readonly<Record<CardType, CardGenerator>>` — TypeScript exhaustiveness
33
+ * ensures all 8 `CardType` strings appear (no rogue keys, no missing keys).
34
+ * The composition root (`packages/eval/src/composition-root.ts`) passes this
35
+ * directly into `createDiagnosisRunner(deps)`.
36
+ *
37
+ * Also consumed by the Plan-06 API route, which imports via `@sanity/ailf-core`.
38
+ */
39
+ export const DIAGNOSIS_CARD_GENERATORS = {
40
+ "area-summary": generateAreaSummary,
41
+ "failure-mode-summary": generateFailureModeSummary,
42
+ "no-issues": generateNoIssues,
43
+ "top-recommendations": generateTopRecommendations,
44
+ "weakest-area": generateWeakestArea,
45
+ "low-confidence-attribution": generateLowConfidenceAttribution,
46
+ "doc-attribution-spotlight": generateDocAttributionSpotlight,
47
+ "regression-vs-baseline": generateRegressionVsBaseline,
48
+ };
49
+ // ---------------------------------------------------------------------------
50
+ // Individual re-exports (for callers that want a single generator)
51
+ // ---------------------------------------------------------------------------
52
+ export { generateAreaSummary, generateFailureModeSummary, generateNoIssues, generateTopRecommendations, generateWeakestArea, generateLowConfidenceAttribution, generateDocAttributionSpotlight, generateRegressionVsBaseline, };
@@ -0,0 +1,27 @@
1
+ /**
2
+ * low-confidence-attribution card — LLM-driven uncertain-attribution finder.
3
+ *
4
+ * Model: claude-sonnet-4-6 (routine per AI-SPEC §4 model routing)
5
+ * Version: low-confidence-attribution@0.1.0
6
+ *
7
+ * Landmine 11: this card reads `ctx.judgmentAttributions` (NOT Report.summary).
8
+ * Returns `status: "missing"` when attributions are undefined or empty BEFORE
9
+ * calling the LLM — this is a structural seam, not an error path.
10
+ *
11
+ * Schema is in the D0045 trust-boundary scan root; `satisfies` clause is
12
+ * mandatory.
13
+ *
14
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-RESEARCH.md (Landmine 11)
15
+ * @see docs/decisions/D0052-judgment-ref-granularity.md
16
+ */
17
+ import { z } from "zod";
18
+ import type { CardGenerator } from "../../diagnosis-runner.js";
19
+ export declare const LowConfidenceAttributionBodySchema: z.ZodObject<{
20
+ summary: z.ZodString;
21
+ judgmentRefs: z.ZodArray<z.ZodObject<{
22
+ taskId: z.ZodString;
23
+ modelId: z.ZodString;
24
+ dimension: z.ZodString;
25
+ }, z.core.$strip>>;
26
+ }, z.core.$strip>;
27
+ export declare const generateLowConfidenceAttribution: CardGenerator;
@@ -0,0 +1,77 @@
1
+ /**
2
+ * low-confidence-attribution card — LLM-driven uncertain-attribution finder.
3
+ *
4
+ * Model: claude-sonnet-4-6 (routine per AI-SPEC §4 model routing)
5
+ * Version: low-confidence-attribution@0.1.0
6
+ *
7
+ * Landmine 11: this card reads `ctx.judgmentAttributions` (NOT Report.summary).
8
+ * Returns `status: "missing"` when attributions are undefined or empty BEFORE
9
+ * calling the LLM — this is a structural seam, not an error path.
10
+ *
11
+ * Schema is in the D0045 trust-boundary scan root; `satisfies` clause is
12
+ * mandatory.
13
+ *
14
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-RESEARCH.md (Landmine 11)
15
+ * @see docs/decisions/D0052-judgment-ref-granularity.md
16
+ */
17
+ import { z } from "zod";
18
+ import { modelId as mkModelId } from "../../../ports/llm-client.js";
19
+ import { buildLowConfidenceAttributionPrompt } from "../prompt-builders.js";
20
+ // ---------------------------------------------------------------------------
21
+ // Body schema (D0045 trust boundary — satisfies required)
22
+ // ---------------------------------------------------------------------------
23
+ export const LowConfidenceAttributionBodySchema = z.object({
24
+ summary: z.string().min(1).max(800),
25
+ judgmentRefs: z
26
+ .array(z.object({
27
+ taskId: z.string().min(1),
28
+ modelId: z.string().min(1),
29
+ dimension: z.string().min(1),
30
+ }))
31
+ .min(1),
32
+ });
33
+ // ---------------------------------------------------------------------------
34
+ // Generator
35
+ // ---------------------------------------------------------------------------
36
+ const CARD_MODEL = mkModelId("anthropic:claude-sonnet-4-6");
37
+ export const generateLowConfidenceAttribution = async (report, ctx) => {
38
+ // C1: no LLM → missing
39
+ if (!ctx.llm) {
40
+ return {
41
+ status: "missing",
42
+ cardType: "low-confidence-attribution",
43
+ reason: "no LLMClient wired",
44
+ };
45
+ }
46
+ // L1: Landmine 11 — short-circuit BEFORE calling LLM when no attribution data
47
+ if (!ctx.judgmentAttributions || ctx.judgmentAttributions.length === 0) {
48
+ return {
49
+ status: "missing",
50
+ cardType: "low-confidence-attribution",
51
+ reason: "no attribution data for this run",
52
+ };
53
+ }
54
+ const prompt = buildLowConfidenceAttributionPrompt(report, ctx.judgmentAttributions);
55
+ const { value, usage } = await ctx.llm.completeStructured({
56
+ model: CARD_MODEL,
57
+ prompt: `${prompt.system}\n\n${prompt.user}`,
58
+ schema: LowConfidenceAttributionBodySchema,
59
+ temperature: 0.1,
60
+ maxTokens: 2000,
61
+ context: {
62
+ feature: "diagnosis",
63
+ runId: ctx.runId,
64
+ cardId: "low-confidence-attribution",
65
+ },
66
+ });
67
+ return {
68
+ status: "ready",
69
+ cardType: "low-confidence-attribution",
70
+ body: value,
71
+ meta: {
72
+ cardVersion: "low-confidence-attribution@0.1.0",
73
+ tokenUsage: { input: usage.promptTokens, output: usage.completionTokens },
74
+ generatedAt: new Date().toISOString(),
75
+ },
76
+ };
77
+ };
@@ -0,0 +1,32 @@
1
+ /**
2
+ * no-issues card — deterministic guard against sycophantic "all is well" reports.
3
+ *
4
+ * Only fires when ALL areas in the report scored at or above
5
+ * `NO_ISSUES_THRESHOLD`. The threshold is calibrated to keep the firing rate
6
+ * ≤30% per AI-SPEC §1b failure-mode #7 (sycophantic no-issues).
7
+ *
8
+ * Pure computation, no LLM call. Uses `report.summary.scores` — each
9
+ * `FeatureScore.totalScore` is the composite (0–100 scale).
10
+ *
11
+ * Schema is in the D0045 trust-boundary scan root; `satisfies` clause is
12
+ * mandatory (cards/ is a SCAN_ROOT in check-trust-boundary-satisfies.ts).
13
+ *
14
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-AI-SPEC.md §1b failure-mode #7
15
+ */
16
+ import { z } from "zod";
17
+ import type { CardGenerator } from "../../diagnosis-runner.js";
18
+ /**
19
+ * Threshold at or above which a Report area is considered "no issues" per the
20
+ * team's action threshold (AI-SPEC §1b failure-mode #7). Tuned against
21
+ * the fixture set in no-issues.test.ts to keep firing rate ≤30%.
22
+ *
23
+ * At 85, only reports where every area scores ≥85 trigger this card.
24
+ * On the 10-report fixture spanning 0–99, only reports at [90,92],
25
+ * [95,97], [98,99] qualify — a 3/10 = 30% firing rate (right at the cap).
26
+ */
27
+ export declare const NO_ISSUES_THRESHOLD = 85;
28
+ export declare const NoIssuesBodySchema: z.ZodObject<{
29
+ summary: z.ZodString;
30
+ thresholdScore: z.ZodNumber;
31
+ }, z.core.$strip>;
32
+ export declare const generateNoIssues: CardGenerator;
@@ -0,0 +1,71 @@
1
+ /**
2
+ * no-issues card — deterministic guard against sycophantic "all is well" reports.
3
+ *
4
+ * Only fires when ALL areas in the report scored at or above
5
+ * `NO_ISSUES_THRESHOLD`. The threshold is calibrated to keep the firing rate
6
+ * ≤30% per AI-SPEC §1b failure-mode #7 (sycophantic no-issues).
7
+ *
8
+ * Pure computation, no LLM call. Uses `report.summary.scores` — each
9
+ * `FeatureScore.totalScore` is the composite (0–100 scale).
10
+ *
11
+ * Schema is in the D0045 trust-boundary scan root; `satisfies` clause is
12
+ * mandatory (cards/ is a SCAN_ROOT in check-trust-boundary-satisfies.ts).
13
+ *
14
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-AI-SPEC.md §1b failure-mode #7
15
+ */
16
+ import { z } from "zod";
17
+ // ---------------------------------------------------------------------------
18
+ // Threshold constant (calibration)
19
+ // ---------------------------------------------------------------------------
20
+ /**
21
+ * Threshold at or above which a Report area is considered "no issues" per the
22
+ * team's action threshold (AI-SPEC §1b failure-mode #7). Tuned against
23
+ * the fixture set in no-issues.test.ts to keep firing rate ≤30%.
24
+ *
25
+ * At 85, only reports where every area scores ≥85 trigger this card.
26
+ * On the 10-report fixture spanning 0–99, only reports at [90,92],
27
+ * [95,97], [98,99] qualify — a 3/10 = 30% firing rate (right at the cap).
28
+ */
29
+ export const NO_ISSUES_THRESHOLD = 85;
30
+ // ---------------------------------------------------------------------------
31
+ // Body schema (D0045 trust boundary — satisfies required)
32
+ // ---------------------------------------------------------------------------
33
+ export const NoIssuesBodySchema = z.object({
34
+ summary: z.string().min(1).max(500),
35
+ thresholdScore: z.number(),
36
+ });
37
+ // ---------------------------------------------------------------------------
38
+ // Generator
39
+ // ---------------------------------------------------------------------------
40
+ export const generateNoIssues = async (report) => {
41
+ const scores = report.summary.scores;
42
+ if (!scores || scores.length === 0) {
43
+ return {
44
+ status: "missing",
45
+ cardType: "no-issues",
46
+ reason: "at least one area scored below threshold",
47
+ };
48
+ }
49
+ const allAboveThreshold = scores.every((s) => s.totalScore >= NO_ISSUES_THRESHOLD);
50
+ if (!allAboveThreshold) {
51
+ return {
52
+ status: "missing",
53
+ cardType: "no-issues",
54
+ reason: "at least one area scored below threshold",
55
+ };
56
+ }
57
+ const summary = `All ${scores.length} areas scored ≥${NO_ISSUES_THRESHOLD} — no action required.`;
58
+ const body = NoIssuesBodySchema.parse({
59
+ summary,
60
+ thresholdScore: NO_ISSUES_THRESHOLD,
61
+ });
62
+ return {
63
+ status: "ready",
64
+ cardType: "no-issues",
65
+ body,
66
+ meta: {
67
+ cardVersion: "no-issues@0.1.0",
68
+ generatedAt: new Date().toISOString(),
69
+ },
70
+ };
71
+ };
@@ -0,0 +1,44 @@
1
+ /**
2
+ * regression-vs-baseline card — LLM-driven run comparison card.
3
+ *
4
+ * Model: claude-opus-4-6 (high-stakes per AI-SPEC §4 model routing)
5
+ * Version: regression-vs-baseline@0.1.0
6
+ *
7
+ * DIAG-05: emits ONLY when `ctx.baseline` is set. When baseline is absent,
8
+ * returns `status: "missing", reason: "no --compare baseline supplied"`.
9
+ *
10
+ * Mitigations:
11
+ * - failure-mode #1: `buildRegressionVsBaselinePrompt` computes deltas in JS
12
+ * BEFORE the LLM call; schema refine asserts sign-consistency (R3)
13
+ *
14
+ * Schema is in the D0045 trust-boundary scan root; `satisfies` clause is
15
+ * mandatory.
16
+ *
17
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-AI-SPEC.md §3 lines 603-664
18
+ */
19
+ import { z } from "zod";
20
+ import type { CardGenerator } from "../../diagnosis-runner.js";
21
+ /**
22
+ * Module-level static shape only. Per-call adds:
23
+ * - sign-consistency refine (R3): Math.sign(pointsDelta) === directionSign(direction)
24
+ */
25
+ export declare const RegressionVsBaselineBodySchema: z.ZodObject<{
26
+ summary: z.ZodString;
27
+ deltas: z.ZodArray<z.ZodObject<{
28
+ area: z.ZodString;
29
+ direction: z.ZodEnum<{
30
+ improved: "improved";
31
+ regressed: "regressed";
32
+ unchanged: "unchanged";
33
+ }>;
34
+ pointsDelta: z.ZodNumber;
35
+ drivers: z.ZodArray<z.ZodString>;
36
+ }, z.core.$strip>>;
37
+ overallTrend: z.ZodEnum<{
38
+ "net-improved": "net-improved";
39
+ "net-regressed": "net-regressed";
40
+ mixed: "mixed";
41
+ stable: "stable";
42
+ }>;
43
+ }, z.core.$strip>;
44
+ export declare const generateRegressionVsBaseline: CardGenerator;