npm - @sanity/ailf - Versions diffs - 5.0.0 → 6.0.0 - Mend

@sanity/ailf 5.0.0 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

package/dist/_vendor/ailf-core/types/diagnosis.d.ts CHANGED Viewed

@@ -8,12 +8,15 @@
  * discriminator inside the `ready` variant.
  *
  * Phase 1 lands placeholder body shapes; Phase 5 enriches each per
- * Doc 05 specs.
+ * AI-SPEC §3 and CONTEXT D-05/D-07. The `DiagnosisCard` discriminated
+ * union surface (arms + `cardType` literals) is stable — only the
+ * `body: <BodyInterface>` references resolve to richer shapes.
  *
  * @see docs/decisions/D0049-shared-confidence-contract.md
  * @see docs/decisions/D0052-judgment-ref-granularity.md
  * @see docs/decisions/D0050-artifact-registry-post-hoc-versioned-extension.md
  */
+import type { Confidence } from "./confidence.js";
 import type { RunId } from "./branded-ids.js";
 import type { ReportId } from "./index.js";
 /**
@@ -54,44 +57,139 @@ export interface CardMeta {
 }
 /**
  * A single actionable suggestion surfaced by a recommendations card.
- * The full Phase 5 shape may add fields (per Doc 05 specs); Phase 1
- * locks the minimum required surface.
+ *
+ * Phase 5 adds `docSlug` (the canonical doc page to rewrite) and
+ * `sectionHeading` (null when the suggestion targets the whole page)
+ * per AI-SPEC actionability-specificity rubric + failure-mode #2
+ * mitigation.
  */
 export interface ActionSuggestion {
     title: string;
     body: string;
     priority: "high" | "medium" | "low";
+    /** Canonical slug of the documentation page this suggestion targets. */
+    docSlug: string;
+    /**
+     * Heading within `docSlug` that should be revised, or `null` when the
+     * suggestion targets the page as a whole.
+     */
+    sectionHeading: string | null;
+}
+/**
+ * Minimal judgment reference per D0052 (taskId × modelId × dimension).
+ * Used by `LowConfidenceAttributionBody.judgmentRefs` to cite the
+ * specific judgments that drove a low-confidence finding.
+ */
+export interface JudgmentRef {
+    taskId: string;
+    modelId: string;
+    dimension: string;
 }
 /**
- * Phase 1 body placeholders. Each shape is intentionally minimal; Phase 5
- * card files enrich them per Doc 05 specs and assert
- * `satisfies z.ZodType<Extract<DiagnosisCard, { status: "ready"; cardType: "X" }>["body"]>`
- * against these declarations.
+ * Phase 5 enriched body shapes. Each keeps `summary: string` (load-bearing
+ * for CLI default render per AI-SPEC §6) and adds fields the corresponding
+ * Zod schema needs (asserting `satisfies z.ZodType<T>` in the card file).
  */
+/** area-summary: deterministic — keep only summary (no behavioral claims). */
 export interface AreaSummaryBody {
     summary: string;
 }
+/**
+ * failure-mode-summary: deterministic + D-05 dimension/failureMode gate.
+ * `count` = frequency in the report; `sampleSize` = judgment count for the
+ * dimension (per AI-SPEC failure-mode #3 mitigation).
+ */
 export interface FailureModeSummaryBody {
     summary: string;
+    /** Rubric dimension this summary targets (e.g. "task-completion"). */
+    dimension: string;
+    /** Canonical failure mode within this dimension. */
+    failureMode: string;
+    /** Number of judgments in this report with this failure mode. */
+    count: number;
+    /** Total judgments for this dimension — calibration denominator. */
+    sampleSize: number;
 }
+/**
+ * no-issues: deterministic + AI-SPEC failure-mode #7 sycophancy guard.
+ * `thresholdScore` surfaces the threshold used to qualify as "no issues"
+ * so readers can see the criterion behind the positive assessment.
+ */
 export interface NoIssuesBody {
     summary: string;
+    /** Minimum composite score that qualified this area as "no issues". */
+    thresholdScore: number;
 }
+/**
+ * top-recommendations: LLM-driven. `suggestions` reuses the enriched
+ * `ActionSuggestion` shape (docSlug + sectionHeading per AI-SPEC
+ * actionability-specificity rubric + failure-mode #2 mitigation).
+ */
 export interface TopRecommendationsBody {
     summary: string;
     suggestions: ActionSuggestion[];
 }
+/**
+ * weakest-area: LLM-driven. Adds area identification, dimension/failureMode
+ * context, and a small-sample calibration guard (AI-SPEC failure-mode #3).
+ */
 export interface WeakestAreaBody {
     summary: string;
+    /** Documentation area with the lowest composite score. */
+    area: string;
+    /** Primary dimension driving the low score. */
+    dimension: string;
+    /** Dominant failure mode in this area. */
+    failureMode: string;
+    /** Number of judgments sampled for this area — calibration denominator. */
+    sampleSize: number;
+    /** Calibrated confidence per D0049 (ensemble-stdev derivation). */
+    confidence: Confidence;
 }
-export interface LowConfidenceAttributionBody {
+/**
+ * regression-vs-baseline: LLM-driven. `deltas` is the per-area diff
+ * (JS-computed pre-call, max 10 entries); `drivers` is LLM prose;
+ * `overallTrend` is a 4-bucket summary per AI-SPEC §3 lines 605-613.
+ */
+export interface RegressionVsBaselineBody {
     summary: string;
+    /**
+     * Per-area score deltas (max 10). `drivers` carries the LLM's prose
+     * reasoning about what caused the change.
+     */
+    deltas: {
+        area: string;
+        direction: "improved" | "regressed" | "unchanged";
+        pointsDelta: number;
+        drivers: string[];
+    }[];
+    /** 4-bucket aggregate trend across all deltas. */
+    overallTrend: "net-improved" | "net-regressed" | "mixed" | "stable";
 }
-export interface DocAttributionSpotlightBody {
+/**
+ * low-confidence-attribution: LLM-driven. `judgmentRefs` cites the
+ * specific judgments (D0052 triple) that drove the low-confidence finding.
+ */
+export interface LowConfidenceAttributionBody {
     summary: string;
+    /** Judgment references (D0052) driving this low-confidence finding. */
+    judgmentRefs: JudgmentRef[];
 }
-export interface RegressionVsBaselineBody {
+/**
+ * doc-attribution-spotlight: LLM-driven. `docCitations` carries per-doc
+ * attribution roles and confidence calibration (AI-SPEC failure-mode #5).
+ */
+export interface DocAttributionSpotlightBody {
     summary: string;
+    /**
+     * Per-doc attribution records. `role` classifies how the doc contributed;
+     * `confidence` calibrates the attribution certainty (D0049).
+     */
+    docCitations: {
+        docSlug: string;
+        confidence: Confidence;
+        role: "supports" | "contradicts" | "missing" | "irrelevant";
+    }[];
 }
 /**
  * Outer-`status` discriminated union: 8 ready variants (one per
@@ -102,6 +200,10 @@ export interface RegressionVsBaselineBody {
  * No `not-yet-generated` variant — old-report fallback is a Phase 7
  * concern at the slim-shape boundary, handled at fetch-time, not in
  * `DiagnosisCard` itself.
+ *
+ * D-07: only the `body: <BodyInterface>` references resolve to richer
+ * shapes. The union arms, status literals, and cardType literals are
+ * identical to Phase 1.
  */
 export type DiagnosisCard = {
     status: "ready";

package/dist/_vendor/ailf-core/types/diagnosis.js CHANGED Viewed

@@ -8,7 +8,9 @@
  * discriminator inside the `ready` variant.
  *
  * Phase 1 lands placeholder body shapes; Phase 5 enriches each per
- * Doc 05 specs.
+ * AI-SPEC §3 and CONTEXT D-05/D-07. The `DiagnosisCard` discriminated
+ * union surface (arms + `cardType` literals) is stable — only the
+ * `body: <BodyInterface>` references resolve to richer shapes.
  *
  * @see docs/decisions/D0049-shared-confidence-contract.md
  * @see docs/decisions/D0052-judgment-ref-granularity.md

package/dist/_vendor/ailf-core/types/index.d.ts CHANGED Viewed

@@ -36,7 +36,7 @@ export { CONVENTIONAL_DERIVATIONS, isConfidence } from "./confidence.js";
 export type { ArtifactId, AssociationAxis, AssociationValues, Brand, EntryKey, Err, FixtureId, IdValidationError, JudgmentId, NewReportId, Ok, ProviderId, PromptId, Result, ResultId, RubricId, RunFingerprint, RunId, SuiteId, TaskId, TaskSlug, TraceId, } from "./branded-ids.js";
 export { err, fixtureId, generateJudgmentId, generateRunId, judgmentId, ok, providerId, resultId, runId, suiteId, taskId, traceId, } from "./branded-ids.js";
 export type { AgentHarnessTaskDefinition, ContentLakeAuthorableMode, ContentLakeAuthorableTask, CriterionRef, CustomTaskDefinition, GeneralizedAssertionDefinition, GeneralizedDocRef, GeneralizedTaskDefinition, GeneralizedTemplatedAssertion, GeneralizedValueAssertion, IdDocRef, KnowledgeProbeTaskDefinition, LiteracyTaskDefinition, MCPServerTaskDefinition, PromptVars, PathDocRef, PerspectiveDocRef, ReservedPromptVarKey, RubricRef, SlugDocRef, TaskCommonFields, TaskDifficulty, TaskOptions, TaskProviderConfig, TaskStatus, } from "./generalized-task.js";
-export type { ActionSuggestion, AreaSummaryBody, CardMeta, CardType, Diagnosis, DiagnosisCard, DocAttributionSpotlightBody, FailureModeSummaryBody, LowConfidenceAttributionBody, NoIssuesBody, RegressionVsBaselineBody, TopRecommendationsBody, VersionedInputs, WeakestAreaBody, } from "./diagnosis.js";
+export type { ActionSuggestion, AreaSummaryBody, CardMeta, CardType, Diagnosis, DiagnosisCard, DocAttributionSpotlightBody, FailureModeSummaryBody, JudgmentRef, LowConfidenceAttributionBody, NoIssuesBody, RegressionVsBaselineBody, TopRecommendationsBody, VersionedInputs, WeakestAreaBody, } from "./diagnosis.js";
 export type { AttributionMeta, DocAttribution, JudgmentAttribution, } from "./attribution.js";
 export type { CriterionSubJudgment, DocCitation, DocCitationRole, GraderJudgment, } from "./grader-judgment.js";
 export type { LegacyGraderJudgment } from "./legacy-grader-judgment.js";

package/dist/adapters/llm/fake-llm-client.d.ts CHANGED Viewed

@@ -40,9 +40,29 @@ export declare class FakeLLMClient implements LLMClient {
     readonly calls: FakeCallRecord[];
     private readonly completeQueue;
     private readonly structuredQueue;
+    /**
+     * Per-cardId keyed responses. A single-value entry is returned on every
+     * call for that cardId (repeated calls always get the same response). An
+     * array-value entry is consumed in order; once exhausted, calls for that
+     * cardId fall back to the FIFO structuredQueue.
+     *
+     * This is the substrate Plan 07's 17-fixture eval matrix uses to wire
+     * deterministic responses to specific LLM cards.
+     */
+    private readonly keyedResponses;
     constructor(args?: {
         completeResponses?: FakeCompletionResponse[];
         structuredResponses?: FakeStructuredResponse[];
+        /**
+         * Optional keyed-response map. Keys are `cardId` values from
+         * `args.context.cardId`. When a call matches a key the keyed entry is
+         * used instead of the FIFO queue.
+         *
+         * - Single-value entry: same response on every call for this cardId.
+         * - Array-value entry: entries consumed in insertion order; falls back
+         *   to FIFO (or throws) when the array is exhausted.
+         */
+        keyedResponses?: Record<string, FakeStructuredResponse | FakeStructuredResponse[]>;
     });
     complete(args: LLMCompleteArgs): Promise<LLMCompletion>;
     completeStructured<T>(args: LLMCompleteStructuredArgs<T>): Promise<LLMStructuredCompletion<T>>;

package/dist/adapters/llm/fake-llm-client.js CHANGED Viewed

@@ -11,9 +11,25 @@ export class FakeLLMClient {
     calls = [];
     completeQueue;
     structuredQueue;
+    /**
+     * Per-cardId keyed responses. A single-value entry is returned on every
+     * call for that cardId. An array-value entry is consumed in order; once
+     * exhausted, calls for that cardId fall back to the FIFO structuredQueue.
+     * NOTE(review): lookup uses `cardId in keyedResponses` on a plain object,
+     * so inherited keys such as "toString" also match — confirm acceptable.
+     * This is the substrate Plan 07's 17-fixture eval matrix uses to wire
+     * deterministic responses to specific LLM cards.
+     */
+    keyedResponses;
     constructor(args = {}) {
         this.completeQueue = [...(args.completeResponses ?? [])];
         this.structuredQueue = [...(args.structuredResponses ?? [])];
+        // Shallow-copy each array so consuming it never mutates the caller's array.
+        const keyed = {};
+        for (const [key, val] of Object.entries(args.keyedResponses ?? {})) {
+            keyed[key] = Array.isArray(val) ? [...val] : val;
+        }
+        this.keyedResponses = keyed;
     }
     async complete(args) {
         this.calls.push({
@@ -37,13 +53,34 @@ export class FakeLLMClient {
         };
     }
     async completeStructured(args) {
+        // Record every call first so test assertions on this.calls are never
+        // affected by which branch (keyed vs FIFO) handles the response.
         this.calls.push({
             kind: "completeStructured",
             model: args.model,
             prompt: args.prompt,
             ...(args.context ? { context: args.context } : {}),
         });
-        const next = this.structuredQueue.shift();
+        let next;
+        const cardId = args.context?.cardId;
+        if (cardId !== undefined && cardId in this.keyedResponses) {
+            const entry = this.keyedResponses[cardId];
+            if (Array.isArray(entry)) {
+                // Array-value: consume one entry per call. When exhausted, fall
+                // through to the FIFO queue below.
+                if (entry.length > 0) {
+                    next = entry.shift();
+                }
+            }
+            else {
+                // Single-value: return the same response on every call.
+                next = entry;
+            }
+        }
+        if (next === undefined) {
+            // FIFO fallback (existing behavior)
+            next = this.structuredQueue.shift();
+        }
         if (!next) {
             throw new Error("FakeLLMClient: no more queued structured responses (call exceeded queue)");
         }

package/dist/adapters/llm/openai-llm-client.js CHANGED Viewed

@@ -10,6 +10,7 @@
  * the adapter never reads `process.env`. The composition root maps env vars
  * to typed constructor args.
  */
+import { z } from "zod";
 import { OpenAIChatResponseSchema, splitModelId, } from "../../_vendor/ailf-core/index.js";
 import { DEFAULT_RETRY_POLICY, parseRetryAfterSeconds, runWithRetry, } from "./retry.js";
 const DEFAULT_BASE_URL = "https://api.openai.com/v1/chat/completions";
@@ -67,10 +68,25 @@ export class OpenAILLMClient {
     }
     async completeStructured(args) {
         const { modelName } = splitModelId(args.model);
+        // Derive the JSON Schema from the caller's Zod schema. Zod v4 natively
+        // emits `additionalProperties: false` on every nested z.object node —
+        // this is required for OpenAI strict-mode.
+        const jsonSchema = z.toJSONSchema(args.schema, { target: "draft-2020-12" });
+        // OpenAI strict-mode requires the root to be a plain object schema (no
+        // anyOf/oneOf/allOf at the top level). Discriminated unions produce
+        // anyOf at the root — callers must wrap them in a discriminator object.
+        assertSchemaIsObjectRoot(jsonSchema, args.model);
         const body = buildBody(modelName, args.prompt, {
-            temperature: args.temperature,
-            maxTokens: args.maxTokens,
-            responseFormat: { type: "json_object" },
+            temperature: args.temperature ?? 0.1,
+            maxTokens: args.maxTokens ?? 2000,
+            responseFormat: {
+                type: "json_schema",
+                json_schema: {
+                    name: args.context?.cardId ?? "structured_output",
+                    schema: jsonSchema,
+                    strict: true,
+                },
+            },
         });
         const data = await this.callApi(body);
         const raw = data.choices?.[0]?.message?.content;
@@ -84,6 +100,9 @@ export class OpenAILLMClient {
         catch (err) {
             throw new Error(`OpenAI structured completion returned invalid JSON for model ${args.model}: ${err instanceof Error ? err.message : String(err)}`, { cause: err });
         }
+        // strict:true guarantees a valid-against-the-schema JSON document, but
+        // the Zod parse is still load-bearing — it brands the result as T and is
+        // the only contract the engine trusts (D0045 parse-don't-validate).
         const value = args.schema.parse(parsed);
         const usage = extractUsage(data.usage);
         const cost = this.computeCost(modelName, usage);
@@ -145,6 +164,36 @@ export class OpenAILLMClient {
             `cost_usd=${cost.toFixed(6)}`);
     }
 }
+/**
+ * Assert that the JSON Schema root is a plain object type; throws if not.
+ *
+ * OpenAI strict-mode requires the root schema to be `{ type: "object" }`.
+ * A union at the root (`anyOf` is reported as z.union, `oneOf` as
+ * z.discriminatedUnion, `allOf` as z.intersection) is rejected — callers
+ * must wrap the union in a discriminator object before passing it to
+ * `completeStructured`. Only the null/non-object error names `modelId`.
+ * Per AI-SPEC §3 Pitfall 6 + T-05-03-01: caught at request-build time to
+ * avoid wasting API budget on a guaranteed 400.
+ */
+function assertSchemaIsObjectRoot(schema, modelId) {
+    if (typeof schema !== "object" || schema === null) {
+        throw new Error(`OpenAILLMClient: OpenAI strict-mode requires a single z.object at the ` +
+            `schema root for model ${modelId}; got non-object JSON Schema root.`);
+    }
+    const node = schema;
+    if (node.type !== "object") {
+        // Identify the kind so the error message is actionable.
+        const kind = "anyOf" in node
+            ? "z.union"
+            : "oneOf" in node
+                ? "z.discriminatedUnion"
+                : "allOf" in node
+                    ? "z.intersection"
+                    : String(node.type ?? "unknown");
+        throw new Error(`OpenAILLMClient: OpenAI strict-mode requires a single z.object at the ` +
+            `schema root; got ${kind}. Wrap the union in a discriminator object.`);
+    }
+}
 function buildBody(modelName, prompt, opts) {
     const body = {
         model: modelName,

package/dist/cli-program.js CHANGED Viewed

@@ -32,6 +32,7 @@ import { createFetchDocsCommand } from "./commands/fetch-docs.js";
 import { createGenerateConfigsCommand } from "./commands/generate-configs.js";
 import { createGraderCommand } from "./commands/grader/index.js";
 import { createInitCommand } from "./commands/init.js";
+import { createInterpretCommand } from "./commands/interpret.js";
 import { createInteractiveCommand } from "./commands/interactive.js";
 import { createLookupDocCommand } from "./commands/lookup-doc.js";
 import { createMeasureRetrievalCommand } from "./commands/measure-retrieval.js";
@@ -110,6 +111,8 @@ export function buildCliProgram(opts) {
         .addCommand(createWeeklyDigestCommand())
         .addCommand(createCheckStalenessCommand());
     program.addCommand(reportCommand.helpGroup(CommandGroup.AnalysisReports));
+    // `ailf interpret <reportId>` — top-level (not nested under report) per AI-SPEC
+    program.addCommand(createInterpretCommand().helpGroup(CommandGroup.AnalysisReports));
     // ── Grader Reliability ────────────────────────────────────────────────
     program.addCommand(createGraderCommand().helpGroup(CommandGroup.GraderReliability));
     // ── Setup & Configuration ─────────────────────────────────────────────

package/dist/commands/interpret.d.ts ADDED Viewed

@@ -0,0 +1,50 @@
+/**
+ * interpret command — generate a Diagnosis for a Report.
+ *
+ * Wraps `getDiagnosisRunner(ctx)` from the composition root in a Commander
+ * command for consistent CLI integration. Closest analog: compare.ts.
+ *
+ * Entry points:
+ *   ailf interpret <reportId>          — one-line-per-card summary
+ *   ailf interpret <reportId> --json   — full Diagnosis JSON
+ *   ailf interpret latest              — most recent report
+ *   ailf interpret <id> --compare <ref>  — DIAG-05 regression comparison
+ *   ailf interpret <id> --refresh      — bypass version-keyed cache
+ *
+ * @see packages/eval/src/commands/compare.ts — CLI factory analog
+ * @see packages/eval/src/composition-root.ts — getDiagnosisRunner
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-AI-SPEC.md §6
+ */
+import { Command } from "commander";
+import type { DiagnosisRunner, VersionedInputs } from "../_vendor/ailf-core/index.d.ts";
+interface MinimalReportStore {
+    read(id: string): Promise<unknown | null>;
+    latest(): Promise<unknown | null>;
+}
+export interface InterpretCommandOptions {
+    /**
+     * Override the runner factory for tests (receives `null` when `storeFactory`
+     * is set). Default: composition-root `getDiagnosisRunner`, loaded at action time.
+     */
+    readonly runnerFactory?: (ctx: unknown) => DiagnosisRunner;
+    /**
+     * Override the store factory for tests; a `null` result aborts with exit code 1.
+     * Default: the app context is created and `ctx.reportStore` is used at action time.
+     */
+    readonly storeFactory?: () => MinimalReportStore | null;
+    /**
+     * Override the versions resolver for tests. Receives the stored report
+     * record and returns the `VersionedInputs` needed by the runner.
+     * When omitted, versions are read from the report's `summary.versions`.
+     */
+    readonly versionsFromReport?: (report: unknown) => VersionedInputs;
+}
+/**
+ * Create the `ailf interpret <reportId>` Commander command.
+ *
+ * Accepts optional `InterpretCommandOptions` for testability — tests can
+ * inject a fake runner factory and store factory without touching module
+ * mocks (preferred per testing.md).
+ */
+export declare function createInterpretCommand(options?: InterpretCommandOptions): Command;
+export {};

package/dist/commands/interpret.js ADDED Viewed

@@ -0,0 +1,212 @@
+/**
+ * interpret command — generate a Diagnosis for a Report.
+ *
+ * Wraps `getDiagnosisRunner(ctx)` from the composition root in a Commander
+ * command for consistent CLI integration. Closest analog: compare.ts.
+ *
+ * Entry points:
+ *   ailf interpret <reportId>          — one-line-per-card summary
+ *   ailf interpret <reportId> --json   — full Diagnosis JSON
+ *   ailf interpret latest              — most recent report
+ *   ailf interpret <id> --compare <ref>  — DIAG-05 regression comparison
+ *   ailf interpret <id> --refresh      — bypass version-keyed cache
+ *
+ * @see packages/eval/src/commands/compare.ts — CLI factory analog
+ * @see packages/eval/src/composition-root.ts — getDiagnosisRunner
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-AI-SPEC.md §6
+ */
+import { dirname, resolve } from "path";
+import { fileURLToPath } from "url";
+import { Command } from "commander";
+import { addOutputDirOption } from "./shared/options.js";
+import { resolveOutputDir } from "./shared/resolve-output-dir.js";
+// ---------------------------------------------------------------------------
+// Module-level root constant (same pattern as compare.ts)
+// ---------------------------------------------------------------------------
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const ROOT = resolve(__dirname, "..", "..");
+// ---------------------------------------------------------------------------
+// Card output formatting (AI-SPEC §6 graceful-degradation-visibility)
+// ---------------------------------------------------------------------------
+/**
+ * Visual status markers — locked visual contract per plan Test 7:
+ * ready: "✓", degraded: "⚠", missing: "—"
+ */
+const STATUS_ICONS = {
+    ready: "✓",
+    degraded: "⚠",
+    missing: "—",
+};
+function getCardSummaryText(card) {
+    if (card.status === "ready") {
+        return card.body.summary;
+    }
+    if (card.status === "degraded") {
+        return card.reason;
+    }
+    // any other status (expected: "missing") — also reports card.reason
+    return card.reason;
+}
+/**
+ * Format a single card as a one-line string: `<icon> <cardType>: <text>`.
+ *
+ * `<icon>` is looked up in STATUS_ICONS by `card.status` (AI-SPEC §6: distinct
+ * icons per status); `<text>` is whatever getCardSummaryText returns.
+ */
+function formatCardSummaryLine(card) {
+    const icon = STATUS_ICONS[card.status];
+    const text = getCardSummaryText(card);
+    return `${icon} ${card.cardType}: ${text}`;
+}
+// ---------------------------------------------------------------------------
+// Default versions resolver
+// ---------------------------------------------------------------------------
+/**
+ * Derive VersionedInputs from a stored report record.
+ *
+ * All four versions are read from `report.summary.versions`; each must be a
+ * string or its fallback is used. Fallbacks: "unknown" for
+ * `graderJudgmentsVersion` and `ensembleVersion`, "0.1.0" for
+ * `diagnosisVersion` and `cardVersion` (e.g. reports without version metadata).
+ */
+function defaultVersionsFromReport(report) {
+    const rec = report;
+    const summary = rec.summary;
+    const versions = summary?.versions;
+    return {
+        graderJudgmentsVersion: typeof versions?.graderJudgmentsVersion === "string"
+            ? versions.graderJudgmentsVersion
+            : "unknown",
+        ensembleVersion: typeof versions?.ensembleVersion === "string"
+            ? versions.ensembleVersion
+            : "unknown",
+        diagnosisVersion: typeof versions?.diagnosisVersion === "string"
+            ? versions.diagnosisVersion
+            : "0.1.0",
+        cardVersion: typeof versions?.cardVersion === "string"
+            ? versions.cardVersion
+            : "0.1.0",
+    };
+}
+// ---------------------------------------------------------------------------
+// Command factory
+// ---------------------------------------------------------------------------
+/**
+ * Create the `ailf interpret <reportId>` Commander command.
+ * The action loads the report (plus an optional `--compare` baseline), runs
+ * the diagnosis runner, and prints JSON or one line per card; it exits with
+ * code 1 when the store, report, or baseline is missing. `options` lets tests
+ * inject runner/store/versions fakes without module mocks (per testing.md).
+ */
+export function createInterpretCommand(options = {}) {
+    const { runnerFactory, storeFactory, versionsFromReport } = options;
+    const cmd = new Command("interpret")
+        .description("Generate a Diagnosis for a Report — 8 typed cards explaining what's weak and what to do")
+        .argument("<reportId>", "Report ID (or 'latest' for the most recent)")
+        .option("-c, --compare <ref>", "Baseline report ID for regression-vs-baseline comparison")
+        .option("--refresh", "Bypass the version-keyed cache and recompute")
+        .option("--json", "Print full Diagnosis JSON instead of one-line-per-card summary")
+        .action(async (reportId, opts) => {
+        const outputDir = resolveOutputDir(opts.outputDir);
+        // ---------------------------------------------------------------------------
+        // Resolve store: injected factory (tests) or composition root (production)
+        // ---------------------------------------------------------------------------
+        let store;
+        let ctx;
+        if (storeFactory) {
+            store = storeFactory();
+            ctx = null;
+        }
+        else {
+            // Production path — lazy import to keep the module fast in tests
+            // Minimal config: report-read-only, no eval/fetch/publish.
+            const { createAppContext } = await import("../composition-root.js");
+            ctx = createAppContext({
+                compareEnabled: false,
+                gapAnalysisEnabled: false,
+                mode: "literacy",
+                noAutoScope: false,
+                noCache: true,
+                noRemoteCache: true,
+                outputDir,
+                publishEnabled: false,
+                rootDir: ROOT,
+                searchMode: "open",
+                skipEval: true,
+                skipFetch: true,
+                remote: false,
+                apiUrl: "https://ailf-api.sanity.build",
+            });
+            const prodCtx = ctx;
+            store = prodCtx.reportStore;
+        }
+        if (!store) {
+            process.stderr.write("Error: report store is not available\n");
+            process.exit(1);
+        }
+        // ---------------------------------------------------------------------------
+        // Resolve main report
+        // ---------------------------------------------------------------------------
+        const report = reportId === "latest"
+            ? await store.latest()
+            : await store.read(reportId);
+        if (!report) {
+            process.stderr.write(`Error: report not found: ${reportId}\n`);
+            process.exit(1);
+        }
+        // ---------------------------------------------------------------------------
+        // Optionally resolve baseline (DIAG-05); read by ID only, "latest" is not special-cased
+        // ---------------------------------------------------------------------------
+        let baseline;
+        if (opts.compare) {
+            baseline = await store.read(opts.compare);
+            if (!baseline) {
+                process.stderr.write(`Error: baseline report not found: ${opts.compare}\n`);
+                process.exit(1);
+            }
+        }
+        // ---------------------------------------------------------------------------
+        // Resolve versions
+        // ---------------------------------------------------------------------------
+        const versions = versionsFromReport
+            ? versionsFromReport(report)
+            : defaultVersionsFromReport(report);
+        // ---------------------------------------------------------------------------
+        // Build runner (NOTE(review): ctx is null here when storeFactory was injected)
+        // ---------------------------------------------------------------------------
+        let runner;
+        if (runnerFactory) {
+            runner = runnerFactory(ctx);
+        }
+        else {
+            const { getDiagnosisRunner } = await import("../composition-root.js");
+            // eslint-disable-next-line @typescript-eslint/no-explicit-any
+            runner = getDiagnosisRunner(ctx);
+        }
+        // ---------------------------------------------------------------------------
+        // Run diagnosis
+        // ---------------------------------------------------------------------------
+        const diagnosis = await runner.run({
+            // The report here is the eval's ReportStore record, which satisfies
+            // the Report interface for runner.run purposes (both carry id + provenance.runId).
+            // eslint-disable-next-line @typescript-eslint/no-explicit-any
+            report: report,
+            versions,
+            ...(baseline ? { baseline: baseline } : {}),
+            refresh: opts.refresh ?? false,
+        });
+        // ---------------------------------------------------------------------------
+        // Print output
+        // ---------------------------------------------------------------------------
+        if (opts.json) {
+            process.stdout.write(`${JSON.stringify(diagnosis, null, 2)}\n`);
+        }
+        else {
+            for (const card of diagnosis.cards) {
+                process.stdout.write(`${formatCardSummaryLine(card)}\n`);
+            }
+        }
+    });
+    addOutputDirOption(cmd);
+    return cmd;
+}