npm - @sanity/ailf - Versions diffs - 5.0.0 → 6.1.0 - Mend

@sanity/ailf 5.0.0 → 6.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (115)

package/dist/grader/index.js CHANGED Viewed

@@ -1,75 +1,24 @@
 /**
  * Per-dimension failure-mode taxonomy barrel.
  *
+ * D-05: taxonomy data relocated to @sanity/ailf-core so card files in
+ * packages/core/src/services/diagnosis/cards/ can import without violating
+ * the core→eval import direction rule.
+ *
+ * This file is now a re-export shim — all behavior lives in
+ * packages/core/src/grader/failure-modes/. Existing eval-side callers
+ * (rubrics.ts, rubric-resolution.ts, calibration.test.ts) continue to
+ * work with zero source changes.
+ *
  * Named re-exports only (W0124 — never `export *`).
  *
  * Consumers:
- * - `packages/eval/config/rubrics.ts` — calls `failureModesForDimension()` to
- *   stamp a per-template legal-mode list onto every rubric template entry.
- * - `packages/eval/src/pipeline/compiler/rubric-resolution.ts` — reads
- *   `template.failureModes` at prompt-assembly time and announces the legal
- *   modes to the grader before the structured-shape footer (Plan 03-01).
- * - `packages/eval/src/grader/__tests__/calibration.test.ts` — fixture-driven
- *   ≥90% non-`unclassified` static calibration check (ROADMAP success
- *   criterion 1).
+ * - `packages/eval/config/rubrics.ts` — calls `failureModesForDimension()`
+ * - `packages/eval/src/pipeline/compiler/rubric-resolution.ts`
+ * - `packages/eval/src/grader/__tests__/calibration.test.ts`
  *
+ * @see packages/core/src/grader/failure-modes/index.ts — canonical location
  * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
- *      §"Per-dimension failure-mode taxonomies" (lines 239-283).
- * @see docs/decisions/D0005-grader-model-separation.md — single grader model;
- *      taxonomies travel with the rubric prompt for reproducibility.
- */
-export { COMMON_FAILURE_MODES } from "./common.js";
-export { LITERACY_FAILURE_MODES } from "./literacy.js";
-export { MCP_FAILURE_MODES } from "./mcp.js";
-export { KP_FAILURE_MODES } from "./knowledge-probe.js";
-export { AGENT_FAILURE_MODES } from "./agent-harness.js";
-import { COMMON_FAILURE_MODES } from "./common.js";
-import { LITERACY_FAILURE_MODES } from "./literacy.js";
-import { MCP_FAILURE_MODES } from "./mcp.js";
-import { KP_FAILURE_MODES } from "./knowledge-probe.js";
-import { AGENT_FAILURE_MODES } from "./agent-harness.js";
-/**
- * Return the legal failure-mode list for a given rubric dimension.
- *
- * Accepts both family-level keys (`mcp-behavior`, `knowledge-probe`,
- * `agent-harness`) and the per-template `dimension` strings used in
- * `config/rubrics.ts` (`task-completion`, `input-validation`,
- * `factual-correctness`, `process-quality`, …). The cross-cutting
- * `COMMON_FAILURE_MODES` is always included.
- *
- * Unknown dimensions fall through to `COMMON_FAILURE_MODES` only — safe
- * default, the grader can still pick `unclassified`.
+ * @see docs/decisions/D0005-grader-model-separation.md
  */
-export function failureModesForDimension(dimension) {
-    switch (dimension) {
-        // ── Literacy family ──────────────────────────────────────
-        case "task-completion":
-        case "code-correctness":
-        case "doc-coverage":
-            return [...COMMON_FAILURE_MODES, ...LITERACY_FAILURE_MODES];
-        // ── MCP family ───────────────────────────────────────────
-        // `mcp-behavior` is the family-level key (profile / depends-on
-        // shorthand). The per-template `dimension` strings are the four
-        // entries from config/rubrics.ts mcp-* templates.
-        case "mcp-behavior":
-        case "input-validation":
-        case "output-correctness":
-        case "error-handling":
-        case "security":
-            return [...COMMON_FAILURE_MODES, ...MCP_FAILURE_MODES];
-        // ── Knowledge-probe family ───────────────────────────────
-        case "knowledge-probe":
-        case "factual-correctness":
-        case "completeness":
-        case "currency":
-            return [...COMMON_FAILURE_MODES, ...KP_FAILURE_MODES];
-        // ── Agent-harness family ─────────────────────────────────
-        case "agent-harness":
-        case "process-quality":
-        case "agent-output":
-        case "tool-usage":
-            return [...COMMON_FAILURE_MODES, ...AGENT_FAILURE_MODES];
-        default:
-            return COMMON_FAILURE_MODES;
-    }
-}
+export { AGENT_FAILURE_MODES, CANONICAL_DIMENSIONS, COMMON_FAILURE_MODES, KP_FAILURE_MODES, LITERACY_FAILURE_MODES, MCP_FAILURE_MODES, failureModesForDimension, isCanonicalFailureMode, } from "../_vendor/ailf-core/index.js";

package/dist/grader/knowledge-probe.d.ts CHANGED Viewed

@@ -1,14 +1,9 @@
 /**
- * Knowledge-probe failure modes — valid for the `knowledge-probe` dimension
- * family (factual-correctness, completeness, currency).
+ * Knowledge-probe failure modes — re-export shim (D-05).
  *
- * Phase 3 GRAD-03 (Plan 03-02). Knowledge-probe failures track the model's
- * ability to recall facts about Sanity's surface area; the v0 modes
- * differentiate factual errors from omissions, currency drift, and
- * hallucination.
+ * Canonical data relocated to @sanity/ailf-core.
+ * Existing callers of this file continue to work unchanged.
  *
- * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
- *      §"Per-dimension failure-mode taxonomies" (lines 239-283).
+ * @see packages/core/src/grader/failure-modes/knowledge-probe.ts
  */
-export declare const KP_FAILURE_MODES: readonly ["factual-error", "incompleteness", "currency-violation", "hallucination"];
-export type KPFailureMode = (typeof KP_FAILURE_MODES)[number];
+export { KP_FAILURE_MODES, type KPFailureMode } from "../_vendor/ailf-core/index.d.ts";

package/dist/grader/knowledge-probe.js CHANGED Viewed

@@ -1,18 +1,9 @@
 /**
- * Knowledge-probe failure modes — valid for the `knowledge-probe` dimension
- * family (factual-correctness, completeness, currency).
+ * Knowledge-probe failure modes — re-export shim (D-05).
  *
- * Phase 3 GRAD-03 (Plan 03-02). Knowledge-probe failures track the model's
- * ability to recall facts about Sanity's surface area; the v0 modes
- * differentiate factual errors from omissions, currency drift, and
- * hallucination.
+ * Canonical data relocated to @sanity/ailf-core.
+ * Existing callers of this file continue to work unchanged.
  *
- * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
- *      §"Per-dimension failure-mode taxonomies" (lines 239-283).
+ * @see packages/core/src/grader/failure-modes/knowledge-probe.ts
  */
-export const KP_FAILURE_MODES = [
-    "factual-error", // assistant asserts something demonstrably false
-    "incompleteness", // assistant covers part of the answer; misses key piece
-    "currency-violation", // assistant cites stale facts beyond doc currency horizon
-    "hallucination", // assistant invents details not present in any doc
-];
+export { KP_FAILURE_MODES } from "../_vendor/ailf-core/index.js";

package/dist/grader/literacy.d.ts CHANGED Viewed

@@ -1,13 +1,9 @@
 /**
- * Literacy failure modes — valid for `task-completion`, `code-correctness`,
- * `doc-coverage` (the literacy dimension family).
+ * Literacy failure modes — re-export shim (D-05).
  *
- * Phase 3 GRAD-03 (Plan 03-02). The v0 list is the four canonical
- * documentation-quality failure modes. Conservative on purpose; calibration
- * (Plan 03-02 Task 3) reveals whether expansion is needed in a follow-on.
+ * Canonical data relocated to @sanity/ailf-core.
+ * Existing callers of this file continue to work unchanged.
  *
- * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
- *      §"Per-dimension failure-mode taxonomies" (lines 239-283).
+ * @see packages/core/src/grader/failure-modes/literacy.ts
  */
-export declare const LITERACY_FAILURE_MODES: readonly ["missing-docs", "outdated-docs", "incorrect-docs", "poor-structure"];
-export type LiteracyFailureMode = (typeof LITERACY_FAILURE_MODES)[number];
+export { LITERACY_FAILURE_MODES, type LiteracyFailureMode, } from "../_vendor/ailf-core/index.d.ts";

package/dist/grader/literacy.js CHANGED Viewed

@@ -1,17 +1,9 @@
 /**
- * Literacy failure modes — valid for `task-completion`, `code-correctness`,
- * `doc-coverage` (the literacy dimension family).
+ * Literacy failure modes — re-export shim (D-05).
  *
- * Phase 3 GRAD-03 (Plan 03-02). The v0 list is the four canonical
- * documentation-quality failure modes. Conservative on purpose; calibration
- * (Plan 03-02 Task 3) reveals whether expansion is needed in a follow-on.
+ * Canonical data relocated to @sanity/ailf-core.
+ * Existing callers of this file continue to work unchanged.
  *
- * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
- *      §"Per-dimension failure-mode taxonomies" (lines 239-283).
+ * @see packages/core/src/grader/failure-modes/literacy.ts
  */
-export const LITERACY_FAILURE_MODES = [
-    "missing-docs", // relevant doc didn't exist
-    "outdated-docs", // doc reflects an older API/version
-    "incorrect-docs", // doc states something factually wrong
-    "poor-structure", // doc exists but is hard to find or follow
-];
+export { LITERACY_FAILURE_MODES, } from "../_vendor/ailf-core/index.js";

package/dist/grader/mcp.d.ts CHANGED Viewed

@@ -1,14 +1,9 @@
 /**
- * MCP failure modes — valid for the `mcp-behavior` dimension family
- * (input-validation, output-correctness, error-handling, security).
+ * MCP failure modes — re-export shim (D-05).
  *
- * Phase 3 GRAD-03 (Plan 03-02). `missing-docs` is intentionally re-exported
- * from the literacy family — MCP server tasks frequently fail because the
- * MCP spec itself is under-documented; that's a literacy failure even when
- * surfaced through MCP grading.
+ * Canonical data relocated to @sanity/ailf-core.
+ * Existing callers of this file continue to work unchanged.
  *
- * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
- *      §"Per-dimension failure-mode taxonomies" (lines 239-283).
+ * @see packages/core/src/grader/failure-modes/mcp.ts
  */
-export declare const MCP_FAILURE_MODES: readonly ["spec-mismatch", "missing-error-handling", "over-privileged", "missing-docs"];
-export type MCPFailureMode = (typeof MCP_FAILURE_MODES)[number];
+export { MCP_FAILURE_MODES, type MCPFailureMode } from "../_vendor/ailf-core/index.d.ts";

package/dist/grader/mcp.js CHANGED Viewed

@@ -1,18 +1,9 @@
 /**
- * MCP failure modes — valid for the `mcp-behavior` dimension family
- * (input-validation, output-correctness, error-handling, security).
+ * MCP failure modes — re-export shim (D-05).
  *
- * Phase 3 GRAD-03 (Plan 03-02). `missing-docs` is intentionally re-exported
- * from the literacy family — MCP server tasks frequently fail because the
- * MCP spec itself is under-documented; that's a literacy failure even when
- * surfaced through MCP grading.
+ * Canonical data relocated to @sanity/ailf-core.
+ * Existing callers of this file continue to work unchanged.
  *
- * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
- *      §"Per-dimension failure-mode taxonomies" (lines 239-283).
+ * @see packages/core/src/grader/failure-modes/mcp.ts
  */
-export const MCP_FAILURE_MODES = [
-    "spec-mismatch", // tool/server output doesn't match published MCP spec
-    "missing-error-handling", // tool failure path under-documented or absent
-    "over-privileged", // tool exposes operations the doc didn't sanction
-    "missing-docs", // re-export from literacy (cross-cutting)
-];
+export { MCP_FAILURE_MODES } from "../_vendor/ailf-core/index.js";

package/dist/orchestration/pipeline-orchestrator.js CHANGED Viewed

@@ -275,6 +275,9 @@ export async function orchestratePipeline(ctx, steps) {
         belowCritical: state.belowCritical,
         durationMs,
         promptfooUrls: state.promptfooUrls,
+        // Phase 6 / DIAG-06 — thread reportId from state so the post-run hook in
+        // pipeline-action.ts can target the published Content Lake document.
+        reportId: state.reportId,
         steps: results,
         success: true,
         testSummary: state.testSummary,

package/dist/report-store.d.ts CHANGED Viewed

@@ -15,6 +15,7 @@
  * @see docs/design-docs/report-store/domain-model.md
  */
 import type { SanityClient } from "@sanity/client";
+import type { SynthesisCostTelemetry } from "./_vendor/ailf-core/index.d.ts";
 import type { ComparisonReport, ISOTimestamp, LineageQuery, Report, ReportId, ReportProvenance, ScoreSummary } from "./pipeline/types.js";
 /**
  * Result of an auto-comparison, bundling the ComparisonReport with the
@@ -89,6 +90,22 @@ export declare class ReportStore {
      * @see docs/design-docs/report-store/architecture.md — Auto-comparison
      */
     findComparableBaseline(query: LineageQuery): Promise<null | Report>;
+    /**
+     * Fetch the most recent report from the Content Lake.
+     *
+     * Mirrors the API gateway's `ReportStoreApi.latest()` signature
+     * (`packages/api/src/lib/sanity.ts`). Used by `ailf interpret latest`
+     * when no explicit report ID is supplied.
+     *
+     * @param query Optional narrowing by `mode` and/or `source.name`.
+     * @returns The most recent matching report, or null if none exist
+     *   or on API failure. Schema-validation errors are advisory (logged
+     *   and null-returned) per the same rationale as `findByFingerprint`.
+     */
+    latest(query?: {
+        mode?: string;
+        source?: string;
+    }): Promise<null | Report>;
     /**
      * Read a report by its ID.
      *
@@ -108,6 +125,15 @@ export declare class ReportStore {
      *   runtime schema gate. Schema drift is a bug, not an outage.
      */
     write(report: Report): Promise<null | ReportId>;
+    /**
+     * Patch synthesis telemetry onto a published report (Phase 6 / DIAG-06).
+     * Non-fatal on Sanity failure — the on-disk reportSnapshot artifact
+     * remains the source of truth. Mirrors `write()` (L379–411) for
+     * error handling.
+     *
+     * Document _id is `report-${reportId}` (see `toSanityReportDoc` L559).
+     */
+    patchSynthesis(reportId: ReportId, telemetry: SynthesisCostTelemetry): Promise<void>;
     /**
      * Query error arrays from the last N reports for chronic failure detection.
      *

package/dist/report-store.js CHANGED Viewed

@@ -207,6 +207,50 @@ export class ReportStore {
             return null;
         }
     }
+    /**
+     * Fetch the most recent report from the Content Lake.
+     *
+     * Mirrors the API gateway's `ReportStoreApi.latest()` signature
+     * (`packages/api/src/lib/sanity.ts`). Used by `ailf interpret latest`
+     * when no explicit report ID is supplied.
+     *
+     * @param query Optional narrowing by `mode` and/or `source.name`.
+     * @returns The most recent matching report, or null if none exist
+     *   or on API failure. Schema-validation errors are advisory (logged
+     *   and null-returned) per the same rationale as `findByFingerprint`.
+     */
+    async latest(query) {
+        try {
+            // Every filter is passed as a GROQ parameter, never interpolated.
+            const filters = ["_type == $type"];
+            const params = { type: REPORT_TYPE };
+            if (query?.mode) {
+                filters.push("provenance.mode == $mode");
+                params.mode = query.mode;
+            }
+            if (query?.source) {
+                filters.push("provenance.source.name == $source");
+                params.source = query.source;
+            }
+            const groq = `*[${filters.join(" && ")}] | order(completedAt desc) [0]`;
+            const newest = await this.client.fetch(groq, params);
+            return newest ? toReport(newest) : null;
+        }
+        catch (error) {
+            if (!(error instanceof ReportSchemaValidationError)) {
+                console.warn(`  ⚠️  Failed to fetch latest report from Sanity: ${error instanceof Error ? error.message : String(error)}`);
+                return null;
+            }
+            // Schema drift in a stored document is advisory for this read-only
+            // lookup: record it and let the caller report "no report found".
+            logAdvisoryQuerySchemaFailure({
+                query: "latest",
+                context: { mode: query?.mode, sourceName: query?.source },
+                error,
+            });
+            return null;
+        }
+    }
     /**
      * Read a report by its ID.
      *
@@ -264,6 +308,25 @@ export class ReportStore {
             return null;
         }
     }
+    /**
+     * Patch synthesis telemetry onto a published report (Phase 6 / DIAG-06).
+     * Non-fatal on Sanity failure — the on-disk reportSnapshot artifact
+     * remains the source of truth. Mirrors `write()` (L379–411) for
+     * error handling.
+     *
+     * Document _id is `report-${reportId}` (see `toSanityReportDoc` L559).
+     */
+    async patchSynthesis(reportId, telemetry) {
+        // NOTE(review): `set` likely overwrites sibling keys of summary.synthesis — confirm.
+        const synthesis = { diagnosis: telemetry };
+        try {
+            const patch = this.client.patch(`report-${reportId}`);
+            await patch.set({ "summary.synthesis": synthesis }).commit();
+        }
+        catch (error) {
+            console.warn(`  ⚠️  Failed to patch synthesis telemetry on report ${reportId}: ${error instanceof Error ? error.message : String(error)}`);
+        }
+    }
     /**
      * Query error arrays from the last N reports for chronic failure detection.
      *

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@sanity/ailf",
-  "version": "5.0.0",
+  "version": "6.1.0",
   "private": false,
   "publishConfig": {
     "access": "public"
@@ -61,7 +61,7 @@
   },
   "scripts": {
     "build": "tsc && tsc -p tsconfig.scripts.json && tsx scripts/bundle-workspace-deps.ts",
-    "generate-configs": "tsx src/cli.ts generate-configs",
+    "generate-configs": "tsx src/cli.ts generate-configs && tsx scripts/generate-diagnosis-config.ts",
     "fetch-docs": "tsx src/cli.ts fetch-docs",
     "measure-retrieval": "tsx src/cli.ts measure-retrieval",
     "eval": "tsx src/cli.ts eval",