npm - @sanity/ailf - Versions diffs - 2.8.0 → 2.9.0 - Mend

@sanity/ailf 2.8.0 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64)

package/dist/_vendor/ailf-core/artifact-capture/association.d.ts +35 -0
package/dist/_vendor/ailf-core/artifact-capture/association.js +28 -0
package/dist/_vendor/ailf-core/artifact-registry.d.ts +124 -23
package/dist/_vendor/ailf-core/artifact-registry.js +724 -63
package/dist/_vendor/ailf-core/index.d.ts +2 -1
package/dist/_vendor/ailf-core/index.js +2 -1
package/dist/_vendor/ailf-core/ports/artifact-writer.d.ts +59 -20
package/dist/_vendor/ailf-core/ports/artifact-writer.js +33 -10
package/dist/_vendor/ailf-core/ports/context.d.ts +21 -2
package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +6 -6
package/dist/_vendor/ailf-core/services/index.d.ts +1 -0
package/dist/_vendor/ailf-core/services/index.js +1 -0
package/dist/_vendor/ailf-core/services/slim-report-summary.d.ts +31 -0
package/dist/_vendor/ailf-core/services/slim-report-summary.js +217 -0
package/dist/_vendor/ailf-core/types/branded-ids.d.ts +33 -0
package/dist/_vendor/ailf-core/types/index.d.ts +202 -23
package/dist/artifact-capture/accumulating-artifact-writer.d.ts +50 -0
package/dist/artifact-capture/accumulating-artifact-writer.js +111 -0
package/dist/artifact-capture/api-gateway-artifact-writer.d.ts +17 -4
package/dist/artifact-capture/api-gateway-artifact-writer.js +58 -7
package/dist/artifact-capture/emit-file.d.ts +28 -0
package/dist/artifact-capture/emit-file.js +56 -0
package/dist/artifact-capture/fanout-artifact-writer.d.ts +39 -0
package/dist/artifact-capture/fanout-artifact-writer.js +76 -0
package/dist/artifact-capture/filesystem-collector.d.ts +22 -4
package/dist/artifact-capture/filesystem-collector.js +48 -23
package/dist/artifact-capture/gcs-artifact-writer.d.ts +40 -3
package/dist/artifact-capture/gcs-artifact-writer.js +238 -14
package/dist/artifact-capture/local-fs-artifact-writer.d.ts +71 -0
package/dist/artifact-capture/local-fs-artifact-writer.js +273 -0
package/dist/commands/explain-handler.js +4 -0
package/dist/commands/pipeline-action.d.ts +5 -0
package/dist/commands/pipeline-action.js +56 -5
package/dist/commands/pipeline.d.ts +4 -0
package/dist/commands/pipeline.js +6 -2
package/dist/commands/publish.js +4 -1
package/dist/composition-root.d.ts +13 -10
package/dist/composition-root.js +74 -20
package/dist/orchestration/pipeline-orchestrator.d.ts +1 -1
package/dist/orchestration/pipeline-orchestrator.js +41 -30
package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -1
package/dist/orchestration/steps/calculate-scores-step.js +19 -19
package/dist/orchestration/steps/callback-step.d.ts +1 -1
package/dist/orchestration/steps/callback-step.js +6 -4
package/dist/orchestration/steps/compare-step.d.ts +1 -1
package/dist/orchestration/steps/compare-step.js +4 -2
package/dist/orchestration/steps/discovery-report-step.d.ts +1 -1
package/dist/orchestration/steps/discovery-report-step.js +4 -1
package/dist/orchestration/steps/fetch-docs-step.js +9 -15
package/dist/orchestration/steps/finalize-run-step.js +21 -7
package/dist/orchestration/steps/gap-analysis-step.js +34 -6
package/dist/orchestration/steps/generate-configs-step.d.ts +1 -1
package/dist/orchestration/steps/generate-configs-step.js +11 -11
package/dist/orchestration/steps/publish-report-step.d.ts +1 -1
package/dist/orchestration/steps/publish-report-step.js +24 -19
package/dist/orchestration/steps/readiness-step.d.ts +1 -1
package/dist/orchestration/steps/readiness-step.js +4 -1
package/dist/orchestration/steps/report-step.d.ts +1 -1
package/dist/orchestration/steps/report-step.js +6 -3
package/dist/orchestration/steps/run-eval-step.js +14 -9
package/dist/pipeline/compare.d.ts +2 -2
package/dist/pipeline/emit-eval-results.d.ts +38 -0
package/dist/pipeline/emit-eval-results.js +100 -0
package/package.json +1 -1

package/dist/_vendor/ailf-core/artifact-registry.js CHANGED Viewed

@@ -1,31 +1,62 @@
 /**
  * Artifact registry — single source of truth for AILF's external artifact types.
  *
- * Every artifact that lives in GCS declares itself here exactly once:
- * layout, path builder, entry schema, and (for per-entry layouts) key parser.
+ * Every artifact that lives in GCS (or on the local filesystem after W0050)
+ * declares itself here exactly once: association axes, layout, path builder,
+ * entry schema, mime, cap, and (for per-entry layouts) format/parse helpers.
  * Eval writers, the API Gateway's signing endpoint, and the Studio hook all
  * consume this same record.
  *
  * Adding a new artifact type = one entry here. No call-site changes needed in
  * the generic writer / signer / hook — they all iterate the registry.
  *
+ * ## Association axes (D0033 / W0049)
+ *
+ * Each descriptor declares the pipeline dimensions it is evidence about. At
+ * module load a structural invariant rejects descriptors that declare an
+ * unbounded axis (`task`, `model`, `trial`) but a `"bulk"` layout — such a
+ * shape would serialize as a single JSON array that blows past the object-
+ * size cap at scale. The invariant converts that class of mistake into a
+ * process-won't-start error rather than a silent data bug.
+ *
  * @see docs/decisions/D0032-run-anchored-artifact-store.md
- * @see docs/design-docs/run-artifact-store.md (§ Move 4 — Artifact Registry)
+ * @see docs/decisions/D0033-unified-run-anchored-artifact-capture.md
+ * @see docs/design-docs/unified-run-artifacts.md (§ M1, § M5)
  */
 import { z } from "zod";
 // ---------------------------------------------------------------------------
 // Path + key helpers
 // ---------------------------------------------------------------------------
-function bulkPath(slug) {
-    return (runId) => `runs/${runId}/${slug}.json`;
+/**
+ * File extension per MIME. Kept in sync with the `ArtifactMime` union at the
+ * type level — adding a new mime requires adding a case here, and the L1
+ * contract test in slice 4 verifies every descriptor's path ends with the
+ * correct extension for its mime.
+ */
+function mimeExtension(mime) {
+    switch (mime) {
+        case "application/json":
+            return "json";
+        case "application/x-ndjson":
+            return "ndjson";
+        case "text/markdown":
+            return "md";
+        case "application/yaml":
+            return "yaml";
+    }
 }
-function perEntryPath(slug) {
+function bulkPathBuilder(slug, mime) {
+    const ext = mimeExtension(mime);
+    return (runId) => `runs/${runId}/${slug}.${ext}`;
+}
+function perEntryPathBuilder(slug, mime) {
+    const ext = mimeExtension(mime);
     return (runId, entryKey) => {
         if (entryKey === undefined) {
             throw new Error(`Artifact "${slug}" uses per-entry layout; an entry key is required`);
         }
         const sanitized = sanitizeEntryKey(entryKey);
-        return `runs/${runId}/${slug}/${sanitized}.json`;
+        return `runs/${runId}/${slug}/${sanitized}.${ext}`;
     };
 }
 /**
@@ -34,41 +65,150 @@ function perEntryPath(slug) {
  *
  * - `::` → `--` so the wire separator doesn't show up in the filename.
  * - `/` → `_` so task names like "Content Lake with @sanity/client" don't
- *   create unintended GCS subdirectories (`.../test-outputs/@sanity/client…`)
- *   and so `ls` against the per-entry directory shows one row per entry.
+ *   create unintended GCS subdirectories and so `ls` against the per-entry
+ *   directory shows one row per entry.
  *
  * Single colons (`:`) are preserved — modelIds like
  * `anthropic:messages:claude-opus-4-6` are valid GCS object names.
  *
  * NOTE: this mapping is not bijective. A taskId containing literal `--`
  * combined with a modelId could in theory collide with one whose taskId
- * contains `::`, and `_` collides with `/`. In practice, production
- * taskIds don't exercise these combinations. If collision-safety becomes a
- * concern (e.g., user-provided free-form task names), switch to
- * percent-encoding or a hash-based scheme at the key boundary.
+ * contains `::`. In practice, production taskIds don't exercise these
+ * combinations.
  */
 function sanitizeEntryKey(key) {
     return key.replace(/::/g, "--").replace(/\//g, "_");
 }
 /**
- * Entry-key parser for artifacts keyed by `{taskId}::{modelId}` — testOutputs
- * today, other per-entry types in future.
+ * Reject ASCII control characters and DEL in raw entry keys.
+ *
+ * Used at the top of every `parseEntryKey`. An authenticated caller can
+ * otherwise embed CRLF in a URL-path entryKey, reach `console.warn` via
+ * the non-blocking upload-failure path, and forge lines in shared log
+ * aggregators (W0049 review finding I4 / security-audit LOW).
+ *
+ * GCS also rejects most control characters in object names, so this
+ * catches the class at the boundary rather than waiting for a 400 from
+ * the signed-URL PUT.
+ */
+// eslint-disable-next-line no-control-regex
+const CONTROL_CHAR_PATTERN = /[\x00-\x1f\x7f]/;
+function hasControlChars(raw) {
+    return CONTROL_CHAR_PATTERN.test(raw);
+}
+/**
+ * Order of axes in the canonical key representation. `run` is never included
+ * in the entry key because the run dimension lives in the path prefix
+ * (`runs/{runId}/…`).
+ */
+const KEY_AXIS_ORDER = [
+    "mode",
+    "task",
+    "model",
+    "grader",
+    "trial",
+    "category",
+];
+/**
+ * Build a filename-safe entry key from association values by concatenating
+ * the axis values in `KEY_AXIS_ORDER` with `--`. Axes not listed in the
+ * descriptor's `association.axes` are skipped.
  *
- * The separator is `::` (double colon). Either segment may contain single
- * colons: production model ids commonly look like
- * `anthropic:messages:claude-opus-4-6`. The constraint is that `::` must
- * appear exactly once and neither segment is empty, so the API Gateway can
- * return 400 on malformed input.
+ * For descriptors with bounded-only axes (only `run`) that are per-entry
+ * (e.g. `sinkResults`), `assoc.name` is used as the key.
+ */
+function formatKeyFromAxes(axes) {
+    const keyAxes = KEY_AXIS_ORDER.filter((a) => axes.includes(a));
+    return (assoc) => {
+        if (keyAxes.length === 0) {
+            if (!assoc.name) {
+                throw new Error(`formatEntryKey: descriptor with axes [${axes.join(", ")}] requires assoc.name`);
+            }
+            return sanitizeEntryKey(assoc.name);
+        }
+        const parts = keyAxes.map((axis) => {
+            const raw = assoc[axis];
+            if (raw === undefined || raw === null || raw === "") {
+                throw new Error(`formatEntryKey: missing required axis "${axis}" in association`);
+            }
+            return String(raw);
+        });
+        return sanitizeEntryKey(parts.join("--"));
+    };
+}
+/**
+ * Strict parser that accepts only the canonical N-segment key matching the
+ * descriptor's axis count (minus `run`). Used by most per-entry descriptors.
  */
-function parseTaskModelKey(key) {
-    const parts = key.split("::");
-    if (parts.length !== 2 || !parts[0] || !parts[1]) {
+function parseKeyByAxes(type, axes) {
+    const expected = KEY_AXIS_ORDER.filter((a) => axes.includes(a)).length;
+    return (raw) => {
+        if (hasControlChars(raw)) {
+            return {
+                ok: false,
+                reason: `Entry key for "${type}" must not contain control characters`,
+            };
+        }
+        const sanitized = sanitizeEntryKey(raw);
+        if (expected === 0) {
+            if (!sanitized) {
+                return {
+                    ok: false,
+                    reason: `Entry key for "${type}" must be a non-empty identifier`,
+                };
+            }
+            return { ok: true, sanitized: sanitized };
+        }
+        const parts = sanitized.split("--");
+        if (parts.length !== expected || parts.some((p) => !p)) {
+            return {
+                ok: false,
+                reason: `Entry key "${raw}" for "${type}" must match ${expected}-segment form (${KEY_AXIS_ORDER.filter((a) => axes.includes(a)).join("--")}) with non-empty segments`,
+            };
+        }
+        return { ok: true, sanitized: sanitized };
+    };
+}
+/**
+ * testOutputs parser. Accepts both the new 3-segment form
+ * (`mode--task--model`) and the legacy 2-segment form (`task--model`); the
+ * latter emits a one-time-per-process deprecation warning so noisy logs do
+ * not mask the signal.
+ *
+ * The separator is `--` (post-sanitization). The legacy wire format
+ * (`task::model`) still works because `sanitizeEntryKey` maps `::` → `--`
+ * before the split — producers that haven't migrated keep functioning.
+ *
+ * Deprecation scheduled for W0052.
+ */
+let warnedLegacyTestOutputsKey = false;
+function parseTestOutputsEntryKey(raw) {
+    if (hasControlChars(raw)) {
         return {
             ok: false,
-            reason: `Entry key "${key}" must match {taskId}::{modelId} with exactly one "::" separator and non-empty segments`,
+            reason: `Entry key for "testOutputs" must not contain control characters`,
         };
     }
-    return { ok: true, sanitized: sanitizeEntryKey(key) };
+    const sanitized = sanitizeEntryKey(raw);
+    const parts = sanitized.split("--");
+    if (parts.length === 3 && parts.every((p) => p.length > 0)) {
+        return { ok: true, sanitized: sanitized };
+    }
+    if (parts.length === 2 && parts.every((p) => p.length > 0)) {
+        if (!warnedLegacyTestOutputsKey) {
+            warnedLegacyTestOutputsKey = true;
+            console.warn("legacy testOutputs entry key (2-segment) is deprecated");
+        }
+        return { ok: true, sanitized: sanitized };
+    }
+    return {
+        ok: false,
+        reason: `Entry key "${raw}" for "testOutputs" must match {mode}--{task}--{model} (3-segment) or legacy {task}--{model} (2-segment) with non-empty segments`,
+    };
+}
+/** Test-only reset for the legacy-key warning flag. Exported from this module for tests; not intended as public API. */
+export function __resetLegacyTestOutputsWarning() {
+    warnedLegacyTestOutputsKey = false;
 }
 // ---------------------------------------------------------------------------
 // Entry schemas
@@ -77,67 +217,436 @@ const testOutputEntrySchema = z.object({
     responseOutput: z.string(),
     responseOutputTruncated: z.boolean(),
 });
-// Aspirational: renderedPrompts / rawResults / traces / etc. currently have
-// loose shapes. Tighten per-type as consumers stabilize.
+/**
+ * Preview shape for `testOutputs` manifest entries (W0051 / D0033 M7). A
+ * Studio list view can render the truncated response + truncation flag
+ * without a signed-URL round trip.
+ */
+const testOutputPreviewSchema = z.object({
+    responsePreview: z.string().max(280),
+    truncated: z.boolean(),
+});
+/**
+ * Preview shape for `failureModes` per-category manifest entries (W0051
+ * Slice 2 / D0033 M7). `failureModes` is keyed by `{mode, category}` after
+ * Slice 2 — one entry per classified FailureModeType inside each mode.
+ *
+ * Severity is derived at extract time from the entry's `count` using the
+ * bucketing in `severityForCount`. The bucket thresholds are a first-pass
+ * heuristic; callers can override by supplying an explicit `severity` on
+ * the entry payload if more nuanced signals become available.
+ */
+const failureModePreviewSchema = z.object({
+    category: z.string(),
+    severity: z.enum(["low", "medium", "high", "critical"]),
+    titlePreview: z.string().max(120),
+});
+/**
+ * Preview shape for `graderJudgments` manifest entries (W0051 / D0033 M7).
+ * List views render score + a short reason excerpt; drill-down hydrates
+ * the full `{ reason, pass, dimensionScores }` from the external artifact.
+ *
+ * `score` is kept as `number` rather than `int(0..100)` so legacy judgments
+ * with fractional scores (pre-W0048 rubrics produced 0–1 values) don't
+ * reject at read time; the score-normalization step is upstream of emit.
+ */
+const graderJudgmentPreviewSchema = z.object({
+    score: z.number(),
+    reasonPreview: z.string().max(280),
+    dimensionScores: z.record(z.string(), z.number()).optional(),
+});
+// Aspirational: most payload shapes are still loose. Tightening per-type as
+// consumers stabilize is explicitly a W0050/W0051 concern — W0049 fixes the
+// structural shape around them without changing the payload contracts.
 const unknownEntry = z.unknown();
+/**
+ * Truncate a string to at most `maxChars` code units. Code-unit-based
+ * truncation is safe for the preview use-case — previews are display-bound
+ * approximations and a rare mid-surrogate cut manifests as a replacement
+ * glyph, not data corruption. Pure function.
+ */
+function truncateString(s, maxChars) {
+    return s.length <= maxChars ? s : s.slice(0, maxChars);
+}
+/**
+ * Narrow check that a value is a non-null, non-array object whose values
+ * are all numbers. Used by preview extractors to safely include optional
+ * per-dimension score fields without trusting the caller's runtime shape.
+ */
+function isStringNumberRecord(value) {
+    if (value === null || typeof value !== "object" || Array.isArray(value))
+        return false;
+    for (const v of Object.values(value)) {
+        if (typeof v !== "number")
+            return false;
+    }
+    return true;
+}
+/**
+ * Bucket a classified-judgment count into a severity tier. Thresholds are
+ * count-based: a category with 10+ judgments is critical triage; 5–9 is
+ * high; 2–4 medium; 0–1 low. Exposed so producers can mirror this when
+ * supplying explicit severity values on entries that have richer signal
+ * (e.g. per-dimension aggregate).
+ */
+function severityForCount(count) {
+    if (count >= 10)
+        return "critical";
+    if (count >= 5)
+        return "high";
+    if (count >= 2)
+        return "medium";
+    return "low";
+}
+/**
+ * Render a FailureModeType-style kebab-case category id as Title Case for
+ * preview display (e.g. `"missing-docs"` → `"Missing Docs"`).
+ */
+function titleCaseCategory(id) {
+    return id
+        .split("-")
+        .map((w) => (w.length === 0 ? w : w[0].toUpperCase() + w.slice(1)))
+        .join(" ");
+}
+function buildDescriptor(input) {
+    const objectPath = input.layout === "bulk"
+        ? bulkPathBuilder(input.slug, input.mime)
+        : perEntryPathBuilder(input.slug, input.mime);
+    const formatEntryKey = input.layout === "per-entry" ? formatKeyFromAxes(input.axes) : undefined;
+    const parseEntryKey = input.layout === "per-entry"
+        ? (input.parseEntryKey ?? parseKeyByAxes(input.type, input.axes))
+        : undefined;
+    return {
+        type: input.type,
+        layout: input.layout,
+        slug: input.slug,
+        association: { axes: input.axes },
+        entrySchema: input.entrySchema,
+        mime: input.mime,
+        capBytes: input.capBytes,
+        truncation: input.truncation,
+        optional: input.optional,
+        objectPath,
+        formatEntryKey,
+        parseEntryKey,
+        manifestPreview: input.manifestPreview,
+    };
+}
 // ---------------------------------------------------------------------------
-// The registry
+// The registry — 25 live descriptors + 1 deprecated (evalResults)
 // ---------------------------------------------------------------------------
 /**
  * The canonical artifact descriptor for every artifact type. Iterate with
  * `Object.values(ARTIFACT_REGISTRY)` or look up by `ARTIFACT_REGISTRY[type]`.
+ *
+ * Axes, layout, and caps come from docs/design-docs/unified-run-artifacts.md
+ * § M5. The mapping is verified by the L1 contract tests.
  */
 export const ARTIFACT_REGISTRY = {
-    testOutputs: {
-        type: "testOutputs",
+    // -- Run-scoped bulk artifacts (one per run) ------------------------------
+    runManifest: buildDescriptor({
+        type: "runManifest",
+        slug: "run-manifest",
+        layout: "bulk",
+        axes: ["run"],
+        entrySchema: unknownEntry,
+        mime: "application/json",
+        capBytes: 1_000_000,
+    }),
+    scoreSummary: buildDescriptor({
+        type: "scoreSummary",
+        slug: "score-summary",
+        layout: "bulk",
+        axes: ["run"],
+        entrySchema: unknownEntry,
+        mime: "application/json",
+        capBytes: 1_000_000,
+    }),
+    pipelineResult: buildDescriptor({
+        type: "pipelineResult",
+        slug: "pipeline-result",
+        layout: "bulk",
+        axes: ["run"],
+        entrySchema: unknownEntry,
+        mime: "application/json",
+        capBytes: 1_000_000,
+    }),
+    pipelineContext: buildDescriptor({
+        type: "pipelineContext",
+        slug: "pipeline-context",
+        layout: "bulk",
+        axes: ["run"],
+        entrySchema: unknownEntry,
+        mime: "application/json",
+        capBytes: 64_000,
+    }),
+    documentManifest: buildDescriptor({
+        type: "documentManifest",
+        slug: "document-manifest",
+        layout: "bulk",
+        axes: ["run"],
+        entrySchema: unknownEntry,
+        mime: "application/json",
+        capBytes: 256_000,
+    }),
+    prComment: buildDescriptor({
+        type: "prComment",
+        slug: "pr-comment",
+        layout: "bulk",
+        axes: ["run"],
+        entrySchema: z.string(),
+        mime: "text/markdown",
+        capBytes: 256_000,
+        optional: true,
+    }),
+    readinessReport: buildDescriptor({
+        type: "readinessReport",
+        slug: "readiness-report",
+        layout: "bulk",
+        axes: ["run"],
+        entrySchema: z.string(),
+        mime: "text/markdown",
+        capBytes: 256_000,
+        optional: true,
+    }),
+    reportSnapshot: buildDescriptor({
+        type: "reportSnapshot",
+        slug: "report-snapshot",
+        layout: "bulk",
+        axes: ["run"],
+        entrySchema: unknownEntry,
+        mime: "application/json",
+        capBytes: 10_000_000,
+    }),
+    autoComparison: buildDescriptor({
+        type: "autoComparison",
+        slug: "auto-comparison",
+        layout: "bulk",
+        axes: ["run"],
+        entrySchema: unknownEntry,
+        mime: "application/json",
+        capBytes: 4_000_000,
+        optional: true,
+    }),
+    gapReport: buildDescriptor({
+        type: "gapReport",
+        slug: "gap-report",
+        layout: "bulk",
+        axes: ["run"],
+        entrySchema: unknownEntry,
+        mime: "application/json",
+        capBytes: 1_000_000,
+        optional: true,
+    }),
+    // -- Run-scoped per-entry artifacts (keyed by assoc.name) -----------------
+    sinkResults: buildDescriptor({
+        type: "sinkResults",
+        slug: "sink-results",
         layout: "per-entry",
-        slug: "test-outputs",
-        entrySchema: testOutputEntrySchema,
-        objectPath: perEntryPath("test-outputs"),
-        parseEntryKey: parseTaskModelKey,
-    },
-    renderedPrompts: {
+        axes: ["run"],
+        entrySchema: unknownEntry,
+        mime: "application/json",
+        capBytes: 64_000,
+    }),
+    callbackRequest: buildDescriptor({
+        type: "callbackRequest",
+        slug: "callback-request",
+        layout: "per-entry",
+        axes: ["run"],
+        entrySchema: unknownEntry,
+        mime: "application/json",
+        capBytes: 64_000,
+        optional: true,
+    }),
+    callbackResponse: buildDescriptor({
+        type: "callbackResponse",
+        slug: "callback-response",
+        layout: "per-entry",
+        axes: ["run"],
+        entrySchema: unknownEntry,
+        mime: "application/json",
+        capBytes: 64_000,
+        optional: true,
+    }),
+    // -- Run × Mode ------------------------------------------------------------
+    configSnapshot: buildDescriptor({
+        type: "configSnapshot",
+        slug: "config-snapshot",
+        layout: "per-entry",
+        axes: ["run", "mode"],
+        entrySchema: z.string(),
+        mime: "application/yaml",
+        capBytes: 256_000,
+    }),
+    evalConfigGenerated: buildDescriptor({
+        type: "evalConfigGenerated",
+        slug: "eval-config-generated",
+        layout: "per-entry",
+        axes: ["run", "mode"],
+        entrySchema: z.string(),
+        mime: "application/yaml",
+        capBytes: 256_000,
+        optional: true,
+    }),
+    comparisonReport: buildDescriptor({
+        type: "comparisonReport",
+        slug: "comparison-report",
+        layout: "per-entry",
+        axes: ["run", "mode"],
+        entrySchema: unknownEntry,
+        mime: "application/json",
+        capBytes: 1_000_000,
+        optional: true,
+    }),
+    discoveryReport: buildDescriptor({
+        type: "discoveryReport",
+        slug: "discovery-report",
+        layout: "per-entry",
+        axes: ["run", "mode"],
+        entrySchema: z.string(),
+        mime: "text/markdown",
+        capBytes: 1_000_000,
+        optional: true,
+    }),
+    failureModes: buildDescriptor({
+        type: "failureModes",
+        slug: "failure-modes",
+        layout: "per-entry",
+        axes: ["run", "mode", "category"],
+        entrySchema: unknownEntry,
+        mime: "application/json",
+        capBytes: 1_000_000,
+        optional: true,
+        manifestPreview: {
+            schema: failureModePreviewSchema,
+            extract: (entry) => {
+                const e = entry;
+                const category = typeof e.category === "string" ? e.category : "unknown";
+                const count = typeof e.count === "number" ? e.count : 0;
+                const title = typeof e.title === "string" ? e.title : titleCaseCategory(category);
+                return {
+                    category,
+                    severity: severityForCount(count),
+                    titlePreview: truncateString(title, 120),
+                };
+            },
+            capBytes: 256,
+        },
+    }),
+    // -- Run × Mode × Task(+…) -------------------------------------------------
+    taskDefinitions: buildDescriptor({
+        type: "taskDefinitions",
+        slug: "task-definitions",
+        layout: "per-entry",
+        axes: ["run", "mode", "task"],
+        entrySchema: unknownEntry,
+        mime: "application/json",
+        capBytes: 256_000,
+    }),
+    renderedPrompts: buildDescriptor({
         type: "renderedPrompts",
-        layout: "bulk",
         slug: "rendered-prompts",
+        layout: "per-entry",
+        axes: ["run", "mode", "task", "model"],
         entrySchema: unknownEntry,
-        objectPath: bulkPath("rendered-prompts"),
-    },
-    rawResults: {
+        mime: "application/json",
+        capBytes: 1_000_000,
+    }),
+    rawResults: buildDescriptor({
         type: "rawResults",
-        layout: "bulk",
         slug: "raw-results",
+        layout: "per-entry",
+        axes: ["run", "mode", "task", "model"],
         entrySchema: unknownEntry,
-        objectPath: bulkPath("raw-results"),
-    },
-    graderPrompts: {
+        mime: "application/json",
+        capBytes: 1_000_000,
+    }),
+    testOutputs: buildDescriptor({
+        type: "testOutputs",
+        slug: "test-outputs",
+        layout: "per-entry",
+        axes: ["run", "mode", "task", "model"],
+        entrySchema: testOutputEntrySchema,
+        mime: "application/json",
+        capBytes: 1_000_000,
+        parseEntryKey: parseTestOutputsEntryKey,
+        manifestPreview: {
+            schema: testOutputPreviewSchema,
+            extract: (entry) => {
+                const e = entry;
+                const raw = typeof e.responseOutput === "string" ? e.responseOutput : "";
+                return {
+                    responsePreview: truncateString(raw, 280),
+                    truncated: typeof e.responseOutputTruncated === "boolean"
+                        ? e.responseOutputTruncated
+                        : false,
+                };
+            },
+            capBytes: 320,
+        },
+    }),
+    graderPrompts: buildDescriptor({
         type: "graderPrompts",
-        layout: "bulk",
         slug: "grader-prompts",
+        layout: "per-entry",
+        axes: ["run", "mode", "task", "model", "grader"],
         entrySchema: unknownEntry,
-        objectPath: bulkPath("grader-prompts"),
-    },
-    taskDefinitions: {
-        type: "taskDefinitions",
-        layout: "bulk",
-        slug: "task-definitions",
+        mime: "application/json",
+        capBytes: 512_000,
+    }),
+    graderJudgments: buildDescriptor({
+        type: "graderJudgments",
+        slug: "grader-judgments",
+        layout: "per-entry",
+        axes: ["run", "mode", "task", "model", "grader"],
         entrySchema: unknownEntry,
-        objectPath: bulkPath("task-definitions"),
-    },
-    evalResults: {
+        mime: "application/json",
+        capBytes: 512_000,
+        manifestPreview: {
+            schema: graderJudgmentPreviewSchema,
+            extract: (entry) => {
+                const e = entry;
+                const score = typeof e.score === "number" ? e.score : 0;
+                const reasonText = typeof e.reason === "string" ? e.reason : "";
+                const dimensionScores = isStringNumberRecord(e.dimensionScores)
+                    ? e.dimensionScores
+                    : undefined;
+                return {
+                    score,
+                    reasonPreview: truncateString(reasonText, 280),
+                    ...(dimensionScores === undefined ? {} : { dimensionScores }),
+                };
+            },
+            capBytes: 512,
+        },
+    }),
+    traces: buildDescriptor({
+        type: "traces",
+        slug: "traces",
+        layout: "per-entry",
+        axes: ["run", "mode", "task", "model", "trial"],
+        entrySchema: unknownEntry,
+        mime: "application/x-ndjson",
+        capBytes: 10_000_000,
+        truncation: "trial-oversize",
+    }),
+    /**
+     * @deprecated Emit removed in W0050 (no producer calls `emit("evalResults")`
+     * any more — `emit-eval-results.ts` decomposes the promptfoo aggregate into
+     * per-entry rawResults / renderedPrompts / graderPrompts / graderJudgments
+     * instead). Descriptor retained for read-compat on pre-W0050 reports until
+     * W0052 removes it entirely. No code path should re-introduce emission.
+     */
+    evalResults: buildDescriptor({
         type: "evalResults",
-        layout: "bulk",
         slug: "eval-results",
-        entrySchema: unknownEntry,
-        objectPath: bulkPath("eval-results"),
-    },
-    traces: {
-        type: "traces",
         layout: "bulk",
-        slug: "traces",
+        axes: ["run"],
         entrySchema: unknownEntry,
-        objectPath: bulkPath("traces"),
-    },
+        mime: "application/json",
+        capBytes: 10_000_000,
+        optional: true,
+    }),
 };
 /** All artifact types in declaration order. */
 export const ARTIFACT_TYPES = Object.keys(ARTIFACT_REGISTRY);
@@ -148,3 +657,155 @@ export const ARTIFACT_TYPES = Object.keys(ARTIFACT_REGISTRY);
 export function isArtifactType(value) {
     // Only strings can name an artifact type. Without this guard a value such
     // as ["traces"] would be coerced to the key "traces" and accepted.
     if (typeof value !== "string") {
         return false;
     }
     // Own-property check: the `in` operator also matches inherited members
     // like "toString" or "constructor", which are not registry entries.
     return Object.prototype.hasOwnProperty.call(ARTIFACT_REGISTRY, value);
 }
+// ---------------------------------------------------------------------------
+// Module-load invariant (D0033 / W0049)
+// ---------------------------------------------------------------------------
/**
 * Axes with unbounded cardinality: the number of distinct values along each
 * of them grows with the size of a run. A bulk artifact that fans out over
 * any of these has no upper bound on its payload, so the registry refuses
 * that combination when the module is imported.
 */
const UNBOUNDED_AXES = ["task", "model", "trial"];
/**
 * Check the structural invariants of one artifact descriptor. Exported so
 * L1 contract tests can build an invalid descriptor inline and assert that
 * it is rejected.
 *
 * Checks, in order:
 *  1. a descriptor whose `association.axes` includes an unbounded axis must
 *     use layout "per-entry";
 *  2. `capBytes` must not be `<= 0`;
 *  3. a "per-entry" descriptor must provide a truthy `formatEntryKey`.
 *
 * @param desc descriptor to validate; reads `type`, `layout`, `capBytes`,
 *   `formatEntryKey` and `association.axes`.
 * @throws {Error} on the first violated invariant.
 */
export function assertValidArtifactDescriptor(desc) {
    const { type, layout } = desc;
    const unboundedAxes = desc.association.axes.filter((axis) => UNBOUNDED_AXES.includes(axis));
    if (unboundedAxes.length > 0 && layout !== "per-entry") {
        const listed = unboundedAxes.join(", ");
        throw new Error(`Artifact ${type}: association contains unbounded axis (${listed}) but layout is "${layout}". Unbounded axes require layout "per-entry".`);
    }
    const { capBytes } = desc;
    if (capBytes <= 0) {
        throw new Error(`Artifact ${type}: capBytes must be > 0 (got ${capBytes})`);
    }
    const isPerEntry = layout === "per-entry";
    if (isPerEntry && !desc.formatEntryKey) {
        throw new Error(`Artifact ${type}: per-entry descriptors must declare formatEntryKey`);
    }
}
// Enforce the invariant while the module is being loaded: the first invalid
// descriptor throws and aborts the import, so no producer ever gets the
// chance to silently serialize an oversized JSON array.
Object.values(ARTIFACT_REGISTRY).forEach((descriptor) => {
    assertValidArtifactDescriptor(descriptor);
});
+// ---------------------------------------------------------------------------
+// Manifest preview helper (W0051 / D0033 M7)
+// ---------------------------------------------------------------------------
/**
 * Build the inline preview for a manifest entry at write time.
 *
 * Returns `undefined` when:
 *  - the descriptor has no `manifestPreview` declaration,
 *  - `manifestPreview.extract` throws,
 *  - `manifestPreview.schema` rejects the extracted shape, or
 *  - cap enforcement cannot bring the serialized preview under the
 *    declaration's own budget, `manifestPreview.capBytes`.
 *
 * Each of the last three cases logs a `console.warn` and drops the preview.
 * Failure is non-fatal: preview is triage metadata, never critical-path data.
 *
 * Cap enforcement is delegated to `fitPreviewToCap`, which signals failure by
 * returning `undefined`. The result is therefore compared against `undefined`
 * rather than tested for truthiness, so a schema-valid preview that happens
 * to be falsy (`0`, `""`, `false`, `null`) is returned instead of being
 * misreported as exceeding the cap.
 *
 * @param descriptor artifact descriptor; reads `type` and `manifestPreview`
 *   (`extract`, `schema`, `capBytes`).
 * @param payload value handed to `manifestPreview.extract`.
 * @returns the fitted preview, or `undefined` when no preview is produced.
 */
export function buildManifestPreview(descriptor, payload) {
    const decl = descriptor.manifestPreview;
    if (!decl) {
        return undefined;
    }
    let extracted;
    try {
        extracted = decl.extract(payload);
    }
    catch (err) {
        console.warn(`  ⚠️  manifestPreview.extract("${descriptor.type}") threw: ${errMessage(err)} — dropping preview`);
        return undefined;
    }
    const parsed = decl.schema.safeParse(extracted);
    if (!parsed.success) {
        console.warn(`  ⚠️  manifestPreview schema rejected "${descriptor.type}" preview — dropping preview`);
        return undefined;
    }
    const fitted = fitPreviewToCap(parsed.data, decl.capBytes);
    // `undefined` is the only failure signal; falsy previews are legitimate.
    if (fitted === undefined) {
        console.warn(`  ⚠️  manifestPreview for "${descriptor.type}" exceeds capBytes=${decl.capBytes} and cannot be truncated — dropping preview`);
        return undefined;
    }
    return fitted;
}
/**
 * Produce a printable message for any thrown value: the `message` of an
 * `Error` instance, otherwise the value converted with `String()`.
 */
function errMessage(err) {
    if (err instanceof Error) {
        return err.message;
    }
    return String(err);
}
/**
 * Hard-truncate top-level string fields of `preview` until
 * `JSON.stringify(preview)` fits within `capBytes` (UTF-8 bytes).
 *
 * Behavior:
 *  - Primitives, `null` and arrays have no top-level string fields to trim;
 *    they are returned unchanged when they fit and rejected otherwise.
 *    Values JSON cannot serialize at all (e.g. `undefined`) are rejected.
 *  - A plain object that already fits is returned as-is (same reference).
 *  - Otherwise a shallow clone is shortened: on each pass the longest string
 *    field loses ~10% of its length (at least 4 UTF-16 units); a field of
 *    length <= 1 is deleted. The input object is never mutated.
 *  - A cut never lands between the two halves of a surrogate pair, so the
 *    fitted preview does not gain a lone surrogate.
 *
 * @param preview schema-validated preview value.
 * @param capBytes maximum serialized size in UTF-8 bytes.
 * @returns the fitted preview, or `undefined` if no amount of shortening
 *   brings it under the cap (e.g. the non-string fields alone exceed it).
 */
function fitPreviewToCap(preview, capBytes) {
    if (preview === null || typeof preview !== "object" || Array.isArray(preview)) {
        // Spreading an array would turn it into an index-keyed object, so
        // arrays are measured in their own shape and never trimmed.
        const serialized = JSON.stringify(preview);
        if (serialized === undefined) {
            // JSON.stringify yields undefined for undefined/functions/symbols.
            return undefined;
        }
        return byteLengthUtf8(serialized) <= capBytes ? preview : undefined;
    }
    const clone = { ...preview };
    if (byteLengthUtf8(JSON.stringify(clone)) <= capBytes) {
        return preview; // already fits; original returned
    }
    // Bound the loop to guard against pathological schemas.
    for (let pass = 0; pass < 256; pass++) {
        const longestKey = findLongestStringKey(clone);
        if (longestKey === null) {
            return undefined; // nothing left to trim
        }
        const current = clone[longestKey];
        if (current.length <= 1) {
            // Drop this field entirely — it can't be shortened further.
            delete clone[longestKey];
        }
        else {
            const trimBy = Math.max(4, Math.ceil(current.length * 0.1));
            // Clamp at 0: a negative end index would make slice() count from
            // the end of the string instead of trimming `trimBy` units.
            let keep = Math.max(0, current.length - trimBy);
            if (keep > 0) {
                const before = current.charCodeAt(keep - 1);
                const after = current.charCodeAt(keep);
                const splitsPair = before >= 0xd800 && before <= 0xdbff &&
                    after >= 0xdc00 && after <= 0xdfff;
                if (splitsPair) {
                    // Back off one unit so the whole pair is removed.
                    keep -= 1;
                }
            }
            clone[longestKey] = current.slice(0, keep);
        }
        if (byteLengthUtf8(JSON.stringify(clone)) <= capBytes) {
            return clone;
        }
    }
    return undefined;
}
/**
 * UTF-8 byte length of a string. Implemented without Node's `Buffer` so
 * `@sanity/ailf-core` stays dependency-free of `@types/node` — the kernel
 * runs in both Node and the browser (Studio).
 *
 * Iterates by code point, so a well-formed surrogate pair counts as one
 * 4-byte sequence. An unpaired surrogate (high or low) counts as 3 bytes,
 * the size of U+FFFD, which is what UTF-8 encoders substitute for it; the
 * code unit after an unpaired high surrogate is still counted on its own.
 *
 * @param s string to measure.
 * @returns number of bytes `s` occupies when encoded as UTF-8.
 */
function byteLengthUtf8(s) {
    let bytes = 0;
    // The string iterator yields whole code points; a lone surrogate is
    // yielded as a single unit whose code point lies in 0xD800–0xDFFF.
    for (const ch of s) {
        const codePoint = ch.codePointAt(0);
        if (codePoint < 0x80) {
            bytes += 1;
        }
        else if (codePoint < 0x800) {
            bytes += 2;
        }
        else if (codePoint < 0x10000) {
            bytes += 3;
        }
        else {
            bytes += 4;
        }
    }
    return bytes;
}
/**
 * Return the key of the longest string-valued own enumerable property of
 * `obj` (length measured in UTF-16 code units), or `null` when `obj` has no
 * string-valued property. On a tie the earliest key in enumeration order wins.
 */
function findLongestStringKey(obj) {
    let longestKey = null;
    let longestLen = -1; // below any real length, so an empty string still qualifies
    for (const key of Object.keys(obj)) {
        const value = obj[key];
        if (typeof value !== "string") {
            continue;
        }
        if (value.length > longestLen) {
            longestKey = key;
            longestLen = value.length;
        }
    }
    return longestKey;
}