npm - @sanity/ailf - Versions diffs - 7.2.2 → 7.3.0 - Mend

@sanity/ailf 7.2.2 → 7.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

package/config/airbyte/ai_literacy_framework.connector.yaml +38 -0
package/config/bigquery/README.md +39 -7
package/config/bigquery/views/reports.sql +6 -0
package/dist/_vendor/ailf-core/ports/artifact-writer.d.ts +22 -0
package/dist/_vendor/ailf-core/ports/index.d.ts +1 -1
package/dist/_vendor/ailf-core/schemas/report.d.ts +30 -0
package/dist/_vendor/ailf-core/schemas/report.js +21 -2
package/dist/_vendor/ailf-core/services/diagnosis/cards/top-recommendations.js +14 -0
package/dist/_vendor/ailf-core/services/index.d.ts +1 -0
package/dist/_vendor/ailf-core/services/index.js +4 -0
package/dist/_vendor/ailf-core/services/report-validity-detector.d.ts +116 -0
package/dist/_vendor/ailf-core/services/report-validity-detector.js +128 -0
package/dist/_vendor/ailf-core/types/index.d.ts +19 -0
package/dist/_vendor/ailf-core/types/index.js +1 -0
package/dist/_vendor/ailf-core/types/report-validity.d.ts +60 -0
package/dist/_vendor/ailf-core/types/report-validity.js +42 -0
package/dist/_vendor/ailf-shared/generated/help-content.js +4 -3
package/dist/_vendor/ailf-shared/glossary.d.ts +32 -0
package/dist/_vendor/ailf-shared/glossary.js +35 -0
package/dist/_vendor/ailf-shared/index.d.ts +2 -1
package/dist/_vendor/ailf-shared/index.js +2 -1
package/dist/_vendor/ailf-shared/run-classification.d.ts +53 -0
package/dist/_vendor/ailf-shared/run-classification.js +111 -0
package/dist/_vendor/ailf-shared/trustworthiness.d.ts +97 -0
package/dist/_vendor/ailf-shared/trustworthiness.js +86 -0
package/dist/adapters/task-sources/repo-schemas.d.ts +1 -1
package/dist/artifact-capture/fanout-artifact-writer.d.ts +8 -0
package/dist/artifact-capture/fanout-artifact-writer.js +10 -0
package/dist/artifact-capture/gcs-artifact-writer.d.ts +12 -2
package/dist/artifact-capture/gcs-artifact-writer.js +18 -0
package/dist/commands/publish.js +9 -2
package/dist/orchestration/steps/publish-report-step.js +11 -3
package/dist/orchestration/steps/run-eval-step.js +56 -3
package/dist/pipeline/cache-hit-restore.d.ts +37 -1
package/dist/pipeline/cache-hit-restore.js +108 -1
package/dist/pipeline/report-validity.d.ts +32 -0
package/dist/pipeline/report-validity.js +43 -0
package/dist/report-store.d.ts +1 -0
package/dist/report-store.js +2 -0
package/package.json +1 -1

package/dist/_vendor/ailf-shared/trustworthiness.js ADDED Viewed

@@ -0,0 +1,86 @@
+/**
+ * trustworthiness.ts — The single trust gate for reports (D0059).
+ *
+ * `includeInDefaultTrends` is the one definition of "show this report by
+ * default." Every surface (dashboard analytics, Studio presets, the BigQuery
+ * `reports.sql` view) references this predicate so the gate cannot drift
+ * between consumers.
+ *
+ * Two orthogonal axes decide inclusion:
+ *
+ *  - **Validity (data health, D0059)** — the *primary* gate. A report is
+ *    included only when its `validity.status` is `ok` OR validity is absent
+ *    (pre-stamp reads are trusted until backfilled — the rollout is additive
+ *    and nullable). Any non-`ok` status (`degraded` / `incomplete` /
+ *    `suspect`) excludes the report regardless of intent.
+ *  - **Intent (run classification, D0037)** — a *secondary* exclusion. The
+ *    explicit `test` and `experimental` classifications are dropped;
+ *    `adhoc` / `official` / `external` (and a missing classification) are kept.
+ *    `adhoc` is intentionally included — it holds real production one-offs;
+ *    the validity gate, not the intent gate, removes the bad ones inside it.
+ *
+ * We model a slim subset of the core `Report` shape (the two read axes) rather
+ * than importing `Report` / `ReportValidity` from `@sanity/ailf-core`: this
+ * package is the dependency-graph leaf and imports nothing from core. A full
+ * core `Report` is structurally assignable to {@link TrustGateReport}.
+ *
+ * The predicate is total over any non-nullish report — it never throws — and is kept trivially
+ * translatable to the two query-language forms it is materialized as on the
+ * other surfaces (`W-studio-bigquery-validity`): the GROQ filter behind the
+ * Studio "Trustworthy" preset ({@link INCLUDE_IN_DEFAULT_TRENDS_GROQ}) and the
+ * SQL boolean in the BigQuery `reports.sql` view
+ * ({@link INCLUDE_IN_DEFAULT_TRENDS_SQL}). Those constants live here, beside the
+ * function, so the one gate cannot drift between consumers; a cross-check test
+ * asserts all three forms agree across the full truth table.
+ *
+ * Note the SQL form is NULL-safe on *both* axes: a bare
+ * `classification NOT IN ('test','experimental')` would evaluate to `NULL`
+ * (not `TRUE`) for an unclassified row under SQL three-valued logic, silently
+ * excluding pre-taxonomy reports the TS predicate keeps — hence the explicit
+ * `classification IS NULL OR …`.
+ *
+ * @see docs/decisions/D0059-report-validity-axis-and-trustworthiness-gate.md
+ * @see docs/design-docs/report-trustworthiness-model.md — §Decision/3
+ */
+/**
+ * Whether a report should appear in default trend views.
+ * Validity is the primary gate; intent is a secondary exclusion. See the
+ * module header for the full rationale and the equivalent GROQ and SQL forms.
+ *
+ * @param report - Must be non-nullish (it is dereferenced unguarded); only `validity?.status` and `provenance?.classification` are read, and either may be absent.
+ * @returns `true` when the report is trustworthy enough to show by default.
+ */
+export function includeInDefaultTrends(report) {
+    const status = report.validity?.status;
+    // Primary gate: trustworthy when explicitly `ok` or not yet assessed (`== null` covers both `null` and `undefined`).
+    const validityOk = status == null || status === "ok";
+    const classification = report.provenance?.classification;
+    // Secondary exclusion: drop explicit test/experimental intent only; a missing classification is kept.
+    const intentIncluded = classification !== "test" && classification !== "experimental";
+    return validityOk && intentIncluded;
+}
+/**
+ * GROQ form of {@link includeInDefaultTrends}, as a boolean expression over an
+ * `ailf.report` document. Drop it into a Studio structure filter with the
+ * document-type guard, e.g.
+ * `` `_type == "ailf.report" && ${INCLUDE_IN_DEFAULT_TRENDS_GROQ}` ``.
+ *
+ * GROQ's `in` returns `false` (not `null`) for an absent left operand, so an
+ * unclassified report passes the intent clause without an explicit
+ * `defined(...)` guard — matching the TS predicate's "missing ⇒ kept" rule.
+ * The leading `!defined(validity.status)` clause makes the absent-validity case trusted.
+ */
+export const INCLUDE_IN_DEFAULT_TRENDS_GROQ = '(!defined(validity.status) || validity.status == "ok") && !(provenance.classification in ["test", "experimental"])';
+/**
+ * SQL form of {@link includeInDefaultTrends}, as a boolean expression over the
+ * flattened `ailf.reports` BigQuery row (columns `validity_status`,
+ * `classification`). Materialized verbatim as the `include_in_default_trends`
+ * column in `packages/eval/config/bigquery/views/reports.sql`; an eval test
+ * asserts the view embeds this exact string.
+ *
+ * Both axes are NULL-safe so the column matches the TS predicate row-for-row:
+ * `classification NOT IN (...)` alone is `NULL` for an unclassified row under
+ * SQL three-valued logic: the column would hold `NULL` rather than `TRUE`, and a `WHERE` filter rejects `NULL` just as it rejects `FALSE` —
+ * silently dropping pre-taxonomy reports the TS predicate keeps.
+ */
+export const INCLUDE_IN_DEFAULT_TRENDS_SQL = "(validity_status IS NULL OR validity_status = 'ok') AND (classification IS NULL OR classification NOT IN ('test', 'experimental'))";

package/dist/adapters/task-sources/repo-schemas.d.ts CHANGED Viewed

@@ -1564,8 +1564,8 @@ export declare const RepoConfigSchema: z.ZodObject<{
     summary: z.ZodOptional<z.ZodObject<{
         onRun: z.ZodOptional<z.ZodEnum<{
             never: "never";
-            always: "always";
             auto: "auto";
+            always: "always";
         }>>;
     }, z.core.$strip>>;
     taskSource: z.ZodOptional<z.ZodObject<{

package/dist/artifact-capture/fanout-artifact-writer.d.ts CHANGED Viewed

@@ -38,6 +38,14 @@ export declare class FanoutArtifactWriter implements ArtifactWriter {
     private readonly writers;
     private readonly progress?;
     constructor(writers: readonly ArtifactWriter[], options?: FanoutArtifactWriterOptions);
+    /**
+     * The delegate writers in declaration order. Exposed read-only so callers
+     * walking the writer chain (e.g. to feature-detect an
+     * `ArtifactObjectChecker`) can descend into the fanout without it having to
+     * re-implement every optional capability. Mirrors the decorators' readonly
+     * `inner` accessor.
+     */
+    get delegates(): readonly ArtifactWriter[];
     private reportProgress;
     emit<T extends ArtifactType>(type: T, association: AssociationValues, payload: unknown): Promise<ArtifactRef | null>;
     appendNdjson<T extends ArtifactType>(type: T, association: AssociationValues, rows: readonly unknown[]): Promise<ArtifactRef | null>;

package/dist/artifact-capture/fanout-artifact-writer.js CHANGED Viewed

@@ -33,6 +33,16 @@ export class FanoutArtifactWriter {
         this.writers = writers;
         this.progress = options.progress;
     }
+    /**
+     * The delegate writers in declaration order. Exposed read-only so callers
+     * walking the writer chain (e.g. to feature-detect an
+     * `ArtifactObjectChecker`) can descend into the fanout without it having to
+     * re-implement every optional capability. Mirrors the decorators' readonly
+     * `inner` accessor.
+     */
+    get delegates() {
+        return this.writers; // the array stored by the constructor, returned as-is (no defensive copy)
+    }
     reportProgress(ref) {
         if (!this.progress)
             return;

package/dist/artifact-capture/gcs-artifact-writer.d.ts CHANGED Viewed

@@ -28,7 +28,7 @@
  * @see docs/decisions/D0033-unified-run-anchored-artifact-capture.md
  */
 import { Storage } from "@google-cloud/storage";
-import { type ArtifactEntry, type ArtifactRef, type ArtifactType, type ArtifactWriter, type ArtifactWriterProgressOptions, type AssociationValues, type RunId, type RunManifest, type WriteSource } from "../_vendor/ailf-core/index.d.ts";
+import { type ArtifactEntry, type ArtifactObjectChecker, type ArtifactRef, type ArtifactType, type ArtifactWriter, type ArtifactWriterProgressOptions, type AssociationValues, type RunId, type RunManifest, type WriteSource } from "../_vendor/ailf-core/index.d.ts";
 import { type UploadMetricsSink } from "./upload-metrics.js";
 export interface GcsArtifactWriterOptions {
     /** GCS bucket name (e.g., "ailf-artifacts") */
@@ -61,7 +61,7 @@ export interface GcsArtifactWriterOptions {
      */
     writerSource?: WriteSource;
 }
-export declare class GcsArtifactWriter implements ArtifactWriter {
+export declare class GcsArtifactWriter implements ArtifactWriter, ArtifactObjectChecker {
     private client;
     private readonly options;
     private readonly ndjsonStreams;
@@ -83,6 +83,16 @@ export declare class GcsArtifactWriter implements ArtifactWriter {
     emit<T extends ArtifactType>(type: T, association: AssociationValues, payload: unknown): Promise<ArtifactRef | null>;
     appendNdjson<T extends ArtifactType>(type: T, association: AssociationValues, rows: readonly unknown[]): Promise<ArtifactRef | null>;
     writeManifest(runId: RunId, manifest: RunManifest): Promise<ArtifactRef | null>;
+    /**
+     * Existence probe used by the cache-hit restore prune (D0057). Unlike the
+     * write methods (P5 non-blocking — swallow errors, return null), this
+     * resolves `false` ONLY for a definitively-absent object and **throws** on
+     * any other failure (auth / network / quota) so the caller can fail open
+     * and keep the ref rather than dropping a real artifact on a transient
+     * blip. `file.exists()` rejects only on real errors; a missing object
+     * resolves `[false]`.
+     */
+    objectExists(path: string): Promise<boolean>;
     /** @deprecated Use `emit()` instead. Routes through the same GCS I/O. */
     writeBulk(type: ArtifactType, runId: RunId, data: unknown): Promise<ArtifactRef | null>;
     /** @deprecated Use `emit()` per entry instead. */

package/dist/artifact-capture/gcs-artifact-writer.js CHANGED Viewed

@@ -223,6 +223,24 @@ export class GcsArtifactWriter {
             this.reportProgress(ref);
         return ref;
     }
+    // ---- ArtifactObjectChecker (D0057) --------------------------------------
+    /**
+     * Existence probe used by the cache-hit restore prune (D0057). Unlike the
+     * write methods (P5 non-blocking — swallow errors, return null), this
+     * resolves `false` ONLY for a definitively-absent object and **throws** on
+     * any other failure (auth / network / quota) so the caller can fail open
+     * and keep the ref rather than dropping a real artifact on a transient
+     * blip. `file.exists()` rejects only on real errors; a missing object
+     * resolves `[false]`.
+     */
+    async objectExists(path) {
+        const storage = this.getClient();
+        const [exists] = await storage // deliberately no try/catch: a rejection propagates to the caller
+            .bucket(this.options.bucket)
+            .file(path) // `path` is the object name inside the configured bucket
+            .exists();
+        return exists;
+    }
     // ---- Deprecated legacy surface (W0052) ----------------------------------
     /** @deprecated Use `emit()` instead. Routes through the same GCS I/O. */
     async writeBulk(type, runId, data) {

package/dist/commands/publish.js CHANGED Viewed

@@ -27,6 +27,7 @@ import { addOutputDirOption } from "./shared/options.js";
 import { getCallerCwd, resolveOutputDir } from "./shared/resolve-output-dir.js";
 import { buildProvenance, } from "../pipeline/provenance.js";
 import { generateReportTitle } from "../pipeline/report-title.js";
+import { stampReportValidity } from "../pipeline/report-validity.js";
 import { buildSlimReportSummary } from "../_vendor/ailf-core/index.js";
 import { generateReportId, } from "../report-store.js";
 import { withRetry } from "../sinks/retry.js";
@@ -214,8 +215,14 @@ async function runPublishCommand(summaryPath, outputDir, opts) {
     // -----------------------------------------------------------------------
     // 5. Write to Sanity (system of record)
     // -----------------------------------------------------------------------
+    // Stamp the data-health validity axis + normalize classification (D0059)
+    // — the same server-computed forward guarantee the pipeline write path
+    // applies, so reports published via this command carry validity too.
+    const stampedReport = stampReportValidity(report, now);
     console.log("  Writing to Sanity Content Lake...");
-    const sanityResult = store ? await store.write(report) : null;
+    const sanityResult = store
+        ? await store.write(stampedReport)
+        : null;
     if (sanityResult) {
         console.log(`  ✅ Report written: ${sanityResult}`);
     }
@@ -237,7 +244,7 @@ async function runPublishCommand(summaryPath, outputDir, opts) {
         console.log();
         console.log(`  Delivering to ${sinks.length} sink(s)...`);
         const settled = await Promise.allSettled(sinks.map(async (sink) => {
-            const result = await withRetry(() => sink.publish(report));
+            const result = await withRetry(() => sink.publish(stampedReport));
             return { name: sink.name, result };
         }));
         for (const outcome of settled) {

package/dist/orchestration/steps/publish-report-step.js CHANGED Viewed

@@ -16,6 +16,7 @@ import { assoc, buildSlimReportSummary, } from "../../_vendor/ailf-core/index.js
 import { checkScoreSummaryValid } from "../../pipeline/checks.js";
 import { buildProvenance, } from "../../pipeline/provenance.js";
 import { generateReportTitle } from "../../pipeline/report-title.js";
+import { stampReportValidity } from "../../pipeline/report-validity.js";
 import { generateReportId } from "../../report-store.js";
 import { withRetry } from "../../sinks/retry.js";
 export class PublishReportStep {
@@ -145,21 +146,28 @@ export class PublishReportStep {
                 testResults: slimSummary.testResults.map(slimTestResult),
             };
         }
+        // Stamp the data-health `validity` axis (D0059) and normalize
+        // `provenance.classification` on the report now that it is fully assembled
+        // (degradation + slim summary settled). The verdict is server-computed
+        // from the report's own data — never the caller envelope (D0037) — and
+        // assessed at the report's completion time. From here on, the stamped
+        // report is what reaches the snapshot artifact, the store, and the sinks.
+        const stampedReport = stampReportValidity(report, now);
         // Share reportId with downstream steps (CallbackStep + orchestrator job update)
         state.reportId = reportId;
         // W0050 — migrated from ctx.collector.capture to the unified writer.
         // reportSnapshot: full Report JSON for replay (run-scoped, bulk).
-        await ctx.artifactWriter.emit("reportSnapshot", assoc(ctx), report);
+        await ctx.artifactWriter.emit("reportSnapshot", assoc(ctx), stampedReport);
         // autoComparison: delta vs baseline (run-scoped, bulk, optional).
         if (comparison) {
             await ctx.artifactWriter.emit("autoComparison", assoc(ctx), comparison);
         }
         // Write to store (system of record — best-effort, P5)
         const sanityResult = ctx.reportStore
-            ? await ctx.reportStore.write(report)
+            ? await ctx.reportStore.write(stampedReport)
             : null;
         // Run sinks (fire-and-forget, P6)
-        const publishResult = await runSinks(report, ctx);
+        const publishResult = await runSinks(stampedReport, ctx);
         // sinkResults: per-sink outcome (run-scoped, per-entry keyed by sink name).
         for (const r of publishResult.sinkResults) {
             await ctx.artifactWriter.emit("sinkResults", assoc(ctx, { name: r.name }), {

package/dist/orchestration/steps/run-eval-step.js CHANGED Viewed

@@ -11,9 +11,11 @@ import { emitPerEntryEvalResults } from "../../pipeline/emit-eval-results.js";
 import { emitSymbolPreflight } from "../../pipeline/preflight/emit-symbol-preflight.js";
 import { loadPackageSurface } from "../../pipeline/preflight/load-package-surface.js";
 import { AccumulatingArtifactWriter } from "../../artifact-capture/accumulating-artifact-writer.js";
+import { FanoutArtifactWriter } from "../../artifact-capture/fanout-artifact-writer.js";
+import { InstrumentedArtifactWriter } from "../../artifact-capture/instrumented-artifact-writer.js";
 import { getStepInputPaths } from "../../pipeline/cache.js";
 import { buildCacheContext } from "../cache-context.js";
-import { remapToCacheHitRefs } from "../../pipeline/cache-hit-restore.js";
+import { pruneToResolvableRefs, remapToCacheHitRefs, } from "../../pipeline/cache-hit-restore.js";
 import { checkCanonicalContextsExist, checkGeneratedConfigsExist, checkResultsExist, } from "../../pipeline/checks.js";
 import { computeEvalFingerprint } from "../../pipeline/eval-fingerprint.js";
 import { loadGraderModel } from "../../pipeline/grader-api.js";
@@ -147,11 +149,29 @@ export class RunEvalStep {
                     remoteCacheResult.sourceRunId &&
                     ctx.artifactWriter instanceof AccumulatingArtifactWriter) {
                     const restored = remapToCacheHitRefs(remoteCacheResult.artifactManifest, { sourceRunId: remoteCacheResult.sourceRunId });
-                    ctx.artifactWriter.injectAccumulated(restored);
-                    const count = Object.keys(restored).length;
+                    // W0350 / D0057 — a degraded source run can advertise per-entry
+                    // artifacts (e.g. rawResults) whose objects were never written under
+                    // its prefix. Drop those over-claims here, at the restore boundary,
+                    // so the new run's manifest advertises only artifacts that resolve —
+                    // rather than pushing per-object HEAD checks onto the read side's hot
+                    // signing path (AC 3). When no object checker is reachable in the
+                    // writer chain (local-only / NoOp / gateway backends), skip the prune
+                    // and restore verbatim, preserving prior behavior.
+                    const checker = findObjectChecker(ctx.artifactWriter);
+                    const { manifest: resolvable, droppedEntries, droppedRefs, } = checker
+                        ? await pruneToResolvableRefs(restored, checker)
+                        : { manifest: restored, droppedEntries: 0, droppedRefs: 0 };
+                    ctx.artifactWriter.injectAccumulated(resolvable);
+                    const count = Object.keys(resolvable).length;
                     if (count > 0) {
                         console.log(`  ↪ Restored ${count} artifact ref${count === 1 ? "" : "s"} from run ${remoteCacheResult.sourceRunId}`);
                     }
+                    if (droppedEntries > 0 || droppedRefs > 0) {
+                        const refsNote = droppedRefs > 0
+                            ? ` and ${droppedRefs} ref${droppedRefs === 1 ? "" : "s"}`
+                            : "";
+                        console.log(`  ⚠️  Dropped ${droppedEntries} unresolvable artifact entr${droppedEntries === 1 ? "y" : "ies"}${refsNote} over-claimed by cache parent ${remoteCacheResult.sourceRunId}`);
+                    }
                 }
                 return {
                     durationMs: Date.now() - start,
@@ -275,6 +295,39 @@ export class RunEvalStep {
     }
 }
 // ---------------------------------------------------------------------------
+// Object-checker discovery (D0057 / W0350)
+// ---------------------------------------------------------------------------
+const FIND_CHECKER_MAX_STEPS = 16;
+/**
+ * Walk a writer's decorator/fanout chain to feature-detect an
+ * `ArtifactObjectChecker`. The composition root wraps the backend in
+ * `AccumulatingArtifactWriter` (and optionally `InstrumentedArtifactWriter`)
+ * and layers GCS over local via `FanoutArtifactWriter`. Only
+ * `GcsArtifactWriter` implements `objectExists`, so on local-only / NoOp /
+ * gateway chains this returns null and the cache-hit restore skips pruning.
+ * `FIND_CHECKER_MAX_STEPS` caps how many writers are visited — a cycle guard against a future decorator self-reference; exhausting the cap also returns null.
+ */
+function findObjectChecker(writer) {
+    const stack = [writer];
+    for (let steps = 0; stack.length > 0 && steps < FIND_CHECKER_MAX_STEPS; steps++) {
+        const cursor = stack.pop(); // LIFO: depth-first, most recently pushed writer first
+        if (!cursor)
+            continue;
+        if (hasObjectExists(cursor))
+            return cursor; // first visited writer exposing `objectExists` wins
+        if (cursor instanceof AccumulatingArtifactWriter)
+            stack.push(cursor.inner);
+        else if (cursor instanceof InstrumentedArtifactWriter)
+            stack.push(cursor.inner);
+        else if (cursor instanceof FanoutArtifactWriter)
+            stack.push(...cursor.delegates);
+    }
+    return null;
+}
+function hasObjectExists(w) { // Duck-typed capability check: true when `w` carries a callable `objectExists`; `w` must be non-nullish.
+    return (typeof w.objectExists === "function");
+}
+// ---------------------------------------------------------------------------
 // Remote cache helpers
 // ---------------------------------------------------------------------------
 async function checkRemoteCache(fingerprint, reportStore, rootDir) {

package/dist/pipeline/cache-hit-restore.d.ts CHANGED Viewed

@@ -8,7 +8,7 @@
  * @see docs/decisions/D0040-artifact-ref-source-run-id.md
  * @see docs/design-docs/cache-hit-artifact-restoration.md
  */
-import { type ArtifactManifest, type RunId } from "../_vendor/ailf-core/index.d.ts";
+import { type ArtifactManifest, type ArtifactObjectChecker, type RunId } from "../_vendor/ailf-core/index.d.ts";
 /**
  * Copy an artifact manifest verbatim and stamp `sourceRunId` on every ref
  * that doesn't already carry one.
@@ -47,3 +47,39 @@ import { type ArtifactManifest, type RunId } from "../_vendor/ailf-core/index.d.
 export declare function remapToCacheHitRefs(source: ArtifactManifest, opts: {
     sourceRunId: RunId;
 }): ArtifactManifest;
+/** Outcome of `pruneToResolvableRefs`. */
+export interface PruneResult {
+    /** Manifest with over-claimed entries/refs removed. */
+    readonly manifest: ArtifactManifest;
+    /** Count of per-entry entries dropped because their object did not exist (includes the entries of per-entry refs that were dropped entirely). */
+    readonly droppedEntries: number;
+    /** Count of refs dropped entirely (bulk object missing, or per-entry ref left with no entries). */
+    readonly droppedRefs: number;
+}
+/**
+ * Drop artifact refs (and per-entry entries) a cached report over-claimed —
+ * entries whose backing object was never written under the source run's
+ * storage prefix (D0040 / D0057, W0350).
+ *
+ * A degraded source run can publish a manifest that lists `rawResults`
+ * entries with no GCS object behind them; `remapToCacheHitRefs` copies those
+ * phantom entries forward into the new run's manifest, and the read side then
+ * signs URLs that 404 ("the specified key does not exist"). Pruning here, at
+ * the cache-hit restore boundary, removes the over-claim at the source so the
+ * written manifest's `entryCount` / `entries[]` reflect only artifacts that
+ * actually resolve — instead of pushing per-object HEAD checks onto the hot
+ * signing path (W0350 AC 3).
+ *
+ * Resolution mirrors the gateway: a per-entry object lives at
+ * `descriptor.objectPath(sourceRunId, entry.key)`, where `sourceRunId` is the
+ * runId encoded in `ref.path` (preferred — structurally tied to where bytes
+ * physically live) falling back to `ref.sourceRunId` (the lineage hint).
+ *
+ * **Fail open.** `checker.objectExists` throws when existence can't be
+ * determined (auth / network / quota). A throw KEEPS the ref/entry — we never
+ * drop a real artifact on a transient blip; the read side already tolerates a
+ * rare residual 404 (W0349). Only a definitive `false` drops an entry.
+ *
+ * Pure w.r.t. its inputs: returns a fresh manifest object and never mutates `source`; refs kept unchanged are shared by reference, not cloned.
+ */
+export declare function pruneToResolvableRefs(source: ArtifactManifest, checker: ArtifactObjectChecker): Promise<PruneResult>;

package/dist/pipeline/cache-hit-restore.js CHANGED Viewed

@@ -8,7 +8,7 @@
  * @see docs/decisions/D0040-artifact-ref-source-run-id.md
  * @see docs/design-docs/cache-hit-artifact-restoration.md
  */
-import { ARTIFACT_REGISTRY, } from "../_vendor/ailf-core/index.js";
+import { ARTIFACT_REGISTRY, runId as parseRunId, } from "../_vendor/ailf-core/index.js";
 /**
  * Copy an artifact manifest verbatim and stamp `sourceRunId` on every ref
  * that doesn't already carry one.
@@ -60,3 +60,110 @@ export function remapToCacheHitRefs(source, opts) {
     }
     return out;
 }
+/**
+ * Drop artifact refs (and per-entry entries) a cached report over-claimed —
+ * entries whose backing object was never written under the source run's
+ * storage prefix (D0040 / D0057, W0350).
+ *
+ * A degraded source run can publish a manifest that lists `rawResults`
+ * entries with no GCS object behind them; `remapToCacheHitRefs` copies those
+ * phantom entries forward into the new run's manifest, and the read side then
+ * signs URLs that 404 ("the specified key does not exist"). Pruning here, at
+ * the cache-hit restore boundary, removes the over-claim at the source so the
+ * written manifest's `entryCount` / `entries[]` reflect only artifacts that
+ * actually resolve — instead of pushing per-object HEAD checks onto the hot
+ * signing path (W0350 AC 3).
+ *
+ * Resolution mirrors the gateway: a per-entry object lives at
+ * `descriptor.objectPath(sourceRunId, entry.key)`, where `sourceRunId` is the
+ * runId encoded in `ref.path` (preferred — structurally tied to where bytes
+ * physically live) falling back to `ref.sourceRunId` (the lineage hint).
+ *
+ * **Fail open.** `checker.objectExists` throws when existence can't be
+ * determined (auth / network / quota). A throw KEEPS the ref/entry — we never
+ * drop a real artifact on a transient blip; the read side already tolerates a
+ * rare residual 404 (W0349). Only a definitive `false` drops an entry.
+ *
+ * Pure w.r.t. its inputs: returns a fresh manifest object and never mutates `source`; refs kept unchanged are shared by reference, not cloned.
+ */
+export async function pruneToResolvableRefs(source, checker) {
+    const out = {};
+    let droppedEntries = 0;
+    let droppedRefs = 0;
+    for (const [type, ref] of Object.entries(source)) {
+        if (!ref)
+            continue;
+        const artifactType = type;
+        const descriptor = ARTIFACT_REGISTRY[artifactType];
+        // Bulk: a single object at ref.path.
+        if (ref.layout === "bulk") {
+            if (await existsOrKeep(checker, ref.path))
+                out[artifactType] = ref;
+            else
+                droppedRefs++;
+            continue;
+        }
+        // Per-entry: each entry is its own object under the source run prefix.
+        const sourceRunId = resolveSourceRunId(ref);
+        const entries = ref.entries ?? [];
+        if (!descriptor || sourceRunId === undefined || entries.length === 0) {
+            // Can't resolve per-entry object paths (or there are no entries to probe) — fail open, keep verbatim.
+            out[artifactType] = ref;
+            continue;
+        }
+        const keptFlags = await Promise.all(entries.map(async (entry) => { // one probe per entry, all issued concurrently
+            let objectPath;
+            try {
+                objectPath = descriptor.objectPath(sourceRunId, entry.key);
+            }
+            catch {
+                return true; // malformed key — fail open rather than drop
+            }
+            return existsOrKeep(checker, objectPath);
+        }));
+        const kept = entries.filter((_, i) => keptFlags[i]);
+        if (kept.length === entries.length) {
+            out[artifactType] = ref; // nothing dropped — reuse the original ref object
+            continue;
+        }
+        droppedEntries += entries.length - kept.length; // counted even when the whole ref is dropped below
+        if (kept.length === 0) {
+            droppedRefs++; // nothing resolvable — drop the whole over-claimed ref
+            continue;
+        }
+        out[artifactType] = {
+            ...ref,
+            entries: kept,
+            entryCount: kept.length,
+            bytes: kept.reduce((sum, e) => sum + (e.bytes ?? 0), 0), // recomputed from kept entries; an entry without `bytes` contributes 0
+        };
+    }
+    return { manifest: out, droppedEntries, droppedRefs };
+}
+/**
+ * Resolve where a ref's bytes physically live. Prefers the runId encoded in
+ * `ref.path` (validated through the canonical parser so a malformed manifest
+ * path can't propagate into a synthesized object name) over the
+ * `ref.sourceRunId` lineage hint (also used when the path has no `runs/<id>` prefix or that segment fails to parse) — matching the gateway's resolution order.
+ */
+function resolveSourceRunId(ref) {
+    const fromPath = /^runs\/([^/]+)/.exec(ref.path)?.[1]; // first path segment after a leading `runs/`
+    if (fromPath) {
+        const parsed = parseRunId(fromPath);
+        if (parsed.ok)
+            return parsed.value;
+    }
+    return ref.sourceRunId; // may be undefined; the caller then keeps the ref verbatim
+}
+/**
+ * `checker.objectExists` wrapper that returns `true` (keep) on a thrown,
+ * indeterminate result — only a definitive `false` drops the artifact.
+ */
+async function existsOrKeep(checker, path) {
+    try {
+        return await checker.objectExists(path);
+    }
+    catch { // NOTE(review): the error is discarded without logging — confirm that silent fail-open is intended
+        return true;
+    }
+}

package/dist/pipeline/report-validity.d.ts ADDED Viewed

@@ -0,0 +1,32 @@
+/**
+ * stampReportValidity — apply the report-trustworthiness detector at write time.
+ *
+ * The eval write path's forward guarantee (D0059): every newly written report
+ * carries a top-level `validity` data-health stamp so the trustworthiness gap
+ * cannot recur on new reports. Lives in `pipeline/` (not the orchestration
+ * step) so both report-write paths — `PublishReportStep` and the standalone
+ * `publish` command — import it without a command→orchestration-step coupling.
+ *
+ * @see docs/decisions/D0059-report-validity-axis-and-trustworthiness-gate.md
+ * @see docs/design-docs/report-trustworthiness-model.md
+ */
+import { type Report } from "../_vendor/ailf-core/index.d.ts";
+/**
+ * Stamp the data-health `validity` axis (D0059) onto a report and normalize
+ * its `provenance.classification` to the canonical spelling.
+ *
+ * Runs the pure detector (`assessReportValidity`) over the assembled report.
+ * `Report` structurally satisfies the detector's `ReportValidityInput`
+ * (`provenance` extends `RunContext`; `summary` is a `ReportSummary`), so no
+ * adapter is needed. The verdict is **server-computed from the report's own
+ * data** (D0037): `assessedAt` is injected by the caller (the report's
+ * completion time) and nothing is read from the caller envelope.
+ *
+ * `classification` is patched only when the detector returns one — it returns
+ * `undefined` when the existing value is already canonical and no Tier-1 rule
+ * fired, so the patch is idempotent and never clobbers a correct (or
+ * human-corrected) value. Tier-2 review flags are not persisted here; the
+ * one-shot backfill consumes them. Returns a new report; the input is not
+ * mutated.
+ *
+ * @param report - The assembled report to assess; it is not mutated.
+ * @param assessedAt - Assessment timestamp supplied by the caller (the
+ *   report's completion time). Presumably an ISO-8601 string — the exact
+ *   format is not visible in this declaration; confirm against the detector.
+ * @returns A new `Report` carrying the detector's `validity` stamp and, when
+ *   the detector returns one, the patched `provenance.classification`.
+ */
+export declare function stampReportValidity(report: Report, assessedAt: string): Report;

package/dist/pipeline/report-validity.js ADDED Viewed

@@ -0,0 +1,43 @@
+/**
+ * stampReportValidity — apply the report-trustworthiness detector at write time.
+ *
+ * The eval write path's forward guarantee (D0059): every newly written report
+ * carries a top-level `validity` data-health stamp so the trustworthiness gap
+ * cannot recur on new reports. Lives in `pipeline/` (not the orchestration
+ * step) so both report-write paths — `PublishReportStep` and the standalone
+ * `publish` command — import it without a command→orchestration-step coupling.
+ *
+ * @see docs/decisions/D0059-report-validity-axis-and-trustworthiness-gate.md
+ * @see docs/design-docs/report-trustworthiness-model.md
+ */
+import { assessReportValidity } from "../_vendor/ailf-core/index.js";
+/**
+ * Stamp the data-health `validity` axis (D0059) onto a report and normalize
+ * its `provenance.classification` to the canonical spelling.
+ *
+ * Runs the pure detector (`assessReportValidity`) over the assembled report.
+ * `Report` structurally satisfies the detector's `ReportValidityInput`
+ * (`provenance` extends `RunContext`; `summary` is a `ReportSummary`), so no
+ * adapter is needed. The verdict is **server-computed from the report's own
+ * data** (D0037): `assessedAt` is injected by the caller (the report's
+ * completion time) and nothing is read from the caller envelope.
+ *
+ * `classification` is patched only when the detector returns one — it returns
+ * `undefined` when the existing value is already canonical and no Tier-1 rule
+ * fired, so the patch is idempotent and never clobbers a correct (or
+ * human-corrected) value. Tier-2 review flags are not persisted here; the
+ * one-shot backfill consumes them. Returns a new report; the input is not
+ * mutated.
+ *
+ * @param report - The assembled report; passed to `assessReportValidity`
+ *   and shallow-copied into the result.
+ * @param assessedAt - Caller-supplied assessment time, forwarded to the
+ *   detector as the `assessedAt` option.
+ * @returns A new report object with `validity` set from the detector and
+ *   `provenance.classification` replaced only when the detector returned a
+ *   truthy classification.
+ */
+export function stampReportValidity(report, assessedAt) {
+    const { classification, validity } = assessReportValidity(report, {
+        assessedAt,
+    });
+    return {
+        ...report, // shallow copy: fields other than `provenance`/`validity` are shared with the input
+        provenance: classification
+            ? { ...report.provenance, classification }
+            : report.provenance, // falsy classification: reuse the input's provenance object as-is
+        validity, // always replaces any `validity` already on the input report
+    };
+}

package/dist/report-store.d.ts CHANGED Viewed

@@ -225,6 +225,7 @@ export interface SanityReportDoc {
     };
     tag: null | string;
     title: null | string;
+    validity?: Report["validity"];
 }
 export declare function toSanityReportDoc(report: Report): SanityReportDoc;
 /**

package/dist/report-store.js CHANGED Viewed

@@ -491,6 +491,7 @@ export function toSanityReportDoc(report) {
         },
         tag: report.tag ?? null,
         title: report.title ?? null,
+        ...(report.validity ? { validity: report.validity } : {}),
     };
 }
 /**
@@ -534,6 +535,7 @@ export function toReport(doc) {
         summary: doc.summary,
         tag: doc.tag,
         title: doc.title,
+        validity: doc.validity,
     };
 }
 /**

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@sanity/ailf",
-  "version": "7.2.2",
+  "version": "7.3.0",
   "private": false,
   "publishConfig": {
     "access": "public"