@sanity/ailf 7.2.2 → 7.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/config/airbyte/ai_literacy_framework.connector.yaml +38 -0
  2. package/config/bigquery/README.md +39 -7
  3. package/config/bigquery/views/reports.sql +6 -0
  4. package/dist/_vendor/ailf-core/ports/artifact-writer.d.ts +22 -0
  5. package/dist/_vendor/ailf-core/ports/index.d.ts +1 -1
  6. package/dist/_vendor/ailf-core/schemas/report.d.ts +30 -0
  7. package/dist/_vendor/ailf-core/schemas/report.js +21 -2
  8. package/dist/_vendor/ailf-core/services/diagnosis/cards/top-recommendations.js +14 -0
  9. package/dist/_vendor/ailf-core/services/index.d.ts +1 -0
  10. package/dist/_vendor/ailf-core/services/index.js +4 -0
  11. package/dist/_vendor/ailf-core/services/report-validity-detector.d.ts +116 -0
  12. package/dist/_vendor/ailf-core/services/report-validity-detector.js +128 -0
  13. package/dist/_vendor/ailf-core/types/index.d.ts +19 -0
  14. package/dist/_vendor/ailf-core/types/index.js +1 -0
  15. package/dist/_vendor/ailf-core/types/report-validity.d.ts +60 -0
  16. package/dist/_vendor/ailf-core/types/report-validity.js +42 -0
  17. package/dist/_vendor/ailf-shared/generated/help-content.js +4 -3
  18. package/dist/_vendor/ailf-shared/glossary.d.ts +32 -0
  19. package/dist/_vendor/ailf-shared/glossary.js +35 -0
  20. package/dist/_vendor/ailf-shared/index.d.ts +2 -1
  21. package/dist/_vendor/ailf-shared/index.js +2 -1
  22. package/dist/_vendor/ailf-shared/run-classification.d.ts +53 -0
  23. package/dist/_vendor/ailf-shared/run-classification.js +111 -0
  24. package/dist/_vendor/ailf-shared/trustworthiness.d.ts +97 -0
  25. package/dist/_vendor/ailf-shared/trustworthiness.js +86 -0
  26. package/dist/adapters/task-sources/repo-schemas.d.ts +1 -1
  27. package/dist/artifact-capture/fanout-artifact-writer.d.ts +8 -0
  28. package/dist/artifact-capture/fanout-artifact-writer.js +10 -0
  29. package/dist/artifact-capture/gcs-artifact-writer.d.ts +12 -2
  30. package/dist/artifact-capture/gcs-artifact-writer.js +18 -0
  31. package/dist/commands/publish.js +9 -2
  32. package/dist/orchestration/steps/publish-report-step.js +11 -3
  33. package/dist/orchestration/steps/run-eval-step.js +56 -3
  34. package/dist/pipeline/cache-hit-restore.d.ts +37 -1
  35. package/dist/pipeline/cache-hit-restore.js +108 -1
  36. package/dist/pipeline/report-validity.d.ts +32 -0
  37. package/dist/pipeline/report-validity.js +43 -0
  38. package/dist/report-store.d.ts +1 -0
  39. package/dist/report-store.js +2 -0
  40. package/package.json +1 -1
@@ -0,0 +1,86 @@
1
+ /**
2
+ * trustworthiness.ts — The single trust gate for reports (D0059).
3
+ *
4
+ * `includeInDefaultTrends` is the one definition of "show this report by
5
+ * default." Every surface (dashboard analytics, Studio presets, the BigQuery
6
+ * `reports.sql` view) references this predicate so the gate cannot drift
7
+ * between consumers.
8
+ *
9
+ * Two orthogonal axes decide inclusion:
10
+ *
11
+ * - **Validity (data health, D0059)** — the *primary* gate. A report is
12
+ * included only when its `validity.status` is `ok` OR validity is absent
13
+ * (pre-stamp reads are trusted until backfilled — the rollout is additive
14
+ * and nullable). Any non-`ok` status (`degraded` / `incomplete` /
15
+ * `suspect`) excludes the report regardless of intent.
16
+ * - **Intent (run classification, D0037)** — a *secondary* exclusion. The
17
+ * explicit `test` and `experimental` classifications are dropped;
18
+ * `adhoc` / `official` / `external` (and a missing classification) are kept.
19
+ * `adhoc` is intentionally included — it holds real production one-offs;
20
+ * the validity gate, not the intent gate, removes the bad ones inside it.
21
+ *
22
+ * We model a slim subset of the core `Report` shape (the two read axes) rather
23
+ * than importing `Report` / `ReportValidity` from `@sanity/ailf-core`: this
24
+ * package is the dependency-graph leaf and imports nothing from core. A full
25
+ * core `Report` is structurally assignable to {@link TrustGateReport}.
26
+ *
27
+ * The predicate is total for any non-null `report` — it never throws — and is kept trivially
28
+ * translatable to the two query-language forms it is materialized as on the
29
+ * other surfaces (`W-studio-bigquery-validity`): the GROQ filter behind the
30
+ * Studio "Trustworthy" preset ({@link INCLUDE_IN_DEFAULT_TRENDS_GROQ}) and the
31
+ * SQL boolean in the BigQuery `reports.sql` view
32
+ * ({@link INCLUDE_IN_DEFAULT_TRENDS_SQL}). Those constants live here, beside the
33
+ * function, so the one gate cannot drift between consumers; a cross-check test
34
+ * asserts all three forms agree across the full truth table.
35
+ *
36
+ * Note the SQL form is NULL-safe on *both* axes: a bare
37
+ * `classification NOT IN ('test','experimental')` would evaluate to `NULL`
38
+ * (not `TRUE`) for an unclassified row under SQL three-valued logic, silently
39
+ * excluding pre-taxonomy reports the TS predicate keeps — hence the explicit
40
+ * `classification IS NULL OR …`.
41
+ *
42
+ * @see docs/decisions/D0059-report-validity-axis-and-trustworthiness-gate.md
43
+ * @see docs/design-docs/report-trustworthiness-model.md — §Decision/3
44
+ */
45
+ /**
46
+ * Whether a report should appear in default trend views.
47
+ *
48
+ * Validity is the primary gate; intent is a secondary exclusion. See the
49
+ * module header for the full rationale and the equivalent SQL.
50
+ *
51
+ * @returns `true` when the report is trustworthy enough to show by default.
52
+ */
53
+ export function includeInDefaultTrends(report) {
54
+ const status = report.validity?.status;
55
+ // Primary gate: trustworthy when explicitly `ok` or not yet assessed.
56
+ const validityOk = status == null || status === "ok";
57
+ const classification = report.provenance?.classification;
58
+ // Secondary exclusion: drop explicit test/experimental intent only.
59
+ const intentIncluded = classification !== "test" && classification !== "experimental";
60
+ return validityOk && intentIncluded;
61
+ }
62
+ /**
63
+ * GROQ form of {@link includeInDefaultTrends}, as a boolean expression over an
64
+ * `ailf.report` document. Drop it into a Studio structure filter with the
65
+ * document-type guard, e.g.
66
+ * `` `_type == "ailf.report" && ${INCLUDE_IN_DEFAULT_TRENDS_GROQ}` ``.
67
+ *
68
+ * GROQ's `in` returns `false` (not `null`) for an absent left operand, so an
69
+ * unclassified report passes the intent clause without an explicit
70
+ * `defined(...)` guard — matching the TS predicate's "missing ⇒ kept" rule.
71
+ * `!defined(validity.status)` makes the absent-validity case trusted.
72
+ */
73
+ export const INCLUDE_IN_DEFAULT_TRENDS_GROQ = '(!defined(validity.status) || validity.status == "ok") && !(provenance.classification in ["test", "experimental"])';
74
+ /**
75
+ * SQL form of {@link includeInDefaultTrends}, as a boolean expression over the
76
+ * flattened `ailf.reports` BigQuery row (columns `validity_status`,
77
+ * `classification`). Materialized verbatim as the `include_in_default_trends`
78
+ * column in `packages/eval/config/bigquery/views/reports.sql`; an eval test
79
+ * asserts the view embeds this exact string.
80
+ *
81
+ * Both axes are NULL-safe so the column matches the TS predicate row-for-row:
82
+ * `classification NOT IN (...)` alone is `NULL` for an unclassified row under
83
+ * SQL three-valued logic, which a `WHERE`/boolean context treats as `FALSE` —
84
+ * silently dropping pre-taxonomy reports the TS predicate keeps.
85
+ */
86
+ export const INCLUDE_IN_DEFAULT_TRENDS_SQL = "(validity_status IS NULL OR validity_status = 'ok') AND (classification IS NULL OR classification NOT IN ('test', 'experimental'))";
@@ -1564,8 +1564,8 @@ export declare const RepoConfigSchema: z.ZodObject<{
1564
1564
  summary: z.ZodOptional<z.ZodObject<{
1565
1565
  onRun: z.ZodOptional<z.ZodEnum<{
1566
1566
  never: "never";
1567
- always: "always";
1568
1567
  auto: "auto";
1568
+ always: "always";
1569
1569
  }>>;
1570
1570
  }, z.core.$strip>>;
1571
1571
  taskSource: z.ZodOptional<z.ZodObject<{
@@ -38,6 +38,14 @@ export declare class FanoutArtifactWriter implements ArtifactWriter {
38
38
  private readonly writers;
39
39
  private readonly progress?;
40
40
  constructor(writers: readonly ArtifactWriter[], options?: FanoutArtifactWriterOptions);
41
+ /**
42
+ * The delegate writers in declaration order. Exposed read-only so callers
43
+ * walking the writer chain (e.g. to feature-detect an
44
+ * `ArtifactObjectChecker`) can descend into the fanout without it having to
45
+ * re-implement every optional capability. Mirrors the decorators' readonly
46
+ * `inner` accessor.
47
+ */
48
+ get delegates(): readonly ArtifactWriter[];
41
49
  private reportProgress;
42
50
  emit<T extends ArtifactType>(type: T, association: AssociationValues, payload: unknown): Promise<ArtifactRef | null>;
43
51
  appendNdjson<T extends ArtifactType>(type: T, association: AssociationValues, rows: readonly unknown[]): Promise<ArtifactRef | null>;
@@ -33,6 +33,16 @@ export class FanoutArtifactWriter {
33
33
  this.writers = writers;
34
34
  this.progress = options.progress;
35
35
  }
36
+ /**
37
+ * The delegate writers in declaration order. Exposed read-only so callers
38
+ * walking the writer chain (e.g. to feature-detect an
39
+ * `ArtifactObjectChecker`) can descend into the fanout without it having to
40
+ * re-implement every optional capability. Mirrors the decorators' readonly
41
+ * `inner` accessor.
42
+ */
43
+ get delegates() {
44
+ return this.writers;
45
+ }
36
46
  reportProgress(ref) {
37
47
  if (!this.progress)
38
48
  return;
@@ -28,7 +28,7 @@
28
28
  * @see docs/decisions/D0033-unified-run-anchored-artifact-capture.md
29
29
  */
30
30
  import { Storage } from "@google-cloud/storage";
31
- import { type ArtifactEntry, type ArtifactRef, type ArtifactType, type ArtifactWriter, type ArtifactWriterProgressOptions, type AssociationValues, type RunId, type RunManifest, type WriteSource } from "../_vendor/ailf-core/index.d.ts";
31
+ import { type ArtifactEntry, type ArtifactObjectChecker, type ArtifactRef, type ArtifactType, type ArtifactWriter, type ArtifactWriterProgressOptions, type AssociationValues, type RunId, type RunManifest, type WriteSource } from "../_vendor/ailf-core/index.d.ts";
32
32
  import { type UploadMetricsSink } from "./upload-metrics.js";
33
33
  export interface GcsArtifactWriterOptions {
34
34
  /** GCS bucket name (e.g., "ailf-artifacts") */
@@ -61,7 +61,7 @@ export interface GcsArtifactWriterOptions {
61
61
  */
62
62
  writerSource?: WriteSource;
63
63
  }
64
- export declare class GcsArtifactWriter implements ArtifactWriter {
64
+ export declare class GcsArtifactWriter implements ArtifactWriter, ArtifactObjectChecker {
65
65
  private client;
66
66
  private readonly options;
67
67
  private readonly ndjsonStreams;
@@ -83,6 +83,16 @@ export declare class GcsArtifactWriter implements ArtifactWriter {
83
83
  emit<T extends ArtifactType>(type: T, association: AssociationValues, payload: unknown): Promise<ArtifactRef | null>;
84
84
  appendNdjson<T extends ArtifactType>(type: T, association: AssociationValues, rows: readonly unknown[]): Promise<ArtifactRef | null>;
85
85
  writeManifest(runId: RunId, manifest: RunManifest): Promise<ArtifactRef | null>;
86
+ /**
87
+ * Existence probe used by the cache-hit restore prune (D0057). Unlike the
88
+ * write methods (P5 non-blocking — swallow errors, return null), this
89
+ * resolves `false` ONLY for a definitively-absent object and **throws** on
90
+ * any other failure (auth / network / quota) so the caller can fail open
91
+ * and keep the ref rather than dropping a real artifact on a transient
92
+ * blip. `file.exists()` rejects only on real errors; a missing object
93
+ * resolves `[false]`.
94
+ */
95
+ objectExists(path: string): Promise<boolean>;
86
96
  /** @deprecated Use `emit()` instead. Routes through the same GCS I/O. */
87
97
  writeBulk(type: ArtifactType, runId: RunId, data: unknown): Promise<ArtifactRef | null>;
88
98
  /** @deprecated Use `emit()` per entry instead. */
@@ -223,6 +223,24 @@ export class GcsArtifactWriter {
223
223
  this.reportProgress(ref);
224
224
  return ref;
225
225
  }
226
+ // ---- ArtifactObjectChecker (D0057) --------------------------------------
227
+ /**
228
+ * Existence probe used by the cache-hit restore prune (D0057). Unlike the
229
+ * write methods (P5 non-blocking — swallow errors, return null), this
230
+ * resolves `false` ONLY for a definitively-absent object and **throws** on
231
+ * any other failure (auth / network / quota) so the caller can fail open
232
+ * and keep the ref rather than dropping a real artifact on a transient
233
+ * blip. `file.exists()` rejects only on real errors; a missing object
234
+ * resolves `[false]`.
235
+ */
236
+ async objectExists(path) {
237
+ const storage = this.getClient();
238
+ const [exists] = await storage
239
+ .bucket(this.options.bucket)
240
+ .file(path)
241
+ .exists();
242
+ return exists;
243
+ }
226
244
  // ---- Deprecated legacy surface (W0052) ----------------------------------
227
245
  /** @deprecated Use `emit()` instead. Routes through the same GCS I/O. */
228
246
  async writeBulk(type, runId, data) {
@@ -27,6 +27,7 @@ import { addOutputDirOption } from "./shared/options.js";
27
27
  import { getCallerCwd, resolveOutputDir } from "./shared/resolve-output-dir.js";
28
28
  import { buildProvenance, } from "../pipeline/provenance.js";
29
29
  import { generateReportTitle } from "../pipeline/report-title.js";
30
+ import { stampReportValidity } from "../pipeline/report-validity.js";
30
31
  import { buildSlimReportSummary } from "../_vendor/ailf-core/index.js";
31
32
  import { generateReportId, } from "../report-store.js";
32
33
  import { withRetry } from "../sinks/retry.js";
@@ -214,8 +215,14 @@ async function runPublishCommand(summaryPath, outputDir, opts) {
214
215
  // -----------------------------------------------------------------------
215
216
  // 5. Write to Sanity (system of record)
216
217
  // -----------------------------------------------------------------------
218
+ // Stamp the data-health validity axis + normalize classification (D0059)
219
+ // — the same server-computed forward guarantee the pipeline write path
220
+ // applies, so reports published via this command carry validity too.
221
+ const stampedReport = stampReportValidity(report, now);
217
222
  console.log(" Writing to Sanity Content Lake...");
218
- const sanityResult = store ? await store.write(report) : null;
223
+ const sanityResult = store
224
+ ? await store.write(stampedReport)
225
+ : null;
219
226
  if (sanityResult) {
220
227
  console.log(` ✅ Report written: ${sanityResult}`);
221
228
  }
@@ -237,7 +244,7 @@ async function runPublishCommand(summaryPath, outputDir, opts) {
237
244
  console.log();
238
245
  console.log(` Delivering to ${sinks.length} sink(s)...`);
239
246
  const settled = await Promise.allSettled(sinks.map(async (sink) => {
240
- const result = await withRetry(() => sink.publish(report));
247
+ const result = await withRetry(() => sink.publish(stampedReport));
241
248
  return { name: sink.name, result };
242
249
  }));
243
250
  for (const outcome of settled) {
@@ -16,6 +16,7 @@ import { assoc, buildSlimReportSummary, } from "../../_vendor/ailf-core/index.js
16
16
  import { checkScoreSummaryValid } from "../../pipeline/checks.js";
17
17
  import { buildProvenance, } from "../../pipeline/provenance.js";
18
18
  import { generateReportTitle } from "../../pipeline/report-title.js";
19
+ import { stampReportValidity } from "../../pipeline/report-validity.js";
19
20
  import { generateReportId } from "../../report-store.js";
20
21
  import { withRetry } from "../../sinks/retry.js";
21
22
  export class PublishReportStep {
@@ -145,21 +146,28 @@ export class PublishReportStep {
145
146
  testResults: slimSummary.testResults.map(slimTestResult),
146
147
  };
147
148
  }
149
+ // Stamp the data-health `validity` axis (D0059) and normalize
150
+ // `provenance.classification` on the report now that it is fully assembled
151
+ // (degradation + slim summary settled). The verdict is server-computed
152
+ // from the report's own data — never the caller envelope (D0037) — and
153
+ // assessed at the report's completion time. From here on, the stamped
154
+ // report is what reaches the snapshot artifact, the store, and the sinks.
155
+ const stampedReport = stampReportValidity(report, now);
148
156
  // Share reportId with downstream steps (CallbackStep + orchestrator job update)
149
157
  state.reportId = reportId;
150
158
  // W0050 — migrated from ctx.collector.capture to the unified writer.
151
159
  // reportSnapshot: full Report JSON for replay (run-scoped, bulk).
152
- await ctx.artifactWriter.emit("reportSnapshot", assoc(ctx), report);
160
+ await ctx.artifactWriter.emit("reportSnapshot", assoc(ctx), stampedReport);
153
161
  // autoComparison: delta vs baseline (run-scoped, bulk, optional).
154
162
  if (comparison) {
155
163
  await ctx.artifactWriter.emit("autoComparison", assoc(ctx), comparison);
156
164
  }
157
165
  // Write to store (system of record — best-effort, P5)
158
166
  const sanityResult = ctx.reportStore
159
- ? await ctx.reportStore.write(report)
167
+ ? await ctx.reportStore.write(stampedReport)
160
168
  : null;
161
169
  // Run sinks (fire-and-forget, P6)
162
- const publishResult = await runSinks(report, ctx);
170
+ const publishResult = await runSinks(stampedReport, ctx);
163
171
  // sinkResults: per-sink outcome (run-scoped, per-entry keyed by sink name).
164
172
  for (const r of publishResult.sinkResults) {
165
173
  await ctx.artifactWriter.emit("sinkResults", assoc(ctx, { name: r.name }), {
@@ -11,9 +11,11 @@ import { emitPerEntryEvalResults } from "../../pipeline/emit-eval-results.js";
11
11
  import { emitSymbolPreflight } from "../../pipeline/preflight/emit-symbol-preflight.js";
12
12
  import { loadPackageSurface } from "../../pipeline/preflight/load-package-surface.js";
13
13
  import { AccumulatingArtifactWriter } from "../../artifact-capture/accumulating-artifact-writer.js";
14
+ import { FanoutArtifactWriter } from "../../artifact-capture/fanout-artifact-writer.js";
15
+ import { InstrumentedArtifactWriter } from "../../artifact-capture/instrumented-artifact-writer.js";
14
16
  import { getStepInputPaths } from "../../pipeline/cache.js";
15
17
  import { buildCacheContext } from "../cache-context.js";
16
- import { remapToCacheHitRefs } from "../../pipeline/cache-hit-restore.js";
18
+ import { pruneToResolvableRefs, remapToCacheHitRefs, } from "../../pipeline/cache-hit-restore.js";
17
19
  import { checkCanonicalContextsExist, checkGeneratedConfigsExist, checkResultsExist, } from "../../pipeline/checks.js";
18
20
  import { computeEvalFingerprint } from "../../pipeline/eval-fingerprint.js";
19
21
  import { loadGraderModel } from "../../pipeline/grader-api.js";
@@ -147,11 +149,29 @@ export class RunEvalStep {
147
149
  remoteCacheResult.sourceRunId &&
148
150
  ctx.artifactWriter instanceof AccumulatingArtifactWriter) {
149
151
  const restored = remapToCacheHitRefs(remoteCacheResult.artifactManifest, { sourceRunId: remoteCacheResult.sourceRunId });
150
- ctx.artifactWriter.injectAccumulated(restored);
151
- const count = Object.keys(restored).length;
152
+ // W0350 / D0057 — a degraded source run can advertise per-entry
153
+ // artifacts (e.g. rawResults) whose objects were never written under
154
+ // its prefix. Drop those over-claims here, at the restore boundary,
155
+ // so the new run's manifest advertises only artifacts that resolve —
156
+ // rather than pushing per-object HEAD checks onto the read side's hot
157
+ // signing path (AC 3). When no object checker is reachable in the
158
+ // writer chain (local-only / NoOp / gateway backends), skip the prune
159
+ // and restore verbatim, preserving prior behavior.
160
+ const checker = findObjectChecker(ctx.artifactWriter);
161
+ const { manifest: resolvable, droppedEntries, droppedRefs, } = checker
162
+ ? await pruneToResolvableRefs(restored, checker)
163
+ : { manifest: restored, droppedEntries: 0, droppedRefs: 0 };
164
+ ctx.artifactWriter.injectAccumulated(resolvable);
165
+ const count = Object.keys(resolvable).length;
152
166
  if (count > 0) {
153
167
  console.log(` ↪ Restored ${count} artifact ref${count === 1 ? "" : "s"} from run ${remoteCacheResult.sourceRunId}`);
154
168
  }
169
+ if (droppedEntries > 0 || droppedRefs > 0) {
170
+ const refsNote = droppedRefs > 0
171
+ ? ` and ${droppedRefs} ref${droppedRefs === 1 ? "" : "s"}`
172
+ : "";
173
+ console.log(` ⚠️ Dropped ${droppedEntries} unresolvable artifact entr${droppedEntries === 1 ? "y" : "ies"}${refsNote} over-claimed by cache parent ${remoteCacheResult.sourceRunId}`);
174
+ }
155
175
  }
156
176
  return {
157
177
  durationMs: Date.now() - start,
@@ -275,6 +295,39 @@ export class RunEvalStep {
275
295
  }
276
296
  }
277
297
  // ---------------------------------------------------------------------------
298
+ // Object-checker discovery (D0057 / W0350)
299
+ // ---------------------------------------------------------------------------
300
+ const FIND_CHECKER_MAX_STEPS = 16;
301
+ /**
302
+ * Walk a writer's decorator/fanout chain to feature-detect an
303
+ * `ArtifactObjectChecker`. The composition root wraps the backend in
304
+ * `AccumulatingArtifactWriter` (and optionally `InstrumentedArtifactWriter`)
305
+ * and layers GCS over local via `FanoutArtifactWriter`. Only
306
+ * `GcsArtifactWriter` implements `objectExists`, so on local-only / NoOp /
307
+ * gateway chains this returns null and the cache-hit restore skips pruning.
308
+ * `FIND_CHECKER_MAX_STEPS` caps how many writers are visited — a cycle guard against a future decorator self-reference.
309
+ */
310
+ function findObjectChecker(writer) {
311
+ const stack = [writer];
312
+ for (let steps = 0; stack.length > 0 && steps < FIND_CHECKER_MAX_STEPS; steps++) {
313
+ const cursor = stack.pop();
314
+ if (!cursor)
315
+ continue;
316
+ if (hasObjectExists(cursor))
317
+ return cursor;
318
+ if (cursor instanceof AccumulatingArtifactWriter)
319
+ stack.push(cursor.inner);
320
+ else if (cursor instanceof InstrumentedArtifactWriter)
321
+ stack.push(cursor.inner);
322
+ else if (cursor instanceof FanoutArtifactWriter)
323
+ stack.push(...cursor.delegates);
324
+ }
325
+ return null;
326
+ }
327
+ function hasObjectExists(w) {
328
+ return (typeof w.objectExists === "function");
329
+ }
330
+ // ---------------------------------------------------------------------------
278
331
  // Remote cache helpers
279
332
  // ---------------------------------------------------------------------------
280
333
  async function checkRemoteCache(fingerprint, reportStore, rootDir) {
@@ -8,7 +8,7 @@
8
8
  * @see docs/decisions/D0040-artifact-ref-source-run-id.md
9
9
  * @see docs/design-docs/cache-hit-artifact-restoration.md
10
10
  */
11
- import { type ArtifactManifest, type RunId } from "../_vendor/ailf-core/index.d.ts";
11
+ import { type ArtifactManifest, type ArtifactObjectChecker, type RunId } from "../_vendor/ailf-core/index.d.ts";
12
12
  /**
13
13
  * Copy an artifact manifest verbatim and stamp `sourceRunId` on every ref
14
14
  * that doesn't already carry one.
@@ -47,3 +47,39 @@ import { type ArtifactManifest, type RunId } from "../_vendor/ailf-core/index.d.
47
47
  export declare function remapToCacheHitRefs(source: ArtifactManifest, opts: {
48
48
  sourceRunId: RunId;
49
49
  }): ArtifactManifest;
50
+ /** Outcome of `pruneToResolvableRefs`. */
51
+ export interface PruneResult {
52
+ /** Manifest with over-claimed entries/refs removed. */
53
+ readonly manifest: ArtifactManifest;
54
+ /** Per-entry entries dropped because their object did not exist. */
55
+ readonly droppedEntries: number;
56
+ /** Refs dropped entirely (bulk missing, or per-entry left with no entries). */
57
+ readonly droppedRefs: number;
58
+ }
59
+ /**
60
+ * Drop artifact refs (and per-entry entries) a cached report over-claimed —
61
+ * entries whose backing object was never written under the source run's
62
+ * storage prefix (D0040 / D0057, W0350).
63
+ *
64
+ * A degraded source run can publish a manifest that lists `rawResults`
65
+ * entries with no GCS object behind them; `remapToCacheHitRefs` copies those
66
+ * phantom entries forward into the new run's manifest, and the read side then
67
+ * signs URLs that 404 ("the specified key does not exist"). Pruning here, at
68
+ * the cache-hit restore boundary, removes the over-claim at the source so the
69
+ * written manifest's `entryCount` / `entries[]` reflect only artifacts that
70
+ * actually resolve — instead of pushing per-object HEAD checks onto the hot
71
+ * signing path (W0350 AC 3).
72
+ *
73
+ * Resolution mirrors the gateway: a per-entry object lives at
74
+ * `descriptor.objectPath(sourceRunId, entry.key)`, where `sourceRunId` is the
75
+ * runId encoded in `ref.path` (preferred — structurally tied to where bytes
76
+ * physically live) falling back to `ref.sourceRunId` (the lineage hint).
77
+ *
78
+ * **Fail open.** `checker.objectExists` throws when existence can't be
79
+ * determined (auth / network / quota). A throw KEEPS the ref/entry — we never
80
+ * drop a real artifact on a transient blip; the read side already tolerates a
81
+ * rare residual 404 (W0349). Only a definitive `false` drops an entry.
82
+ *
83
+ * Pure w.r.t. its inputs: returns a fresh manifest, never mutates `source`.
84
+ */
85
+ export declare function pruneToResolvableRefs(source: ArtifactManifest, checker: ArtifactObjectChecker): Promise<PruneResult>;
@@ -8,7 +8,7 @@
8
8
  * @see docs/decisions/D0040-artifact-ref-source-run-id.md
9
9
  * @see docs/design-docs/cache-hit-artifact-restoration.md
10
10
  */
11
- import { ARTIFACT_REGISTRY, } from "../_vendor/ailf-core/index.js";
11
+ import { ARTIFACT_REGISTRY, runId as parseRunId, } from "../_vendor/ailf-core/index.js";
12
12
  /**
13
13
  * Copy an artifact manifest verbatim and stamp `sourceRunId` on every ref
14
14
  * that doesn't already carry one.
@@ -60,3 +60,110 @@ export function remapToCacheHitRefs(source, opts) {
60
60
  }
61
61
  return out;
62
62
  }
63
+ /**
64
+ * Drop artifact refs (and per-entry entries) a cached report over-claimed —
65
+ * entries whose backing object was never written under the source run's
66
+ * storage prefix (D0040 / D0057, W0350).
67
+ *
68
+ * A degraded source run can publish a manifest that lists `rawResults`
69
+ * entries with no GCS object behind them; `remapToCacheHitRefs` copies those
70
+ * phantom entries forward into the new run's manifest, and the read side then
71
+ * signs URLs that 404 ("the specified key does not exist"). Pruning here, at
72
+ * the cache-hit restore boundary, removes the over-claim at the source so the
73
+ * written manifest's `entryCount` / `entries[]` reflect only artifacts that
74
+ * actually resolve — instead of pushing per-object HEAD checks onto the hot
75
+ * signing path (W0350 AC 3).
76
+ *
77
+ * Resolution mirrors the gateway: a per-entry object lives at
78
+ * `descriptor.objectPath(sourceRunId, entry.key)`, where `sourceRunId` is the
79
+ * runId encoded in `ref.path` (preferred — structurally tied to where bytes
80
+ * physically live) falling back to `ref.sourceRunId` (the lineage hint).
81
+ *
82
+ * **Fail open.** `checker.objectExists` throws when existence can't be
83
+ * determined (auth / network / quota). A throw KEEPS the ref/entry — we never
84
+ * drop a real artifact on a transient blip; the read side already tolerates a
85
+ * rare residual 404 (W0349). Only a definitive `false` drops an entry.
86
+ *
87
+ * Pure w.r.t. its inputs: returns a fresh manifest, never mutates `source`.
88
+ */
89
+ export async function pruneToResolvableRefs(source, checker) {
90
+ const out = {};
91
+ let droppedEntries = 0;
92
+ let droppedRefs = 0;
93
+ for (const [type, ref] of Object.entries(source)) {
94
+ if (!ref)
95
+ continue;
96
+ const artifactType = type;
97
+ const descriptor = ARTIFACT_REGISTRY[artifactType];
98
+ // Bulk: a single object at ref.path.
99
+ if (ref.layout === "bulk") {
100
+ if (await existsOrKeep(checker, ref.path))
101
+ out[artifactType] = ref;
102
+ else
103
+ droppedRefs++;
104
+ continue;
105
+ }
106
+ // Per-entry: each entry is its own object under the source run prefix.
107
+ const sourceRunId = resolveSourceRunId(ref);
108
+ const entries = ref.entries ?? [];
109
+ if (!descriptor || sourceRunId === undefined || entries.length === 0) {
110
+ // Can't resolve per-entry object paths (or there are no entries to probe) — fail open, keep verbatim.
111
+ out[artifactType] = ref;
112
+ continue;
113
+ }
114
+ const keptFlags = await Promise.all(entries.map(async (entry) => {
115
+ let objectPath;
116
+ try {
117
+ objectPath = descriptor.objectPath(sourceRunId, entry.key);
118
+ }
119
+ catch {
120
+ return true; // malformed key — fail open rather than drop
121
+ }
122
+ return existsOrKeep(checker, objectPath);
123
+ }));
124
+ const kept = entries.filter((_, i) => keptFlags[i]);
125
+ if (kept.length === entries.length) {
126
+ out[artifactType] = ref;
127
+ continue;
128
+ }
129
+ droppedEntries += entries.length - kept.length;
130
+ if (kept.length === 0) {
131
+ droppedRefs++; // nothing resolvable — drop the whole over-claimed ref
132
+ continue;
133
+ }
134
+ out[artifactType] = {
135
+ ...ref,
136
+ entries: kept,
137
+ entryCount: kept.length,
138
+ bytes: kept.reduce((sum, e) => sum + (e.bytes ?? 0), 0),
139
+ };
140
+ }
141
+ return { manifest: out, droppedEntries, droppedRefs };
142
+ }
143
+ /**
144
+ * Resolve where a ref's bytes physically live. Prefers the runId encoded in
145
+ * `ref.path` (validated through the canonical parser so a malformed manifest
146
+ * path can't propagate into a synthesized object name) over the
147
+ * `ref.sourceRunId` lineage hint — matching the gateway's resolution order.
148
+ */
149
+ function resolveSourceRunId(ref) {
150
+ const fromPath = /^runs\/([^/]+)/.exec(ref.path)?.[1];
151
+ if (fromPath) {
152
+ const parsed = parseRunId(fromPath);
153
+ if (parsed.ok)
154
+ return parsed.value;
155
+ }
156
+ return ref.sourceRunId;
157
+ }
158
+ /**
159
+ * `checker.objectExists` wrapper that returns `true` (keep) on a thrown,
160
+ * indeterminate result — only a definitive `false` drops the artifact.
161
+ */
162
+ async function existsOrKeep(checker, path) {
163
+ try {
164
+ return await checker.objectExists(path);
165
+ }
166
+ catch {
167
+ return true;
168
+ }
169
+ }
@@ -0,0 +1,32 @@
1
+ /**
2
+ * stampReportValidity — apply the report-trustworthiness detector at write time.
3
+ *
4
+ * The eval write path's forward guarantee (D0059): every newly written report
5
+ * carries a top-level `validity` data-health stamp so the trustworthiness gap
6
+ * cannot recur on new reports. Lives in `pipeline/` (not the orchestration
7
+ * step) so both report-write paths — `PublishReportStep` and the standalone
8
+ * `publish` command — import it without a command→orchestration-step coupling.
9
+ *
10
+ * @see docs/decisions/D0059-report-validity-axis-and-trustworthiness-gate.md
11
+ * @see docs/design-docs/report-trustworthiness-model.md
12
+ */
13
+ import { type Report } from "../_vendor/ailf-core/index.d.ts";
14
+ /**
15
+ * Stamp the data-health `validity` axis (D0059) onto a report and normalize
16
+ * its `provenance.classification` to the canonical spelling.
17
+ *
18
+ * Runs the pure detector (`assessReportValidity`) over the assembled report.
19
+ * `Report` structurally satisfies the detector's `ReportValidityInput`
20
+ * (`provenance` extends `RunContext`; `summary` is a `ReportSummary`), so no
21
+ * adapter is needed. The verdict is **server-computed from the report's own
22
+ * data** (D0037): `assessedAt` is injected by the caller (the report's
23
+ * completion time) and nothing is read from the caller envelope.
24
+ *
25
+ * `classification` is patched only when the detector returns one — it returns
26
+ * `undefined` when the existing value is already canonical and no Tier-1 rule
27
+ * fired, so the patch is idempotent and never clobbers a correct (or
28
+ * human-corrected) value. Tier-2 review flags are not persisted here; the
29
+ * one-shot backfill consumes them. Returns a new report; the input is not
30
+ * mutated.
31
+ */
32
+ export declare function stampReportValidity(report: Report, assessedAt: string): Report;
@@ -0,0 +1,43 @@
1
+ /**
2
+ * stampReportValidity — apply the report-trustworthiness detector at write time.
3
+ *
4
+ * The eval write path's forward guarantee (D0059): every newly written report
5
+ * carries a top-level `validity` data-health stamp so the trustworthiness gap
6
+ * cannot recur on new reports. Lives in `pipeline/` (not the orchestration
7
+ * step) so both report-write paths — `PublishReportStep` and the standalone
8
+ * `publish` command — import it without a command→orchestration-step coupling.
9
+ *
10
+ * @see docs/decisions/D0059-report-validity-axis-and-trustworthiness-gate.md
11
+ * @see docs/design-docs/report-trustworthiness-model.md
12
+ */
13
+ import { assessReportValidity } from "../_vendor/ailf-core/index.js";
14
+ /**
15
+ * Stamp the data-health `validity` axis (D0059) onto a report and normalize
16
+ * its `provenance.classification` to the canonical spelling.
17
+ *
18
+ * Runs the pure detector (`assessReportValidity`) over the assembled report.
19
+ * `Report` structurally satisfies the detector's `ReportValidityInput`
20
+ * (`provenance` extends `RunContext`; `summary` is a `ReportSummary`), so no
21
+ * adapter is needed. The verdict is **server-computed from the report's own
22
+ * data** (D0037): `assessedAt` is injected by the caller (the report's
23
+ * completion time) and nothing is read from the caller envelope.
24
+ *
25
+ * `classification` is patched only when the detector returns one — it returns
26
+ * `undefined` when the existing value is already canonical and no Tier-1 rule
27
+ * fired, so the patch is idempotent and never clobbers a correct (or
28
+ * human-corrected) value. Tier-2 review flags are not persisted here; the
29
+ * one-shot backfill consumes them. Returns a new report; the input is not
30
+ * mutated.
31
+ */
32
+ export function stampReportValidity(report, assessedAt) {
33
+ const { classification, validity } = assessReportValidity(report, {
34
+ assessedAt,
35
+ });
36
+ return {
37
+ ...report,
38
+ provenance: classification
39
+ ? { ...report.provenance, classification }
40
+ : report.provenance,
41
+ validity,
42
+ };
43
+ }
@@ -225,6 +225,7 @@ export interface SanityReportDoc {
225
225
  };
226
226
  tag: null | string;
227
227
  title: null | string;
228
+ validity?: Report["validity"];
228
229
  }
229
230
  export declare function toSanityReportDoc(report: Report): SanityReportDoc;
230
231
  /**
@@ -491,6 +491,7 @@ export function toSanityReportDoc(report) {
491
491
  },
492
492
  tag: report.tag ?? null,
493
493
  title: report.title ?? null,
494
+ ...(report.validity ? { validity: report.validity } : {}),
494
495
  };
495
496
  }
496
497
  /**
@@ -534,6 +535,7 @@ export function toReport(doc) {
534
535
  summary: doc.summary,
535
536
  tag: doc.tag,
536
537
  title: doc.title,
538
+ validity: doc.validity,
537
539
  };
538
540
  }
539
541
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "7.2.2",
3
+ "version": "7.3.0",
4
4
  "private": false,
5
5
  "publishConfig": {
6
6
  "access": "public"