@sanity/ailf 6.0.0 → 6.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/config/airbyte/ai_literacy_framework.connector.yaml +276 -0
  2. package/config/bigquery/views/synthesis_parse_failure_rate_7d.sql +42 -0
  3. package/dist/_vendor/ailf-core/artifact-registry.d.ts +17 -0
  4. package/dist/_vendor/ailf-core/artifact-registry.js +14 -0
  5. package/dist/_vendor/ailf-core/ports/context.d.ts +22 -0
  6. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +7 -0
  7. package/dist/_vendor/ailf-core/schemas/eval-config.js +8 -0
  8. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/failure-mode-summary.test.js +59 -0
  9. package/dist/_vendor/ailf-core/services/diagnosis/cards/doc-attribution-spotlight.js +5 -1
  10. package/dist/_vendor/ailf-core/services/diagnosis/cards/failure-mode-summary.js +47 -3
  11. package/dist/_vendor/ailf-core/services/diagnosis/cards/index.d.ts +10 -0
  12. package/dist/_vendor/ailf-core/services/diagnosis/cards/index.js +13 -0
  13. package/dist/_vendor/ailf-core/services/diagnosis/cards/low-confidence-attribution.js +17 -1
  14. package/dist/_vendor/ailf-core/services/diagnosis/cards/no-issues.js +1 -1
  15. package/dist/_vendor/ailf-core/services/diagnosis/cards/regression-vs-baseline.js +5 -1
  16. package/dist/_vendor/ailf-core/services/diagnosis/cards/top-recommendations.js +5 -1
  17. package/dist/_vendor/ailf-core/services/diagnosis/cards/weakest-area.js +5 -1
  18. package/dist/_vendor/ailf-core/services/diagnosis/prompt-builders.js +15 -2
  19. package/dist/_vendor/ailf-core/services/diagnosis/prompts/weakest-area.system.d.ts +5 -3
  20. package/dist/_vendor/ailf-core/services/diagnosis/prompts/weakest-area.system.js +19 -31
  21. package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
  22. package/dist/_vendor/ailf-core/services/index.js +1 -1
  23. package/dist/_vendor/ailf-core/types/diagnosis.d.ts +3 -0
  24. package/dist/_vendor/ailf-core/types/index.d.ts +7 -0
  25. package/dist/_vendor/ailf-core/types/repo-config.d.ts +16 -0
  26. package/dist/_vendor/ailf-core/types/synthesis-telemetry.d.ts +101 -0
  27. package/dist/_vendor/ailf-core/types/synthesis-telemetry.js +18 -0
  28. package/dist/adapters/config-sources/file-config-adapter.js +8 -6
  29. package/dist/adapters/llm/index.d.ts +1 -1
  30. package/dist/adapters/llm/index.js +1 -1
  31. package/dist/adapters/llm/openai-llm-client.js +7 -2
  32. package/dist/adapters/llm/retry.d.ts +18 -0
  33. package/dist/adapters/llm/retry.js +21 -0
  34. package/dist/adapters/synthesis/synthesis-telemetry-schema.d.ts +49 -0
  35. package/dist/adapters/synthesis/synthesis-telemetry-schema.js +55 -0
  36. package/dist/adapters/task-sources/content-lake-task-source.js +10 -5
  37. package/dist/adapters/task-sources/repo-schemas.d.ts +7 -0
  38. package/dist/adapters/task-sources/repo-schemas.js +10 -0
  39. package/dist/artifact-capture/api-gateway-artifact-writer.d.ts +11 -1
  40. package/dist/artifact-capture/api-gateway-artifact-writer.js +3 -1
  41. package/dist/artifact-capture/batching-api-gateway-artifact-writer.d.ts +11 -1
  42. package/dist/artifact-capture/batching-api-gateway-artifact-writer.js +3 -1
  43. package/dist/artifact-capture/gcs-artifact-writer.d.ts +11 -1
  44. package/dist/artifact-capture/gcs-artifact-writer.js +6 -3
  45. package/dist/artifact-capture/local-fs-artifact-writer.d.ts +11 -1
  46. package/dist/artifact-capture/local-fs-artifact-writer.js +6 -3
  47. package/dist/commands/interpret.d.ts +21 -1
  48. package/dist/commands/interpret.js +13 -4
  49. package/dist/commands/pipeline-action.d.ts +44 -0
  50. package/dist/commands/pipeline-action.js +193 -1
  51. package/dist/commands/run.d.ts +2 -0
  52. package/dist/commands/run.js +2 -0
  53. package/dist/composition-root.d.ts +22 -5
  54. package/dist/composition-root.js +78 -8
  55. package/dist/orchestration/pipeline-orchestrator.js +3 -0
  56. package/dist/orchestration/steps/gap-analysis-step.js +0 -1
  57. package/dist/report-store.d.ts +40 -0
  58. package/dist/report-store.js +88 -0
  59. package/package.json +1 -1
@@ -20,9 +20,13 @@ import { buildAppContext, parseArtifactUploadEnv, } from "../orchestration/build
20
20
  import { buildStepSequence } from "../orchestration/build-step-sequence.js";
21
21
  import { orchestratePipeline } from "../orchestration/pipeline-orchestrator.js";
22
22
  import { load } from "js-yaml";
23
- import { PLACEHOLDER_OWNER_TEAM } from "../_vendor/ailf-core/index.js";
23
+ import { PLACEHOLDER_OWNER_TEAM, } from "../_vendor/ailf-core/index.js";
24
24
  import { parseRepoConfig, } from "../adapters/task-sources/repo-schemas.js";
25
25
  import { getCallerCwd, resolveOutputDir } from "./shared/resolve-output-dir.js";
26
+ // Phase 6 / DIAG-06 — single formatter, single visual contract (D6-04).
27
+ // Import statically so bundlers and type-checkers can verify the export
28
+ // exists at build time rather than deferring to runtime dynamic import.
29
+ import { formatCardSummaryLine } from "./interpret.js";
26
30
  const __dirname = dirname(fileURLToPath(import.meta.url));
27
31
  const ROOT = resolve(__dirname, "..", "..");
28
32
  // ---------------------------------------------------------------------------
@@ -250,6 +254,10 @@ export function computeResolvedOptions(opts) {
250
254
  const graderReplications = repoConfig?.execution?.graderReplications;
251
255
  const borderlineReplications = repoConfig?.execution?.borderlineReplications;
252
256
  const gapAnalysisEnabled = repoConfig?.execution?.gapAnalysis ?? true;
257
+ // Phase 6 / DIAG-06 — post-run diagnosis summary policy from .ailf/config.yaml.
258
+ // Precedence resolution (CLI flag > env > config > auto) lives in
259
+ // shouldRunPostSummary(); this only carries the config-file signal.
260
+ const summaryOnRun = repoConfig?.summary?.onRun;
253
261
  // Grader context policy. Cascade: env var > .ailf/config.yaml > unset
254
262
  // (defaults to rubric-only at the EvalConfig boundary). The env var is the
255
263
  // operational lever for one-shot comparison runs without editing the config file.
@@ -348,6 +356,7 @@ export function computeResolvedOptions(opts) {
348
356
  undefined,
349
357
  purposeOption: opts.purpose?.trim() || undefined,
350
358
  labelOptions: opts.label ?? [],
359
+ summaryOnRun,
351
360
  };
352
361
  }
353
362
  const PUBLISH_AUTO_VALUES = ["always", "full-runs", "never"];
@@ -373,6 +382,179 @@ function resolvePublishAuto(repoValue) {
373
382
  }
374
383
  return "full-runs";
375
384
  }
385
+ // ---------------------------------------------------------------------------
386
+ // Phase 6 / DIAG-06 — post-run diagnosis summary helpers
387
+ // ---------------------------------------------------------------------------
388
+ /**
389
+ * Determine whether the post-run diagnosis summary hook should fire.
390
+ *
391
+ * 4-level precedence chain (D6-20):
392
+ * Level 1 — CLI flag (absolute): if `cliOpts.summary` is boolean, use it.
393
+ * Level 2 — AILF_INTERPRET_ON_RUN env var (absolute): strict "1"/"0" parse;
394
+ * anything else falls through (T-06-11 spoofing mitigation).
395
+ * Level 3 — config `summary.onRun` (absolute): "always" → true; "never" → false;
396
+ * "auto" or absent falls through to level 4.
397
+ * Level 4 — default auto: TTY && !CI (SC1 default-off in CI).
398
+ */
399
+ export function shouldRunPostSummary(cliOpts, resolvedOnRun) {
400
+ // Level 1: CLI flag wins absolutely
401
+ if (cliOpts.summary === true)
402
+ return true;
403
+ if (cliOpts.summary === false)
404
+ return false;
405
+ // Level 2: AILF_INTERPRET_ON_RUN env var (strict parse)
406
+ const envVal = process.env.AILF_INTERPRET_ON_RUN;
407
+ if (envVal === "1")
408
+ return true;
409
+ if (envVal === "0")
410
+ return false;
411
+ // Anything else (garbage, unset) falls through
412
+ // Level 3: config summary.onRun
413
+ if (resolvedOnRun === "always")
414
+ return true;
415
+ if (resolvedOnRun === "never")
416
+ return false;
417
+ // "auto" or undefined falls through
418
+ // Level 4: default auto — fire only when stdout is interactive and not in CI
419
+ return Boolean(process.stdout.isTTY) && process.env.CI !== "true";
420
+ }
421
+ /**
422
+ * Build a SynthesisCostTelemetry payload from a completed Diagnosis.
423
+ *
424
+ * Aggregates:
425
+ * - cost: sum of meta.cost across all cards (undefined treated as 0)
426
+ * - parseFailureCount: cards where status==="degraded" (parse failures)
427
+ * - parseFailureRate: parseFailureCount / total-cards (max 8 per D6-09)
428
+ * - perCard: per-card row with safe-extracted structured metadata
429
+ *
430
+ * Deliberately does NOT read card.body — only structured meta fields are
431
+ * persisted (T-06-14 PII guard per threat model).
432
+ */
433
+ // D6-09: denominator is always the fixed card-registry size, not cards.length.
434
+ // Using cards.length would allow parseFailureRate > 1.0 when the registry is
435
+ // a subset (e.g. test registries), violating the SynthesisCostTelemetrySchema
436
+ // min(0).max(1) constraint. Single edit point if the registry ever grows.
437
+ const CARD_REGISTRY_SIZE = 8;
438
+ export function buildSynthesisTelemetry(diagnosis) {
439
+ const cards = diagnosis.cards;
440
+ let totalCost = 0;
441
+ let parseFailureCount = 0;
442
+ const perCard = cards.map((card) => {
443
+ // "missing" cards have no `meta` — narrow with status guard
444
+ const meta = card.status !== "missing" ? card.meta : undefined;
445
+ const cost = meta?.cost ?? 0;
446
+ totalCost += cost;
447
+ // Parse failures produce status="degraded" (not "missing") in the current
448
+ // runner (diagnosis-runner.ts). A "missing" card is absence, not failure.
449
+ // If a future code path can produce status="missing" from a parse failure,
450
+ // this line must be updated and the parseFailed contract re-evaluated.
451
+ const parseFailed = card.status === "degraded";
452
+ if (parseFailed)
453
+ parseFailureCount++;
454
+ const row = {
455
+ cardType: card.cardType,
456
+ parseFailed,
457
+ cardVersion: meta?.cardVersion ?? "unknown",
458
+ generatedAt: meta?.generatedAt ?? new Date().toISOString(),
459
+ };
460
+ if (cost > 0)
461
+ row.cost = cost;
462
+ if (meta?.latencyMs !== undefined)
463
+ row.latencyMs = meta.latencyMs;
464
+ if (meta?.tokenUsage?.input !== undefined)
465
+ row.tokenInput = meta.tokenUsage.input;
466
+ if (meta?.tokenUsage?.output !== undefined)
467
+ row.tokenOutput = meta.tokenUsage.output;
468
+ return row;
469
+ });
470
+ return {
471
+ cost: totalCost,
472
+ parseFailureCount,
473
+ parseFailureRate: parseFailureCount / CARD_REGISTRY_SIZE,
474
+ perCard,
475
+ };
476
+ }
477
+ /**
478
+ * Run post-pipeline hooks after the pipeline completes.
479
+ *
480
+ * Fires after orchestratePipeline() + writePipelineResult() (D6-02).
481
+ * Hook failure prints to stderr but does NOT change exit code (D6-03).
482
+ * CI default-off: fires only when shouldRunPostSummary returns true (D6-20).
483
+ *
484
+ * @param ctx - App context (composition root wiring)
485
+ * @param result - Pipeline result (includes reportId when published)
486
+ * @param args - Hook options (cliOpts, summaryOnRun from config, optional runnerFactory for tests)
487
+ */
488
+ export async function runPostPipelineHooks(ctx, result, args) {
489
+ if (!shouldRunPostSummary(args.cliOpts, args.summaryOnRun))
490
+ return;
491
+ if (!result.reportId) {
492
+ process.stderr.write("ℹ️ No report published — skipping post-summary.\n");
493
+ return;
494
+ }
495
+ const reportId = result.reportId;
496
+ try {
497
+ // Build the runner — use injected factory (tests) or composition root (production)
498
+ let runner;
499
+ if (args.runnerFactory) {
500
+ runner = args.runnerFactory(ctx);
501
+ }
502
+ else {
503
+ const { getDiagnosisRunner } = await import("../composition-root.js");
504
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
505
+ runner = getDiagnosisRunner(ctx);
506
+ }
507
+ // Read the stored report — needed by the runner for version metadata
508
+ const report = await ctx.reportStore?.read(reportId);
509
+ if (!report) {
510
+ process.stderr.write(`ℹ️ Report not found: ${reportId} — skipping post-summary.\n`);
511
+ return;
512
+ }
513
+ // Derive version metadata from the stored report (same approach as interpret.ts)
514
+ const rec = report;
515
+ const summary = rec.summary;
516
+ const versions = summary?.versions;
517
+ const versionedInputs = {
518
+ graderJudgmentsVersion: typeof versions?.graderJudgmentsVersion === "string"
519
+ ? versions.graderJudgmentsVersion
520
+ : "unknown",
521
+ ensembleVersion: typeof versions?.ensembleVersion === "string"
522
+ ? versions.ensembleVersion
523
+ : "unknown",
524
+ diagnosisVersion: typeof versions?.diagnosisVersion === "string"
525
+ ? versions.diagnosisVersion
526
+ : "unknown",
527
+ cardVersion: typeof versions?.cardVersion === "string"
528
+ ? versions.cardVersion
529
+ : "unknown",
530
+ };
531
+ // Run the diagnosis
532
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
533
+ const diagnosis = await runner.run({
534
+ report: report,
535
+ versions: versionedInputs,
536
+ refresh: false,
537
+ });
538
+ // Print per-card summary lines to stdout (D6-04 single formatter)
539
+ for (const card of diagnosis.cards) {
540
+ process.stdout.write(`${formatCardSummaryLine(card)}\n`);
541
+ }
542
+ // Build and write synthesis telemetry back to the report doc (D6-08)
543
+ // patchSynthesis is now part of ReportStorePort (CR-01) — guard on store
544
+ // presence only; absent store means no report store is configured (expected).
545
+ if (ctx.reportStore) {
546
+ const telemetry = buildSynthesisTelemetry(diagnosis);
547
+ await ctx.reportStore.patchSynthesis(reportId, telemetry);
548
+ }
549
+ else {
550
+ process.stderr.write("ℹ️ No reportStore configured — synthesis telemetry not written to Sanity.\n");
551
+ }
552
+ }
553
+ catch (err) {
554
+ const msg = err instanceof Error ? err.message : String(err);
555
+ process.stderr.write(`⚠️ Diagnosis failed: ${msg}. Run \`ailf interpret ${reportId}\` to retry.\n`);
556
+ }
557
+ }
376
558
  /** Resolve and validate the --task-source flag value. */
377
559
  function resolveTaskSourceType(raw) {
378
560
  if (!raw || raw === "content-lake")
@@ -471,6 +653,11 @@ export async function executePipeline(cliOpts) {
471
653
  const steps = buildStepSequence(ctx, pipelineStart);
472
654
  const result = await orchestratePipeline(ctx, steps);
473
655
  writePipelineResult(result, config.outputDir);
656
+ // Phase 6 / DIAG-06: post-run hook fires after artifacts are written (D6-02)
657
+ await runPostPipelineHooks(ctx, result, {
658
+ cliOpts,
659
+ summaryOnRun: config.summaryOnRun,
660
+ });
474
661
  process.exit(result.success ? 0 : 1);
475
662
  }
476
663
  const o = resolveOptions(cliOpts);
@@ -510,6 +697,11 @@ export async function executePipeline(cliOpts) {
510
697
  const steps = buildStepSequence(ctx, pipelineStart);
511
698
  const result = await orchestratePipeline(ctx, steps);
512
699
  writePipelineResult(result, o.outputDir);
700
+ // Phase 6 / DIAG-06: post-run hook fires after artifacts are written (D6-02)
701
+ await runPostPipelineHooks(ctx, result, {
702
+ cliOpts,
703
+ summaryOnRun: o.summaryOnRun,
704
+ });
513
705
  process.exit(result.success ? 0 : 1);
514
706
  }
515
707
  // ---------------------------------------------------------------------------
@@ -47,6 +47,8 @@ export interface PipelineCliOptions {
47
47
  publish?: boolean;
48
48
  publishTag?: string;
49
49
  remoteCache?: boolean;
50
+ /** Phase 6 / DIAG-06: post-run diagnosis summary toggle. Undefined when neither flag is passed. */
51
+ summary?: boolean;
50
52
  sanityDocument: string[];
51
53
  sanityPerspective?: string;
52
54
  search?: string;
@@ -43,6 +43,8 @@ export function createRunCommand() {
43
43
  .option("-p, --publish", "Write report to Sanity + fan out to sinks (auto-enabled for full runs when report store is configured)")
44
44
  .option("--no-publish", "Suppress auto-publishing")
45
45
  .option("--publish-tag <tag>", "Label for published report")
46
+ .option("--summary", "Force post-run diagnosis summary (overrides config and CI default-off)")
47
+ .option("--no-summary", "Suppress post-run diagnosis summary")
46
48
  .option("--config <path>", "Load pipeline config from a TS/JS/YAML/JSON file (overrides most CLI flags)")
47
49
  .option("-o, --output <path>", "Write PR comment markdown to file")
48
50
  .option("--promptfoo-url <url>", "Promptfoo share URL for report")
@@ -15,7 +15,7 @@
15
15
  * @see packages/core/src/ports/context.ts — AppContext interface
16
16
  * @see docs/archive/exec-plans/ports-and-adapters/phase-7-composition-root.md
17
17
  */
18
- import { type AppContext, type ArtifactWriter, type ArtifactWriterProgressOptions, type AssertionRegistration, type CardRegistry, type DiagnosisRunner, type Logger, type ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
18
+ import { type AppContext, type ArtifactWriter, type ArtifactWriterProgressOptions, type AssertionRegistration, type CardRegistry, type DiagnosisRunner, type Logger, type ResolvedConfig, type WriteSource } from "./_vendor/ailf-core/index.d.ts";
19
19
  export type { LLMClientKeys } from "./_vendor/ailf-core/index.d.ts";
20
20
  import { type BorderlineConsensusOptions, type BorderlineConsensusResult } from "./pipeline/borderline-consensus-runner.js";
21
21
  import { CompositeTaskSource, ContentLakeTaskSource, RepoTaskSource } from "./adapters/task-sources/index.js";
@@ -44,7 +44,7 @@ export declare function createAppContext(config: ResolvedConfig): AppContext;
44
44
  *
45
45
  * Exported for unit-test access; not part of the public package API.
46
46
  */
47
- export declare function createArtifactWriter(config: ResolvedConfig, logger: Logger, progress?: ArtifactWriterProgressOptions): ArtifactWriter;
47
+ export declare function createArtifactWriter(config: ResolvedConfig, logger: Logger, progress?: ArtifactWriterProgressOptions, writerSource?: WriteSource): ArtifactWriter;
48
48
  /**
49
49
  * Build the `TaskSource` adapter wired by the composition root for a
50
50
  * given `ResolvedConfig`. Exported for test access — composition-root
@@ -110,10 +110,27 @@ export declare function buildDiagnosisRegistry(): CardRegistry;
110
110
  *
111
111
  * Wires the full 8-card registry, `loadAttributions` bound to the local
112
112
  * filesystem (Phase-4 per-entry attribution objects at
113
- * `{artifactsDir}/runs/{runId}/attribution/*.json`), and no-op cache
114
- * reader/writer (Plan-06 CLI command will wire the real cache seam).
113
+ * `{artifactsDir}/runs/{runId}/attribution/*.json`), and the real
114
+ * `diagnosisWriter` that emits the Diagnosis through a post-hoc-flagged
115
+ * artifact writer AND patches the report doc's
116
+ * `summary.artifactManifest.diagnosis` slot. Two steps because `ailf interpret`
117
+ * runs after the report doc has already been published — the pipeline path's
118
+ * publish-report-step.ts:187 lifts the in-memory run manifest into the doc at
119
+ * end-of-run, but that step never fires for a deferred command.
120
+ *
121
+ * The post-hoc writer is built with `writerSource: "post-hoc"` so the D0050
122
+ * guard accepts the diagnosis descriptor (`writePolicy: "post-hoc"`). Without
123
+ * this, every emit would be rejected at runtime.
124
+ *
125
+ * `diagnosisReader` is still a no-op shim: the Studio data path uses the
126
+ * artifact-manifest entry (populated by the writer + patch) plus a signed-URL
127
+ * fetch, so reader-side cache wiring is deferred to a follow-up W-item.
128
+ * Without the reader, `ailf interpret --refresh` cache hits are not yet served
129
+ * from GCS — they recompute.
115
130
  *
116
131
  * Plan-06 API/CLI consumers import this function from the composition root
117
132
  * and pass `ctx` from `createAppContext(config)`.
118
133
  */
119
- export declare function getDiagnosisRunner(ctx: AppContext): DiagnosisRunner;
134
+ export declare function getDiagnosisRunner(ctx: AppContext, opts?: {
135
+ artifactWriter?: ArtifactWriter;
136
+ }): DiagnosisRunner;
@@ -192,7 +192,7 @@ const DEFAULT_LOCAL_ARTIFACTS_DIR = ".ailf/results/captures";
192
192
  *
193
193
  * Exported for unit-test access; not part of the public package API.
194
194
  */
195
- export function createArtifactWriter(config, logger, progress) {
195
+ export function createArtifactWriter(config, logger, progress, writerSource = "pipeline") {
196
196
  // Legacy `artifactUpload: false` still disables — treat as an alias for
197
197
  // the canonical `artifactsDisabled: true` until W0052 removes it.
198
198
  if (config.artifactsDisabled === true || config.artifactUpload === false) {
@@ -214,10 +214,11 @@ export function createArtifactWriter(config, logger, progress) {
214
214
  // W0053: progress attaches to the OUTERMOST of (local-only | fanout). When
215
215
  // fanout is wired, the delegates stay silent so we don't double-count the
216
216
  // same caller-visible write across two backends.
217
- const remote = createRemoteArtifactWriter(config, logger, metrics);
217
+ const remote = createRemoteArtifactWriter(config, logger, metrics, writerSource);
218
218
  const local = new LocalFilesystemArtifactWriter({
219
219
  rootDir,
220
220
  exclude,
221
+ writerSource,
221
222
  ...(remote ? {} : { progress }),
222
223
  });
223
224
  // W0064 — when a remote backend is wired, list it first so its ArtifactRef
@@ -267,7 +268,7 @@ function resolveExcludeList(raw, logger) {
267
268
  * the sole backend for that run, which is the D0033 M4 default for laptops
268
269
  * and CI without GCS creds.
269
270
  */
270
- function createRemoteArtifactWriter(config, logger, metrics) {
271
+ function createRemoteArtifactWriter(config, logger, metrics, writerSource = "pipeline") {
271
272
  const bucket = config.artifactGcsBucket ?? DEFAULT_ARTIFACT_BUCKET;
272
273
  const hasGcsCredentials = Boolean(process.env.GOOGLE_APPLICATION_CREDENTIALS || process.env.GCLOUD_PROJECT);
273
274
  if (hasGcsCredentials) {
@@ -279,6 +280,7 @@ function createRemoteArtifactWriter(config, logger, metrics) {
279
280
  logger.debug(`Artifact remote backend: GcsArtifactWriter (ADC, bucket=${bucket}, defaultConcurrency=8)`);
280
281
  return new GcsArtifactWriter({
281
282
  bucket,
283
+ writerSource,
282
284
  ...(metrics ? { metrics } : {}),
283
285
  });
284
286
  }
@@ -306,6 +308,7 @@ function createRemoteArtifactWriter(config, logger, metrics) {
306
308
  apiKey: config.apiKey,
307
309
  bucket,
308
310
  putConcurrency: concurrency,
311
+ writerSource,
309
312
  ...(metrics ? { metrics } : {}),
310
313
  });
311
314
  }
@@ -314,6 +317,7 @@ function createRemoteArtifactWriter(config, logger, metrics) {
314
317
  apiBaseUrl: config.apiUrl,
315
318
  apiKey: config.apiKey,
316
319
  bucket,
320
+ writerSource,
317
321
  ...(metrics ? { metrics } : {}),
318
322
  });
319
323
  }
@@ -585,17 +589,83 @@ async function loadAttributionsFromLocalFs(runId, artifactsDir, logger) {
585
589
  *
586
590
  * Wires the full 8-card registry, `loadAttributions` bound to the local
587
591
  * filesystem (Phase-4 per-entry attribution objects at
588
- * `{artifactsDir}/runs/{runId}/attribution/*.json`), and no-op cache
589
- * reader/writer (Plan-06 CLI command will wire the real cache seam).
592
+ * `{artifactsDir}/runs/{runId}/attribution/*.json`), and the real
593
+ * `diagnosisWriter` that emits the Diagnosis through a post-hoc-flagged
594
+ * artifact writer AND patches the report doc's
595
+ * `summary.artifactManifest.diagnosis` slot. Two steps because `ailf interpret`
596
+ * runs after the report doc has already been published — the pipeline path's
597
+ * publish-report-step.ts:187 lifts the in-memory run manifest into the doc at
598
+ * end-of-run, but that step never fires for a deferred command.
599
+ *
600
+ * The post-hoc writer is built with `writerSource: "post-hoc"` so the D0050
601
+ * guard accepts the diagnosis descriptor (`writePolicy: "post-hoc"`). Without
602
+ * this, every emit would be rejected at runtime.
603
+ *
604
+ * `diagnosisReader` is still a no-op shim: the Studio data path uses the
605
+ * artifact-manifest entry (populated by the writer + patch) plus a signed-URL
606
+ * fetch, so reader-side cache wiring is deferred to a follow-up W-item.
607
+ * Without the reader, `ailf interpret --refresh` cache hits are not yet served
608
+ * from GCS — they recompute.
590
609
  *
591
610
  * Plan-06 API/CLI consumers import this function from the composition root
592
611
  * and pass `ctx` from `createAppContext(config)`.
593
612
  */
594
- export function getDiagnosisRunner(ctx) {
613
+ export function getDiagnosisRunner(ctx, opts) {
595
614
  const artifactsDir = ctx.config.artifactsDir ?? DIAGNOSIS_LOCAL_ARTIFACTS_DIR;
596
- // No-op cache shims — Plan 06 wires the real cache.
615
+ // Post-hoc artifact writer — built with the same fanout/remote/local layering
616
+ // as the pipeline writer but flagged so the D0050 guard accepts post-hoc
617
+ // descriptors. Construction is per-runner so the AccumulatingArtifactWriter's
618
+ // internal manifest doesn't carry state between unrelated interpret runs.
619
+ // Tests inject their own writer via opts.artifactWriter; the production
620
+ // CLI / pipeline callers never pass it.
621
+ const postHocArtifactWriter = opts?.artifactWriter ??
622
+ createArtifactWriter(ctx.config, ctx.logger, undefined, "post-hoc");
623
+ // No-op reader — see JSDoc above. The Studio data path is manifest-driven,
624
+ // not reader-driven, so the writer + patch alone unblock Phase 7.
597
625
  const diagnosisReader = async (_path) => null;
598
- const diagnosisWriter = async (_path, _diagnosis) => { };
626
+ // Real writer — two-step persistence:
627
+ // 1. Emit the diagnosis payload through the post-hoc writer; the descriptor's
628
+ // `objectPath: diagnosisPathBuilder()` derives the storage path from
629
+ // `{runId, reportId, compoundVersion}`.
630
+ // 2. Patch the published report doc's `summary.artifactManifest.diagnosis`
631
+ // slot with the returned ArtifactRef, so Studio's slim-shape GROQ
632
+ // projection surfaces the entry. (The pipeline path runs this lift via
633
+ // publish-report-step.ts; that step never fires for a deferred command,
634
+ // hence the explicit patch here.)
635
+ //
636
+ // Errors are caught and logged rather than thrown — the diagnosis runner
637
+ // separates "compute" from "persist". Failed persistence should not panic
638
+ // the runner; the computed cards still surface to API/CLI callers in-memory.
639
+ // ReportStore.patchArtifactManifest is itself non-fatal on Sanity failure,
640
+ // so it does not need its own try/catch.
641
+ const diagnosisWriter = async (_descriptorPath, diagnosis) => {
642
+ let ref;
643
+ try {
644
+ // Anchor the diagnosis to the REPORT's run, not the post-hoc CLI's
645
+ // session run. `ctx.runId` is freshly generated per interpret
646
+ // invocation; the report doc's `provenance.runId` is what Studio
647
+ // and the signing endpoint look up. Using `assoc(ctx, ...)` would
648
+ // bind `run` to ctx.runId — the path would be writeable but
649
+ // unreachable from the Studio side.
650
+ ref = await postHocArtifactWriter.emit("diagnosis", { run: diagnosis.runId, report: diagnosis.reportId }, diagnosis);
651
+ }
652
+ catch (error) {
653
+ ctx.logger.warn("diagnosis-emit-failed", {
654
+ reportId: diagnosis.reportId,
655
+ error: error instanceof Error ? error.message : String(error),
656
+ });
657
+ return;
658
+ }
659
+ if (!ref)
660
+ return;
661
+ if (!ctx.reportStore) {
662
+ ctx.logger.warn("diagnosis-emit: no reportStore on context", {
663
+ reportId: diagnosis.reportId,
664
+ });
665
+ return;
666
+ }
667
+ await ctx.reportStore.patchArtifactManifest(diagnosis.reportId, "diagnosis", ref);
668
+ };
599
669
  return createDiagnosisRunner({
600
670
  llm: ctx.llmClient,
601
671
  model: modelId("anthropic:claude-opus-4-6"),
@@ -275,6 +275,9 @@ export async function orchestratePipeline(ctx, steps) {
275
275
  belowCritical: state.belowCritical,
276
276
  durationMs,
277
277
  promptfooUrls: state.promptfooUrls,
278
+ // Phase 6 / DIAG-06 — thread reportId from state so the post-run hook in
279
+ // pipeline-action.ts can target the published Content Lake document.
280
+ reportId: state.reportId,
278
281
  steps: results,
279
282
  success: true,
280
283
  testSummary: state.testSummary,
@@ -215,7 +215,6 @@ export class GapAnalysisStep {
215
215
  ...(documentManifest !== undefined && { documentManifest }),
216
216
  failureModes: failureModeReport,
217
217
  lowScoringJudgments,
218
- recommendations: gapReport,
219
218
  scores: enrichedScores,
220
219
  ...(testResults !== undefined && { testResults }),
221
220
  };
@@ -15,6 +15,7 @@
15
15
  * @see docs/design-docs/report-store/domain-model.md
16
16
  */
17
17
  import type { SanityClient } from "@sanity/client";
18
+ import type { ArtifactRef, ArtifactType, SynthesisCostTelemetry } from "./_vendor/ailf-core/index.d.ts";
18
19
  import type { ComparisonReport, ISOTimestamp, LineageQuery, Report, ReportId, ReportProvenance, ScoreSummary } from "./pipeline/types.js";
19
20
  /**
20
21
  * Result of an auto-comparison, bundling the ComparisonReport with the
@@ -89,6 +90,22 @@ export declare class ReportStore {
89
90
  * @see docs/design-docs/report-store/architecture.md — Auto-comparison
90
91
  */
91
92
  findComparableBaseline(query: LineageQuery): Promise<null | Report>;
93
+ /**
94
+ * Fetch the most recent report from the Content Lake.
95
+ *
96
+ * Mirrors the API gateway's `ReportStoreApi.latest()` signature
97
+ * (`packages/api/src/lib/sanity.ts`). Used by `ailf interpret latest`
98
+ * when no explicit report ID is supplied.
99
+ *
100
+ * @param query Optional narrowing by `mode` and/or `source.name`.
101
+ * @returns The most recent matching report, or null if none exist
102
+ * or on API failure. Schema-validation errors are advisory (logged
103
+ * and null-returned) per the same rationale as `findByFingerprint`.
104
+ */
105
+ latest(query?: {
106
+ mode?: string;
107
+ source?: string;
108
+ }): Promise<null | Report>;
92
109
  /**
93
110
  * Read a report by its ID.
94
111
  *
@@ -108,6 +125,29 @@ export declare class ReportStore {
108
125
  * runtime schema gate. Schema drift is a bug, not an outage.
109
126
  */
110
127
  write(report: Report): Promise<null | ReportId>;
128
+ /**
129
+ * Patch synthesis telemetry onto a published report (Phase 6 / DIAG-06).
130
+ * Non-fatal on Sanity failure — the on-disk reportSnapshot artifact
131
+ * remains the source of truth. Mirrors `write()` (L379–411) for
132
+ * error handling.
133
+ *
134
+ * Document _id is `report-${reportId}` (see `toSanityReportDoc` L559).
135
+ */
136
+ patchSynthesis(reportId: ReportId, telemetry: SynthesisCostTelemetry): Promise<void>;
137
+ /**
138
+ * Patch a single artifact-manifest entry onto a published report.
139
+ *
140
+ * Used by deferred commands like `ailf interpret` whose post-hoc writer
141
+ * produces a new ArtifactRef *after* the report doc was published. The
142
+ * pipeline path lifts the full manifest into the doc at publish time
143
+ * (publish-report-step.ts:187); this method is the post-hoc equivalent
144
+ * for a single slot.
145
+ *
146
+ * Non-fatal on Sanity failure — mirrors `patchSynthesis` (L423).
147
+ *
148
+ * Document _id is `report-${reportId}` (see `toSanityReportDoc` L559).
149
+ */
150
+ patchArtifactManifest(reportId: ReportId, slot: ArtifactType, ref: ArtifactRef): Promise<void>;
111
151
  /**
112
152
  * Query error arrays from the last N reports for chronic failure detection.
113
153
  *
@@ -207,6 +207,50 @@ export class ReportStore {
207
207
  return null;
208
208
  }
209
209
  }
210
+ /**
211
+ * Fetch the most recent report from the Content Lake.
212
+ *
213
+ * Mirrors the API gateway's `ReportStoreApi.latest()` signature
214
+ * (`packages/api/src/lib/sanity.ts`). Used by `ailf interpret latest`
215
+ * when no explicit report ID is supplied.
216
+ *
217
+ * @param query Optional narrowing by `mode` and/or `source.name`.
218
+ * @returns The most recent matching report, or null if none exist
219
+ * or on API failure. Schema-validation errors are advisory (logged
220
+ * and null-returned) per the same rationale as `findByFingerprint`.
221
+ */
222
+ async latest(query) {
223
+ try {
224
+ let groq = `*[_type == $type`;
225
+ const params = { type: REPORT_TYPE };
226
+ if (query?.mode) {
227
+ groq += ` && provenance.mode == $mode`;
228
+ params.mode = query.mode;
229
+ }
230
+ if (query?.source) {
231
+ groq += ` && provenance.source.name == $source`;
232
+ params.source = query.source;
233
+ }
234
+ groq += `] | order(completedAt desc) [0]`;
235
+ const doc = await this.client.fetch(groq, params);
236
+ return doc ? toReport(doc) : null;
237
+ }
238
+ catch (error) {
239
+ // Advisory lookup — a malformed prior doc must not break a read-only
240
+ // CLI invocation. Log and return null so the caller surfaces a
241
+ // user-friendly "no report found" error instead of a Zod stack trace.
242
+ if (error instanceof ReportSchemaValidationError) {
243
+ logAdvisoryQuerySchemaFailure({
244
+ query: "latest",
245
+ context: { mode: query?.mode, sourceName: query?.source },
246
+ error,
247
+ });
248
+ return null;
249
+ }
250
+ console.warn(` ⚠️ Failed to fetch latest report from Sanity: ${error instanceof Error ? error.message : String(error)}`);
251
+ return null;
252
+ }
253
+ }
210
254
  /**
211
255
  * Read a report by its ID.
212
256
  *
@@ -264,6 +308,50 @@ export class ReportStore {
264
308
  return null;
265
309
  }
266
310
  }
311
+ /**
312
+ * Patch synthesis telemetry onto a published report (Phase 6 / DIAG-06).
313
+ * Non-fatal on Sanity failure — the on-disk reportSnapshot artifact
314
+ * remains the source of truth. Mirrors `write()` (L379–411) for
315
+ * error handling.
316
+ *
317
+ * Document _id is `report-${reportId}` (see `toSanityReportDoc` L559).
318
+ */
319
+ async patchSynthesis(reportId, telemetry) {
320
+ try {
321
+ await this.client
322
+ .patch(`report-${reportId}`)
323
+ .set({ "summary.synthesis": { diagnosis: telemetry } })
324
+ .commit();
325
+ }
326
+ catch (error) {
327
+ console.warn(` ⚠️ Failed to patch synthesis telemetry on report ${reportId}: ${error instanceof Error ? error.message : String(error)}`);
328
+ }
329
+ }
330
+ /**
331
+ * Patch a single artifact-manifest entry onto a published report.
332
+ *
333
+ * Used by deferred commands like `ailf interpret` whose post-hoc writer
334
+ * produces a new ArtifactRef *after* the report doc was published. The
335
+ * pipeline path lifts the full manifest into the doc at publish time
336
+ * (publish-report-step.ts:187); this method is the post-hoc equivalent
337
+ * for a single slot.
338
+ *
339
+ * Non-fatal on Sanity failure — mirrors `patchSynthesis` (L423).
340
+ *
341
+ * Document _id is `report-${reportId}` (see `toSanityReportDoc` L559).
342
+ */
343
+ async patchArtifactManifest(reportId, slot, ref) {
344
+ try {
345
+ await this.client
346
+ .patch(`report-${reportId}`)
347
+ .setIfMissing({ "summary.artifactManifest": {} })
348
+ .set({ [`summary.artifactManifest.${slot}`]: ref })
349
+ .commit();
350
+ }
351
+ catch (error) {
352
+ console.warn(` ⚠️ Failed to patch artifactManifest.${slot} on report ${reportId}: ${error instanceof Error ? error.message : String(error)}`);
353
+ }
354
+ }
267
355
  /**
268
356
  * Query error arrays from the last N reports for chronic failure detection.
269
357
  *
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "6.0.0",
3
+ "version": "6.1.1",
4
4
  "private": false,
5
5
  "publishConfig": {
6
6
  "access": "public"