npm - @sanity/ailf - Versions diffs - 2.7.1 → 2.8.0 - Mend

@sanity/ailf 2.7.1 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48)

package/dist/_vendor/ailf-core/artifact-registry.d.ts +72 -0
package/dist/_vendor/ailf-core/artifact-registry.js +150 -0
package/dist/_vendor/ailf-core/index.d.ts +2 -1
package/dist/_vendor/ailf-core/index.js +2 -1
package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +3 -3
package/dist/_vendor/ailf-core/ports/artifact-writer.d.ts +56 -0
package/dist/_vendor/ailf-core/ports/artifact-writer.js +28 -0
package/dist/_vendor/ailf-core/ports/context.d.ts +13 -3
package/dist/_vendor/ailf-core/ports/index.d.ts +3 -3
package/dist/_vendor/ailf-core/ports/index.js +1 -1
package/dist/_vendor/ailf-core/types/branded-ids.d.ts +9 -0
package/dist/_vendor/ailf-core/types/branded-ids.js +21 -0
package/dist/_vendor/ailf-core/types/index.d.ts +110 -68
package/dist/_vendor/ailf-core/types/index.js +1 -1
package/dist/_vendor/ailf-shared/index.d.ts +2 -0
package/dist/_vendor/ailf-shared/index.js +2 -0
package/dist/_vendor/ailf-shared/run-context.d.ts +55 -0
package/dist/_vendor/ailf-shared/run-context.js +17 -0
package/dist/_vendor/ailf-shared/run-trigger.d.ts +30 -0
package/dist/_vendor/ailf-shared/run-trigger.js +13 -0
package/dist/artifact-capture/api-gateway-artifact-writer.d.ts +39 -0
package/dist/artifact-capture/api-gateway-artifact-writer.js +148 -0
package/dist/artifact-capture/gcs-artifact-writer.d.ts +30 -0
package/dist/artifact-capture/gcs-artifact-writer.js +119 -0
package/dist/commands/publish.js +3 -2
package/dist/composition-root.d.ts +3 -3
package/dist/composition-root.js +20 -15
package/dist/orchestration/build-step-sequence.js +6 -1
package/dist/orchestration/steps/calculate-scores-step.js +42 -2
package/dist/orchestration/steps/finalize-run-step.d.ts +29 -0
package/dist/orchestration/steps/finalize-run-step.js +103 -0
package/dist/orchestration/steps/publish-report-step.js +19 -39
package/dist/pipeline/calculate-scores.js +13 -2
package/dist/pipeline/provenance.d.ts +24 -44
package/dist/pipeline/provenance.js +17 -165
package/dist/pipeline/report-title.d.ts +2 -2
package/dist/pipeline/run-context.d.ts +57 -0
package/dist/pipeline/run-context.js +156 -0
package/dist/pipeline/upload-test-outputs.d.ts +26 -0
package/dist/pipeline/upload-test-outputs.js +34 -0
package/dist/report-store.js +4 -2
package/package.json +3 -3
package/dist/_vendor/ailf-core/ports/artifact-uploader.d.ts +0 -35
package/dist/_vendor/ailf-core/ports/artifact-uploader.js +0 -18
package/dist/artifact-capture/api-gateway-artifact-uploader.d.ts +0 -41
package/dist/artifact-capture/api-gateway-artifact-uploader.js +0 -123
package/dist/artifact-capture/gcs-report-artifact-uploader.d.ts +0 -31
package/dist/artifact-capture/gcs-report-artifact-uploader.js +0 -66

package/dist/pipeline/provenance.d.ts CHANGED Viewed

@@ -1,65 +1,45 @@
 /**
  * pipeline/provenance.ts
  *
- * Builds ReportProvenance from data available during a pipeline run.
+ * Builds `ReportProvenance` from data available during a pipeline run.
  *
- * Provenance captures what produced an evaluation report: which models,
- * which source, which mode, what triggered it, git metadata, etc.
- * Most of this data already flows through the pipeline — this module
- * just captures what would otherwise be ephemeral.
+ * `ReportProvenance extends RunContext` (D0032). This module derives
+ * RunContext via `buildRunContext()` and attaches report-specific extras
+ * (lineage, autoScope, contextHash, promptfoo URLs, targetDocuments, runId).
+ * A single derivation path for RunContext forecloses drift between the run
+ * manifest (GCS) and the report provenance (Content Lake).
  *
- * @see docs/design-docs/report-store/domain-model.md
- * @see docs/design-docs/report-store/architecture.md — Provenance collection
+ * @see packages/eval/src/pipeline/run-context.ts — the shared derivation path
+ * @see docs/decisions/D0032-run-anchored-artifact-store.md (§ Drift Prevention)
  */
-import type { Logger } from "../_vendor/ailf-core/index.d.ts";
-import type { ResolvedSourceConfig } from "../sources.js";
-import type { EvalMode, PromptfooUrlEntry, ReportAutoScope, ReportProvenance } from "./types.js";
-export interface ProvenanceInput {
-    /** Feature areas that were evaluated */
-    areas: string[];
-    /** Logger instance (defaults to ConsoleLogger) */
-    logger?: Logger;
+import type { PromptfooUrlEntry, ReportAutoScope, ReportProvenance, RunId } from "./types.js";
+import { type RunContextInput } from "./run-context.js";
+/**
+ * Inputs needed to build a ReportProvenance. Extends `RunContextInput` so
+ * the RunContext derivation path is shared.
+ */
+export interface ProvenanceInput extends RunContextInput {
     /** Release auto-scope metadata (when perspective evaluation was scoped) */
     autoScope?: ReportAutoScope;
-    /**
-     * Git metadata from the *calling* repository (cross-repo evaluations).
-     * When provided, overrides CI env var detection so provenance attributes
-     * to the caller — not the AILF core repo where the workflow executes.
-     */
-    callerGit?: {
-        branch?: string;
-        prNumber?: number;
-        repo: string;
-        sha?: string;
-    };
     /** SHA-256 hash of the doc context files (from cache system) */
     contextHash?: string;
-    /** Evaluation fingerprint for cross-environment cache lookup */
-    evalFingerprint?: string;
-    /** Evaluation mode */
-    mode: EvalMode;
     /** @deprecated Use `promptfooUrls` — kept for backward compatibility */
     promptfooUrl?: string;
     /** Per-mode Promptfoo share URLs */
     promptfooUrls?: PromptfooUrlEntry[];
-    /** Path to the package root (for reading config/models) */
-    rootDir: string;
-    /** Report ID that triggered this re-run (becomes lineage.rerunOf) */
-    sourceReportId?: string;
+    /** Identity of the pipeline run that produced this report (D0032) */
+    runId: RunId;
     /** Sanity document IDs targeted */
     sanityDocumentIds?: string[];
-    /** Resolved documentation source */
-    source: ResolvedSourceConfig;
-    /** Specific task IDs evaluated (if scoped) */
-    taskIds?: string[];
+    /** Report ID that triggered this re-run (becomes lineage.rerunOf) */
+    sourceReportId?: string;
 }
 /**
- * Build a ReportProvenance object from pipeline context.
+ * Build a ReportProvenance from pipeline context.
  *
- * Assembles provenance from:
- * - Pipeline options (mode, source, areas, tasks)
- * - config/models.ts (model list, grader)
- * - Environment variables (CI metadata, trigger detection)
- * - Optional metadata (context hash, Promptfoo URL)
+ * RunContext fields (mode, areas, taskIds, models, graderModel, source,
+ * evalFingerprint, trigger, git) come from `buildRunContext`. Report-
+ * specific fields (autoScope, contextHash, lineage, promptfoo*, runId,
+ * targetDocuments) are attached here.
  */
 export declare function buildProvenance(input: ProvenanceInput): ReportProvenance;

package/dist/pipeline/provenance.js CHANGED Viewed

@@ -1,188 +1,40 @@
 /**
  * pipeline/provenance.ts
  *
- * Builds ReportProvenance from data available during a pipeline run.
+ * Builds `ReportProvenance` from data available during a pipeline run.
  *
- * Provenance captures what produced an evaluation report: which models,
- * which source, which mode, what triggered it, git metadata, etc.
- * Most of this data already flows through the pipeline — this module
- * just captures what would otherwise be ephemeral.
+ * `ReportProvenance extends RunContext` (D0032). This module derives
+ * RunContext via `buildRunContext()` and attaches report-specific extras
+ * (lineage, autoScope, contextHash, promptfoo URLs, targetDocuments, runId).
+ * A single derivation path for RunContext forecloses drift between the run
+ * manifest (GCS) and the report provenance (Content Lake).
  *
- * @see docs/design-docs/report-store/domain-model.md
- * @see docs/design-docs/report-store/architecture.md — Provenance collection
+ * @see packages/eval/src/pipeline/run-context.ts — the shared derivation path
+ * @see docs/decisions/D0032-run-anchored-artifact-store.md (§ Drift Prevention)
  */
-import { ConsoleLogger } from "../adapters/loggers/index.js";
-import { tryLoadConfigFile } from "./compiler/config-loader.js";
+import { buildRunContext } from "./run-context.js";
 /**
- * Build a ReportProvenance object from pipeline context.
+ * Build a ReportProvenance from pipeline context.
  *
- * Assembles provenance from:
- * - Pipeline options (mode, source, areas, tasks)
- * - config/models.ts (model list, grader)
- * - Environment variables (CI metadata, trigger detection)
- * - Optional metadata (context hash, Promptfoo URL)
+ * RunContext fields (mode, areas, taskIds, models, graderModel, source,
+ * evalFingerprint, trigger, git) come from `buildRunContext`. Report-
+ * specific fields (autoScope, contextHash, lineage, promptfoo*, runId,
+ * targetDocuments) are attached here.
  */
 export function buildProvenance(input) {
-    const log = input.logger ?? new ConsoleLogger();
-    const models = loadModelsConfig(input.rootDir, log);
-    log.debug("Assembling provenance input", {
-        mode: input.mode,
-        sourceName: input.source.name,
-        sourceBaseUrl: input.source.baseUrl,
-        areas: input.areas,
-        taskIds: input.taskIds,
-        hasContextHash: Boolean(input.contextHash),
-        hasEvalFingerprint: Boolean(input.evalFingerprint),
-        hasCallerGit: Boolean(input.callerGit),
-        hasSourceReportId: Boolean(input.sourceReportId),
-        modelCount: models.models.length,
-    });
-    // Cross-repo evaluations: prefer explicit caller git metadata over
-    // CI env vars (which always reflect the AILF core repo).
-    const git = input.callerGit
-        ? {
-            branch: input.callerGit.branch ?? "unknown",
-            prNumber: input.callerGit.prNumber,
-            repo: input.callerGit.repo,
-            sha: input.callerGit.sha ?? "unknown",
-        }
-        : detectGitMetadata();
+    const runContext = buildRunContext(input);
     // Build lineage from explicit relationships
     const lineage = input.sourceReportId
         ? { rerunOf: input.sourceReportId }
         : undefined;
-    const trigger = detectTrigger();
-    log.debug("Provenance computed", {
-        triggerType: trigger.type,
-        gitRepo: git?.repo,
-        gitBranch: git?.branch,
-        evalFingerprint: input.evalFingerprint,
-        hasLineage: Boolean(lineage),
-    });
-    // Non-literacy modes (agent-harness, mcp-server, etc.) don't use the
-    // config/models.ts model matrix — listing those models would be misleading.
-    // Only include them for literacy mode where they're the actual eval targets.
-    const evaluatedModels = input.mode === "literacy"
-        ? models.models.map((m) => ({ id: m.id, label: m.label }))
-        : [];
     return {
-        areas: input.areas,
+        ...runContext,
         autoScope: input.autoScope,
         contextHash: input.contextHash,
-        evalFingerprint: input.evalFingerprint,
-        git,
-        graderModel: models.grader.id,
         lineage,
-        mode: input.mode,
-        models: evaluatedModels,
         promptfooUrl: input.promptfooUrl,
         promptfooUrls: input.promptfooUrls,
-        source: {
-            baseUrl: input.source.baseUrl,
-            dataset: input.source.dataset,
-            name: input.source.name,
-            perspective: input.source.perspective,
-            projectId: input.source.projectId,
-        },
+        runId: input.runId,
         targetDocuments: input.sanityDocumentIds,
-        taskIds: input.taskIds,
-        trigger: detectTrigger(),
-    };
-}
-// ---------------------------------------------------------------------------
-// Trigger detection
-// ---------------------------------------------------------------------------
-/**
- * Extract git metadata from GitHub Actions environment variables.
- * Returns undefined when not running in CI.
- */
-function detectGitMetadata() {
-    const repo = process.env.GITHUB_REPOSITORY;
-    if (!repo)
-        return undefined;
-    const sha = process.env.GITHUB_SHA ?? "unknown";
-    const ref = process.env.GITHUB_REF ?? "";
-    // Extract branch name from ref (refs/heads/main → main)
-    const branch = ref.startsWith("refs/heads/")
-        ? ref.slice("refs/heads/".length)
-        : ref.startsWith("refs/pull/")
-            ? `pr-${ref.split("/")[2]}`
-            : ref;
-    // Extract PR number from GITHUB_REF (refs/pull/123/merge)
-    const prMatch = ref.match(/^refs\/pull\/(\d+)\//);
-    const prNumber = prMatch ? parseInt(prMatch[1], 10) : undefined;
-    return { branch, prNumber, repo, sha };
-}
-// ---------------------------------------------------------------------------
-// Git metadata
-// ---------------------------------------------------------------------------
-/**
- * Infer what triggered this evaluation from environment variables.
- *
- * Detection order:
- * 1. AILF_TRIGGER_TYPE — explicit override (for custom integrations)
- * 2. GITHUB_EVENT_NAME === "schedule" — cron-triggered
- * 3. GITHUB_EVENT_NAME === "repository_dispatch" — cross-repo trigger
- * 4. GITHUB_ACTIONS === "true" — CI-triggered
- * 5. Default: manual
- */
-function detectTrigger() {
-    const explicit = process.env.AILF_TRIGGER_TYPE;
-    if (explicit === "scheduled") {
-        return {
-            schedule: process.env.AILF_SCHEDULE ?? "unknown",
-            type: "scheduled",
-        };
-    }
-    if (explicit === "webhook") {
-        return {
-            documentId: process.env.AILF_WEBHOOK_DOCUMENT_ID,
-            source: process.env.AILF_WEBHOOK_SOURCE ?? "unknown",
-            type: "webhook",
-        };
-    }
-    // GitHub Actions context
-    const eventName = process.env.GITHUB_EVENT_NAME;
-    if (eventName === "schedule") {
-        return {
-            schedule: process.env.GITHUB_SCHEDULE ?? "unknown",
-            type: "scheduled",
-        };
-    }
-    if (eventName === "repository_dispatch") {
-        return {
-            callerRef: process.env.GITHUB_REF,
-            // Note: callerRepo here is a fallback. The accurate caller repo
-            // comes from callerGit (injected into the PipelineRequest payload).
-            // GITHUB_REPOSITORY_OWNER_ID is just the org ID, not owner/repo.
-            callerRepo: process.env.GITHUB_REPOSITORY_OWNER_ID ?? "unknown",
-            type: "cross-repo",
-        };
-    }
-    if (process.env.GITHUB_ACTIONS === "true") {
-        return {
-            runId: process.env.GITHUB_RUN_ID ?? "unknown",
-            type: "ci",
-            workflow: process.env.GITHUB_WORKFLOW ?? "unknown",
-        };
-    }
-    return { type: "manual" };
-}
-// ---------------------------------------------------------------------------
-// Model config loading
-// ---------------------------------------------------------------------------
-/**
- * Load config/models to extract model list and grader info.
- * Falls back to a minimal config if the file can't be read.
- */
-function loadModelsConfig(rootDir, log) {
-    const result = tryLoadConfigFile("models", rootDir);
-    if (result)
-        return result.data;
-    log.warn("Could not read config/models for provenance");
-    return {
-        defaults: {},
-        grader: { id: "unknown" },
-        models: [],
     };
 }

package/dist/pipeline/report-title.d.ts CHANGED Viewed

@@ -15,7 +15,7 @@
  * @see docs/design-docs/report-store/domain-model.md
  * @see packages/eval/src/pipeline/provenance.ts — builds the provenance input
  */
-import type { EvalMode, ReportTrigger } from "./types.js";
+import type { EvalMode, RunTrigger } from "./types.js";
 /** Input required to generate a human-readable report title. */
 export interface ReportTitleInput {
     provenance: {
@@ -31,7 +31,7 @@ export interface ReportTitleInput {
         /** Sanity document IDs targeted (when scoped to specific documents) */
         targetDocuments?: string[];
         /** What triggered the evaluation */
-        trigger: ReportTrigger;
+        trigger: RunTrigger;
     };
     /**
      * Total number of known feature areas in the system.

package/dist/pipeline/run-context.d.ts ADDED Viewed

@@ -0,0 +1,57 @@
+/**
+ * buildRunContext — the single code path that derives `RunContext` from
+ * pipeline inputs.
+ *
+ * `RunContext` is the 9-field shape shared between `RunManifest.context`
+ * (in GCS) and `ReportProvenance` (in Content Lake, which `extends
+ * RunContext`). Routing every consumer through this function makes it
+ * structurally impossible for the two to disagree: there is no second
+ * code path to drift against.
+ *
+ * Contract test: `packages/eval/src/__tests__/run-context-parity.test.ts`
+ *
+ * @see docs/decisions/D0032-run-anchored-artifact-store.md (§ Move 5 — Drift Prevention)
+ */
+import type { Logger, RunContext } from "../_vendor/ailf-core/index.d.ts";
+import type { ResolvedSourceConfig } from "../sources.js";
+import type { EvalMode } from "./types.js";
+/**
+ * Inputs required to derive a `RunContext`. `ProvenanceInput` extends this
+ * so every argument accepted by `buildProvenance` is automatically a valid
+ * input to `buildRunContext`.
+ */
+export interface RunContextInput {
+    /** Feature areas that were evaluated */
+    areas: string[];
+    /**
+     * Git metadata from the *calling* repository (cross-repo evaluations).
+     * When provided, overrides CI env var detection so context attributes
+     * to the caller — not the AILF core repo where the workflow executes.
+     */
+    callerGit?: {
+        branch?: string;
+        prNumber?: number;
+        repo: string;
+        sha?: string;
+    };
+    /** Evaluation fingerprint for cross-environment cache lookup */
+    evalFingerprint?: string;
+    /** Logger instance (defaults to ConsoleLogger) */
+    logger?: Logger;
+    /** Evaluation mode */
+    mode: EvalMode;
+    /** Path to the package root (for reading config/models) */
+    rootDir: string;
+    /** Resolved documentation source */
+    source: ResolvedSourceConfig;
+    /** Specific task IDs evaluated (if scoped) */
+    taskIds?: string[];
+}
+/**
+ * Derive `RunContext` from pipeline inputs. The only construction path.
+ *
+ * Both `FinalizeRunStep` (via `RunManifest.context`) and
+ * `PublishReportStep` (via `ReportProvenance`) call this function — the
+ * former directly, the latter transitively through `buildProvenance`.
+ */
+export declare function buildRunContext(input: RunContextInput): RunContext;

package/dist/pipeline/run-context.js ADDED Viewed

@@ -0,0 +1,156 @@
+/**
+ * buildRunContext — the single code path that derives `RunContext` from
+ * pipeline inputs.
+ *
+ * `RunContext` is the 9-field shape shared between `RunManifest.context`
+ * (in GCS) and `ReportProvenance` (in Content Lake, which `extends
+ * RunContext`). Routing every consumer through this function makes it
+ * structurally impossible for the two to disagree: there is no second
+ * code path to drift against.
+ *
+ * Contract test: `packages/eval/src/__tests__/run-context-parity.test.ts`
+ *
+ * @see docs/decisions/D0032-run-anchored-artifact-store.md (§ Move 5 — Drift Prevention)
+ */
+import { ConsoleLogger } from "../adapters/loggers/index.js";
+import { tryLoadConfigFile } from "./compiler/config-loader.js";
+/**
+ * Derive `RunContext` from pipeline inputs. The only construction path.
+ *
+ * Both `FinalizeRunStep` (via `RunManifest.context`) and
+ * `PublishReportStep` (via `ReportProvenance`) call this function — the
+ * former directly, the latter transitively through `buildProvenance`.
+ */
+export function buildRunContext(input) {
+    const log = input.logger ?? new ConsoleLogger();
+    const models = loadModelsConfig(input.rootDir, log);
+    // Cross-repo evaluations: prefer explicit caller git metadata over
+    // CI env vars (which always reflect the AILF core repo).
+    const git = input.callerGit
+        ? {
+            branch: input.callerGit.branch ?? "unknown",
+            prNumber: input.callerGit.prNumber,
+            repo: input.callerGit.repo,
+            sha: input.callerGit.sha ?? "unknown",
+        }
+        : detectGitMetadata();
+    const trigger = detectTrigger();
+    // Non-literacy modes (agent-harness, mcp-server, etc.) don't use the
+    // config/models.ts model matrix — listing those models would be
+    // misleading. Only include them for literacy mode where they're the
+    // actual eval targets.
+    const evaluatedModels = input.mode === "literacy"
+        ? models.models.map((m) => ({ id: m.id, label: m.label }))
+        : [];
+    return {
+        areas: input.areas,
+        evalFingerprint: input.evalFingerprint,
+        git,
+        graderModel: models.grader.id,
+        mode: input.mode,
+        models: evaluatedModels,
+        source: {
+            baseUrl: input.source.baseUrl,
+            dataset: input.source.dataset,
+            name: input.source.name,
+            perspective: input.source.perspective,
+            projectId: input.source.projectId,
+        },
+        taskIds: input.taskIds,
+        trigger,
+    };
+}
+// ---------------------------------------------------------------------------
+// Environment-derived context
+// ---------------------------------------------------------------------------
+/**
+ * Extract git metadata from GitHub Actions environment variables.
+ * Returns undefined when not running in CI.
+ */
+function detectGitMetadata() {
+    const repo = process.env.GITHUB_REPOSITORY;
+    if (!repo)
+        return undefined;
+    const sha = process.env.GITHUB_SHA ?? "unknown";
+    const ref = process.env.GITHUB_REF ?? "";
+    // Extract branch name from ref (refs/heads/main → main)
+    const branch = ref.startsWith("refs/heads/")
+        ? ref.slice("refs/heads/".length)
+        : ref.startsWith("refs/pull/")
+            ? `pr-${ref.split("/")[2]}`
+            : ref;
+    // Extract PR number from GITHUB_REF (refs/pull/123/merge)
+    const prMatch = ref.match(/^refs\/pull\/(\d+)\//);
+    const prNumber = prMatch ? parseInt(prMatch[1], 10) : undefined;
+    return { branch, prNumber, repo, sha };
+}
+/**
+ * Infer what triggered this evaluation from environment variables.
+ *
+ * Detection order:
+ * 1. AILF_TRIGGER_TYPE — explicit override (for custom integrations)
+ * 2. GITHUB_EVENT_NAME === "schedule" — cron-triggered
+ * 3. GITHUB_EVENT_NAME === "repository_dispatch" — cross-repo trigger
+ * 4. GITHUB_ACTIONS === "true" — CI-triggered
+ * 5. Default: manual
+ */
+function detectTrigger() {
+    const explicit = process.env.AILF_TRIGGER_TYPE;
+    if (explicit === "scheduled") {
+        return {
+            schedule: process.env.AILF_SCHEDULE ?? "unknown",
+            type: "scheduled",
+        };
+    }
+    if (explicit === "webhook") {
+        return {
+            documentId: process.env.AILF_WEBHOOK_DOCUMENT_ID,
+            source: process.env.AILF_WEBHOOK_SOURCE ?? "unknown",
+            type: "webhook",
+        };
+    }
+    // GitHub Actions context
+    const eventName = process.env.GITHUB_EVENT_NAME;
+    if (eventName === "schedule") {
+        return {
+            schedule: process.env.GITHUB_SCHEDULE ?? "unknown",
+            type: "scheduled",
+        };
+    }
+    if (eventName === "repository_dispatch") {
+        return {
+            callerRef: process.env.GITHUB_REF,
+            // Note: callerRepo here is a fallback. The accurate caller repo
+            // comes from callerGit (injected into the PipelineRequest payload).
+            // GITHUB_REPOSITORY_OWNER_ID is just the org ID, not owner/repo.
+            callerRepo: process.env.GITHUB_REPOSITORY_OWNER_ID ?? "unknown",
+            type: "cross-repo",
+        };
+    }
+    if (process.env.GITHUB_ACTIONS === "true") {
+        return {
+            runId: process.env.GITHUB_RUN_ID ?? "unknown",
+            type: "ci",
+            workflow: process.env.GITHUB_WORKFLOW ?? "unknown",
+        };
+    }
+    return { type: "manual" };
+}
+// ---------------------------------------------------------------------------
+// Model config loading
+// ---------------------------------------------------------------------------
+/**
+ * Load config/models to extract model list and grader info.
+ * Falls back to a minimal config if the file can't be read.
+ */
+function loadModelsConfig(rootDir, log) {
+    const result = tryLoadConfigFile("models", rootDir);
+    if (result)
+        return result.data;
+    log.warn("Could not read config/models for run context");
+    return {
+        defaults: {},
+        grader: { id: "unknown" },
+        models: [],
+    };
+}

package/dist/pipeline/upload-test-outputs.d.ts ADDED Viewed

@@ -0,0 +1,26 @@
+/**
+ * upload-test-outputs.ts — shared helper for the testOutputs artifact upload.
+ *
+ * CalculateScoresStep calls this once its score-summary.json is complete.
+ * Each {taskId, modelId} pair becomes one GCS object under
+ * `runs/{runId}/test-outputs/{taskId}--{modelId}.json` carrying the full
+ * response output and truncation flag. The returned ArtifactRef's
+ * `entries[]` catalog lists every uploaded entry so Studio can render
+ * drill-down state without a second listing call.
+ *
+ * PublishReportStep later strips responseOutput from the inline
+ * testResults[] when this upload succeeds, so the Content Lake document
+ * stays slim — the full output lives in GCS and is fetched per-entry
+ * on click.
+ *
+ * @see docs/decisions/D0032-run-anchored-artifact-store.md
+ */
+import type { ArtifactRef, ArtifactWriter, RunId, StoredTestResult } from "../_vendor/ailf-core/index.d.ts";
+/**
+ * Upload testOutputs as per-entry GCS objects under
+ * `runs/{runId}/test-outputs/`, one per `{taskId}::{modelId}` pair.
+ *
+ * Returns the `ArtifactRef` on success, or `null` when upload is skipped or
+ * fails (P5: non-blocking).
+ */
+export declare function uploadTestOutputs(writer: ArtifactWriter, runId: RunId, testResults: StoredTestResult[]): Promise<ArtifactRef | null>;

package/dist/pipeline/upload-test-outputs.js ADDED Viewed

@@ -0,0 +1,34 @@
+/**
+ * upload-test-outputs.ts — shared helper for the testOutputs artifact upload.
+ *
+ * CalculateScoresStep calls this once its score-summary.json is complete.
+ * Each {taskId, modelId} pair becomes one GCS object under
+ * `runs/{runId}/test-outputs/{taskId}--{modelId}.json` carrying the full
+ * response output and truncation flag. The returned ArtifactRef's
+ * `entries[]` catalog lists every uploaded entry so Studio can render
+ * drill-down state without a second listing call.
+ *
+ * PublishReportStep later strips responseOutput from the inline
+ * testResults[] when this upload succeeds, so the Content Lake document
+ * stays slim — the full output lives in GCS and is fetched per-entry
+ * on click.
+ *
+ * @see docs/decisions/D0032-run-anchored-artifact-store.md
+ */
+/**
+ * Upload testOutputs as per-entry GCS objects under
+ * `runs/{runId}/test-outputs/`, one per `{taskId}::{modelId}` pair.
+ *
+ * Returns the `ArtifactRef` on success, or `null` when upload is skipped or
+ * fails (P5: non-blocking).
+ */
+export async function uploadTestOutputs(writer, runId, testResults) {
+    const entries = testResults.map((tr) => ({
+        key: `${tr.taskId}::${tr.modelId}`,
+        data: {
+            responseOutput: tr.responseOutput ?? "",
+            responseOutputTruncated: tr.responseOutputTruncated ?? false,
+        },
+    }));
+    return writer.writePerEntry("testOutputs", runId, entries);
+}

package/dist/report-store.js CHANGED Viewed

@@ -211,8 +211,10 @@ export class ReportStore {
                 summary: {
                     ...report.summary,
                     // Artifact references live inside summary in Sanity so they're
-                    // projected automatically by the reportDetailQuery (D0030)
-                    ...(report.artifacts ? { artifacts: report.artifacts } : {}),
+                    // projected automatically by the reportDetailQuery (D0032)
+                    ...(report.artifactManifest
+                        ? { artifactManifest: report.artifactManifest }
+                        : {}),
                 },
                 tag: report.tag ?? null,
                 title: report.title ?? null,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@sanity/ailf",
-  "version": "2.7.1",
+  "version": "2.8.0",
   "private": false,
   "publishConfig": {
     "access": "public"
@@ -52,8 +52,8 @@
     "@types/node": "^22.13.1",
     "tsx": "^4.19.2",
     "typescript": "^5.7.3",
-    "@sanity/ailf-shared": "0.1.0",
-    "@sanity/ailf-core": "0.1.0"
+    "@sanity/ailf-core": "0.1.0",
+    "@sanity/ailf-shared": "0.1.0"
   },
   "scripts": {
     "build": "tsc && tsx scripts/bundle-workspace-deps.ts",

package/dist/_vendor/ailf-core/ports/artifact-uploader.d.ts DELETED Viewed

@@ -1,35 +0,0 @@
-/**
- * Port: ArtifactUploader — uploads report artifacts to external object storage.
- *
- * Separate from ArtifactCollector (which captures forensic archives).
- * This port puts structured files at known paths so Studio can fetch
- * them on demand via signed URLs.
- *
- * @see docs/design-docs/external-artifact-store.md
- * @see docs/decisions/D0030-external-artifact-store.md
- */
-import type { ArtifactRef } from "../types/index.js";
-/**
- * Uploads report artifacts to external storage.
- *
- * Implementations:
- * - GcsReportArtifactUploader (packages/eval) — uploads to GCS
- * - NoOpArtifactUploader (below) — returns null (no-op when GCS is not configured)
- */
-export interface ArtifactUploader {
-    /**
-     * Upload a JSON artifact for a report.
-     *
-     * @param reportId - Report identifier (used as the GCS path prefix)
-     * @param fileName - File name within the report prefix (e.g., "test-outputs.json")
-     * @param data     - Serializable data (will be JSON.stringify'd)
-     * @returns ArtifactRef on success, null if upload is skipped or fails
-     */
-    upload(reportId: string, fileName: string, data: unknown): Promise<ArtifactRef | null>;
-}
-/**
- * No-op uploader — always returns null. Used when GCS is not configured.
- */
-export declare class NoOpArtifactUploader implements ArtifactUploader {
-    upload(): Promise<null>;
-}