npm - @sanity/ailf - Versions diffs - 0.1.27 → 0.1.29 - Mend

@sanity/ailf 0.1.27 → 0.1.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

package/dist/_vendor/ailf-core/ports/context.d.ts +4 -0
package/dist/_vendor/ailf-core/ports/task-source.d.ts +2 -0
package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -0
package/dist/_vendor/ailf-core/types/index.d.ts +26 -0
package/dist/adapters/api-client/build-request.d.ts +1 -0
package/dist/adapters/api-client/build-request.js +6 -2
package/dist/adapters/api-client/progress.js +1 -1
package/dist/adapters/task-sources/content-lake-task-source.js +20 -1
package/dist/adapters/task-sources/repo-task-source.js +7 -0
package/dist/commands/explain-handler.js +1 -0
package/dist/commands/pipeline-action.d.ts +1 -0
package/dist/commands/pipeline-action.js +8 -0
package/dist/commands/pipeline.d.ts +1 -0
package/dist/commands/pipeline.js +7 -0
package/dist/commands/publish.js +10 -2
package/dist/orchestration/build-app-context.js +1 -0
package/dist/orchestration/steps/fetch-docs-step.js +3 -2
package/dist/orchestration/steps/generate-configs-step.js +8 -3
package/dist/orchestration/steps/publish-report-step.js +12 -2
package/dist/orchestration/steps/run-eval-step.js +4 -2
package/dist/pipeline/map-request-to-config.js +1 -0
package/dist/pipeline/plan.d.ts +1 -0
package/dist/pipeline/plan.js +2 -1
package/dist/pipeline/provenance.d.ts +2 -0
package/dist/pipeline/provenance.js +5 -0
package/dist/report-store.d.ts +20 -2
package/dist/report-store.js +31 -7
package/dist/webhook/eval-request-handler.d.ts +2 -0
package/dist/webhook/eval-request-handler.js +3 -0
package/package.json +1 -1

package/dist/_vendor/ailf-core/ports/context.d.ts CHANGED Viewed

@@ -35,6 +35,8 @@ export interface ResolvedConfig {
     areas?: string[];
     /** Task ID filter */
     tasks?: string[];
+    /** Tag filter — tasks must have at least one matching tag */
+    tags?: string[];
     /** Changed doc slugs for impact scoping */
     changedDocs?: string[];
     /** Documentation source name */
@@ -89,6 +91,8 @@ export interface ResolvedConfig {
     studioOriginOverride?: string;
     /** Sanity document filter args */
     sanityDocumentArgs?: string[];
+    /** Report ID that triggered this re-run (flows to provenance.lineage.rerunOf) */
+    sourceReportId?: string;
     /** Disable release-aware auto-scoping (evaluate all tasks even when perspective is set) */
     noAutoScope: boolean;
     /** Before option for comparison */

package/dist/_vendor/ailf-core/ports/task-source.d.ts CHANGED Viewed

@@ -112,6 +112,8 @@ export interface TaskDefinition {
     baseline?: BaselineConfig;
     /** Additional template variables beyond task (e.g., custom vars) */
     extraVars?: Record<string, unknown>;
+    /** Freeform labels for filtering and organization */
+    tags?: string[];
 }
 /** Check if a canonical doc ref resolves by slug.
  *

package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts CHANGED Viewed

@@ -68,6 +68,7 @@ export declare const PipelineRequestSchema: z.ZodObject<{
         "origin-only": "origin-only";
     }>>;
     source: z.ZodOptional<z.ZodString>;
+    sourceReportId: z.ZodOptional<z.ZodString>;
     taskMode: z.ZodOptional<z.ZodEnum<{
         "content-lake": "content-lake";
         yaml: "yaml";

package/dist/_vendor/ailf-core/schemas/pipeline-request.js CHANGED Viewed

@@ -80,6 +80,7 @@ export const PipelineRequestSchema = z.object({
     readiness: z.boolean().optional(),
     searchMode: z.enum(["off", "open", "origin-only"]).optional(),
     source: z.string().optional(),
+    sourceReportId: z.string().optional(),
     taskMode: z.enum(["content-lake", "yaml", "inline"]).optional(),
     tasks: z.array(z.string()).optional(),
     urls: z.array(z.string().url()).optional(),

package/dist/_vendor/ailf-core/types/index.d.ts CHANGED Viewed

@@ -179,6 +179,8 @@ export interface FeatureScore {
 export interface FilterOptions {
     /** Feature areas to include (filename stems, e.g., ["groq", "frameworks"]) */
     areas?: string[];
+    /** Tags to include — tasks must have at least one matching tag */
+    tags?: string[];
     /** Specific task IDs to include (e.g., ["groq-blog-queries"]) */
     taskIds?: string[];
 }
@@ -1032,6 +1034,28 @@ export interface ReportAutoScope {
         removed: number;
     };
 }
+/**
+ * Typed relationships between reports. Each field is optional and
+ * independent — populated only when that relationship exists.
+ *
+ * Stored at `provenance.lineage` in the report document.
+ *
+ * @see docs/design-docs/report-store/domain-model.md
+ */
+export interface ReportLineage {
+    /**
+     * This report was explicitly compared against another report.
+     * Set when auto-compare selects a specific baseline or when the user
+     * requests comparison against a named report.
+     */
+    comparedAgainst?: ReportId;
+    /**
+     * This report was explicitly re-run from another report.
+     * The re-run has the same EvalScope (mode, areas, perspective, etc.)
+     * but measures the current state of docs/models/tasks.
+     */
+    rerunOf?: ReportId;
+}
 /** Full provenance metadata for an evaluation report */
 export interface ReportProvenance {
     /** Which feature areas were evaluated */
@@ -1055,6 +1079,8 @@ export interface ReportProvenance {
     };
     /** Grader model used for scoring */
     graderModel: string;
+    /** Typed relationships with other reports (re-run, comparison) */
+    lineage?: ReportLineage;
     /** Evaluation mode */
     mode: EvalMode;
     /** Models under evaluation */

package/dist/adapters/api-client/build-request.d.ts CHANGED Viewed

@@ -35,6 +35,7 @@ export interface RemoteConfigSlice {
     };
     areas?: string[];
     tasks?: string[];
+    tags?: string[];
     changedDocs?: string[];
     source?: string;
     compareEnabled?: boolean;

package/dist/adapters/api-client/build-request.js CHANGED Viewed

@@ -167,12 +167,16 @@ function taskToInlineFormat(task) {
     if (task.baseline) {
         inline.baseline = task.baseline;
     }
+    if (task.tags?.length) {
+        inline.tags = task.tags;
+    }
     return inline;
 }
 function buildFilterOptions(config) {
     const areas = config.areas?.length ? config.areas : undefined;
     const taskIds = config.tasks?.length ? config.tasks : undefined;
-    if (!areas && !taskIds)
+    const tags = config.tags?.length ? config.tags : undefined;
+    if (!areas && !taskIds && !tags)
         return undefined;
-    return { areas, taskIds };
+    return { areas, taskIds, tags };
 }

package/dist/adapters/api-client/progress.js CHANGED Viewed

@@ -36,7 +36,7 @@ export function createProgressDisplay() {
                 line = `⏳ [queued] Waiting for runner... (${elapsed})`;
                 break;
             case "running": {
-                if (job.progress) {
+                if (job.progress?.step && job.progress.current && job.progress.total) {
                     const { step, current, total } = job.progress;
                     line = `⏳ [running] Step ${current}/${total}: ${step} (${elapsed})`;
                 }

package/dist/adapters/task-sources/content-lake-task-source.js CHANGED Viewed

@@ -32,6 +32,7 @@ const TASKS_QUERY = /* groq */ `
   && (!defined($areas) || featureArea->areaId.current in $areas)
   && (!defined($taskIds) || id.current in $taskIds)
   && (execution.enabled != false)
+  && (!defined($tags) || count((tags)[@ in $tags]) > 0)
 ] | order(featureArea->areaId.current asc, id.current asc) {
   "taskId": id.current,
   description,
@@ -51,6 +52,7 @@ const TASKS_QUERY = /* groq */ `
   assert,
   rawAssert,
   baseline,
+  tags,
   "referenceSolutionTitle": referenceSolution->title
 }
 `;
@@ -90,6 +92,7 @@ function buildGroqParams(filter) {
         areas: filter?.areas && filter.areas.length > 0
             ? filter.areas.map((a) => a.toLowerCase())
             : null,
+        tags: filter?.tags && filter.tags.length > 0 ? filter.tags : null,
         taskIds: filter?.taskIds && filter.taskIds.length > 0 ? filter.taskIds : null,
     };
 }
@@ -116,6 +119,21 @@ function mapToTaskDefinition(raw) {
         .map(mapCanonicalDocRef)
         .filter((d) => d !== null);
     const assertions = mapAssertions(raw.assert ?? []);
+    // Append raw pass-through assertions (escape hatch for arbitrary Promptfoo
+    // assertion types that aren't in the curated list). These bypass template
+    // resolution and flow directly into the expanded Promptfoo test case as
+    // value-based assertions. In baseline mode, buildBaselineAsserts() with
+    // "abbreviated" (the default) drops non-rubric assertions, so rawAssert
+    // entries only run in the gold variant — consistent with how regular
+    // value-based assertions like `contains` or `regex` behave.
+    const rawAssertions = (raw.rawAssert ?? [])
+        .filter((a) => !!a.type)
+        .map((a) => ({
+        type: a.type,
+        ...(a.value !== undefined ? { value: a.value } : {}),
+        ...(a.threshold !== undefined ? { threshold: a.threshold } : {}),
+    }));
+    const allAssertions = [...assertions, ...rawAssertions];
     const baseline = raw.baseline
         ? {
             ...(raw.baseline.enabled !== undefined
@@ -129,7 +147,7 @@ function mapToTaskDefinition(raw) {
         }
         : undefined;
     return {
-        assertions,
+        assertions: allAssertions,
         canonicalDocs,
         description: raw.description,
         docCoverage: raw.docCoverage ?? false,
@@ -143,6 +161,7 @@ function mapToTaskDefinition(raw) {
         referenceSolution: "",
         taskPrompt: raw.taskPrompt,
         ...(baseline ? { baseline } : {}),
+        ...(raw.tags?.length ? { tags: raw.tags } : {}),
     };
 }
 /**

package/dist/adapters/task-sources/repo-task-source.js CHANGED Viewed

@@ -79,6 +79,12 @@ export class RepoTaskSource {
                 if (entry.execution?.enabled === false) {
                     continue;
                 }
+                // Tag filter — skip tasks that don't match any requested tag
+                if (filter?.tags &&
+                    filter.tags.length > 0 &&
+                    (!entry.tags || !entry.tags.some((t) => filter.tags.includes(t)))) {
+                    continue;
+                }
                 definitions.push(mapToTaskDefinition(entry));
             }
         }
@@ -108,5 +114,6 @@ function mapToTaskDefinition(raw) {
         taskPrompt: typeof task === "string" ? task : "",
         ...(raw.baseline ? { baseline: raw.baseline } : {}),
         ...(extraVars ? { extraVars } : {}),
+        ...(raw.tags?.length ? { tags: raw.tags } : {}),
     };
 }

package/dist/commands/explain-handler.js CHANGED Viewed

@@ -688,6 +688,7 @@ async function buildPipelineExplainPlan(actionCommand, rootDir) {
         skipEval: raw.skipEval ?? false,
         skipFetch: raw.skipFetch ?? false,
         source: raw.source,
+        tag: raw.tag ?? [],
         task: raw.task,
         threshold: raw.threshold,
         url: raw.url ?? [],

package/dist/commands/pipeline-action.d.ts CHANGED Viewed

@@ -54,6 +54,7 @@ export interface ResolvedOptions {
     remote: boolean;
     repoTasksPath?: string;
     taskOption?: string;
+    tagOption?: string[];
     taskSourceType?: "content-lake" | "repo" | "yaml";
     urlArgs: string[];
     apiUrl: string;

package/dist/commands/pipeline-action.js CHANGED Viewed

@@ -126,6 +126,13 @@ export function computeResolvedOptions(opts) {
     // Scoping
     const areaOption = opts.area ?? process.env.EVAL_FILTER_AREAS ?? undefined;
     const taskOption = opts.task ?? process.env.EVAL_FILTER_TASKS ?? undefined;
+    const tagOption = opts.tag?.length
+        ? opts.tag
+        : process.env.EVAL_FILTER_TAGS
+            ? process.env.EVAL_FILTER_TAGS.split(",")
+                .map((s) => s.trim())
+                .filter(Boolean)
+            : undefined;
     const changedDocsOption = opts.changedDocs ?? process.env.EVAL_CHANGED_DOCS ?? undefined;
     // Document-driven scoping (pure — computes impactSummary without env writes)
     let impactSummary;
@@ -237,6 +244,7 @@ export function computeResolvedOptions(opts) {
             ? resolve(callerCwd, opts.repoTasksPath)
             : undefined,
         taskOption,
+        tagOption,
         taskSourceType: resolveTaskSourceType(opts.taskSource),
         urlArgs,
     };

package/dist/commands/pipeline.d.ts CHANGED Viewed

@@ -56,6 +56,7 @@ export interface PipelineCliOptions {
     remote: boolean;
     repoTasksPath?: string;
     task?: string;
+    tag: string[];
     taskSource?: string;
     threshold?: number;
     url: string[];

package/dist/commands/pipeline.js CHANGED Viewed

@@ -22,6 +22,13 @@ export function createPipelineCommand() {
         .option("--no-auto-scope", "Disable release-aware auto-scoping (evaluate all tasks even when a perspective is set)")
         .option("-a, --area <areas>", "Scope to feature areas (comma-separated)")
         .option("-t, --task <id>", "Scope to specific task ID")
+        .option("--tag <tags>", "Scope to tasks with matching tags (comma-separated, repeatable)", (val, prev) => [
+        ...prev,
+        ...val
+            .split(",")
+            .map((s) => s.trim())
+            .filter(Boolean),
+    ], [])
         .option("--changed-docs <slugs>", "Auto-scope to tasks affected by these document slugs")
         .option("-j, --concurrency <n>", "Max parallel API calls during evaluation", parseInt)
         .option("--grader-replications <n>", "Grader consistency replications", parseInt)

package/dist/commands/publish.js CHANGED Viewed

@@ -24,7 +24,7 @@ import { fileURLToPath } from "url";
 import { Command } from "commander";
 import { createAppContext } from "../composition-root.js";
 import { buildProvenance, } from "../pipeline/provenance.js";
-import { generateReportId } from "../report-store.js";
+import { generateReportId, } from "../report-store.js";
 import { withRetry } from "../sinks/retry.js";
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const ROOT = resolve(__dirname, "..", "..");
@@ -154,9 +154,17 @@ async function runPublishCommand(summaryPath, opts) {
         }
     }
     // Auto-compare against most recent comparable baseline
-    const comparison = opts.dryRun || !store
+    const autoCompareResult = opts.dryRun || !store
         ? null
         : await store.autoCompare(summary, provenance, now);
+    const comparison = autoCompareResult?.comparison ?? null;
+    // Record which report we compared against in lineage
+    if (autoCompareResult) {
+        provenance.lineage = {
+            ...provenance.lineage,
+            comparedAgainst: autoCompareResult.baselineReportId,
+        };
+    }
     const reportId = generateReportId();
     const report = {
         comparison: comparison ?? undefined,

package/dist/orchestration/build-app-context.js CHANGED Viewed

@@ -30,6 +30,7 @@ export function mapToResolvedConfig(opts, rootDir) {
             ?.split(",")
             .map((s) => s.trim())
             .filter(Boolean),
+        tags: opts.tagOption,
         changedDocs: opts.changedDocsOption
             ?.split(",")
             .map((s) => s.trim())

package/dist/orchestration/steps/fetch-docs-step.js CHANGED Viewed

@@ -121,12 +121,13 @@ export class FetchDocsStep {
 // Helpers
 // ---------------------------------------------------------------------------
 function buildFilter(ctx) {
-    const { areas, tasks } = ctx.config;
-    if (!areas && !tasks)
+    const { areas, tasks, tags } = ctx.config;
+    if (!areas && !tasks && !tags)
         return undefined;
     return {
         ...(areas ? { areas } : {}),
         ...(tasks ? { taskIds: tasks } : {}),
+        ...(tags ? { tags } : {}),
     };
 }
 /**

package/dist/orchestration/steps/generate-configs-step.js CHANGED Viewed

@@ -28,8 +28,12 @@ export class GenerateConfigsStep {
         // repo-based, and YAML tasks depending on which adapter is wired.
         let tasks;
         try {
-            const filter = ctx.config.areas || ctx.config.tasks
-                ? { areas: ctx.config.areas, taskIds: ctx.config.tasks }
+            const filter = ctx.config.areas || ctx.config.tasks || ctx.config.tags
+                ? {
+                    areas: ctx.config.areas,
+                    taskIds: ctx.config.tasks,
+                    tags: ctx.config.tags,
+                }
                 : undefined;
             tasks = await ctx.taskSource.loadTasks(filter);
         }
@@ -54,10 +58,11 @@ export class GenerateConfigsStep {
         try {
             generateConfigs({
                 allowedOrigins: ctx.config.allowedOrigins,
-                filter: ctx.config.areas || ctx.config.tasks
+                filter: ctx.config.areas || ctx.config.tasks || ctx.config.tags
                     ? {
                         areas: ctx.config.areas,
                         taskIds: ctx.config.tasks,
+                        tags: ctx.config.tags,
                     }
                     : undefined,
                 resolvedSource,

package/dist/orchestration/steps/publish-report-step.js CHANGED Viewed

@@ -80,10 +80,19 @@ export class PublishReportStep {
         const now = new Date().toISOString();
         const reportId = generateReportId();
         const durationMs = Date.now() - this.pipelineStart;
-        // Auto-compare against most recent comparable baseline
-        const comparison = ctx.reportStore
+        // Auto-compare against most recent comparable baseline.
+        // Returns the comparison + baseline report ID for lineage tracking.
+        const autoCompareResult = ctx.reportStore
             ? (await ctx.reportStore.autoCompare(summary, provenance, now))
             : null;
+        const comparison = autoCompareResult?.comparison ?? null;
+        // Record which report we compared against in lineage
+        if (autoCompareResult) {
+            provenance.lineage = {
+                ...provenance.lineage,
+                comparedAgainst: autoCompareResult.baselineReportId,
+            };
+        }
         const report = {
             comparison: comparison ?? undefined,
             completedAt: now,
@@ -168,6 +177,7 @@ function buildProvenanceInput(summary, ctx, options, autoScope) {
         rootDir: ctx.config.rootDir,
         sanityDocumentIds,
         source,
+        sourceReportId: ctx.config.sourceReportId,
         taskIds,
     };
 }

package/dist/orchestration/steps/run-eval-step.js CHANGED Viewed

@@ -40,10 +40,11 @@ export class RunEvalStep {
         // Precondition: canonical context files exist for filtered tasks.
         // Must apply the same area/task filter as fetch-docs so we only
         // check contexts that were actually fetched.
-        const filter = ctx.config.areas || ctx.config.tasks
+        const filter = ctx.config.areas || ctx.config.tasks || ctx.config.tags
             ? {
                 ...(ctx.config.areas ? { areas: ctx.config.areas } : {}),
                 ...(ctx.config.tasks ? { taskIds: ctx.config.tasks } : {}),
+                ...(ctx.config.tags ? { tags: ctx.config.tags } : {}),
             }
             : undefined;
         let tasks = await ctx.taskSource.loadTasks(filter);
@@ -76,10 +77,11 @@ export class RunEvalStep {
         if (!debug?.enabled) {
             try {
                 evalFingerprint = computeEvalFingerprint({
-                    filter: ctx.config.areas || ctx.config.tasks
+                    filter: ctx.config.areas || ctx.config.tasks || ctx.config.tags
                         ? {
                             areas: ctx.config.areas,
                             taskIds: ctx.config.tasks,
+                            tags: ctx.config.tags,
                         }
                         : undefined,
                     graderModel: "default",

package/dist/pipeline/map-request-to-config.js CHANGED Viewed

@@ -55,6 +55,7 @@ export function mapRequestToConfig(request, rootDir) {
         promptfooUrl: undefined,
         studioOriginOverride: undefined,
         sanityDocumentArgs: undefined,
+        sourceReportId: request.sourceReportId,
         beforeOption: undefined,
         repoTasksPath: undefined,
         callerGit: request.callerGit,

package/dist/pipeline/plan.d.ts CHANGED Viewed

@@ -145,6 +145,7 @@ export interface PlanOptions {
     skipEval: boolean;
     skipFetch: boolean;
     source?: string;
+    tagOption?: string[];
     taskOption?: string;
 }
 /**

package/dist/pipeline/plan.js CHANGED Viewed

@@ -117,7 +117,7 @@ export async function buildPipelinePlan(opts, rootDir) {
         .filter((i) => i.severity === "error")
         .map((i) => `[${i.source}] ${i.message}`);
     // 2. Expand tasks with filters
-    const filter = opts.areaOption || opts.taskOption
+    const filter = opts.areaOption || opts.taskOption || opts.tagOption?.length
         ? {
             areas: opts.areaOption
                 ? opts.areaOption.split(",").map((a) => a.trim())
@@ -125,6 +125,7 @@ export async function buildPipelinePlan(opts, rootDir) {
             taskIds: opts.taskOption
                 ? opts.taskOption.split(",").map((t) => t.trim())
                 : undefined,
+            tags: opts.tagOption,
         }
         : undefined;
     let totalTests = 0;

package/dist/pipeline/provenance.d.ts CHANGED Viewed

@@ -41,6 +41,8 @@ export interface ProvenanceInput {
     promptfooUrls?: PromptfooUrlEntry[];
     /** Path to the package root (for reading models.yaml) */
     rootDir: string;
+    /** Report ID that triggered this re-run (becomes lineage.rerunOf) */
+    sourceReportId?: string;
     /** Sanity document IDs targeted */
     sanityDocumentIds?: string[];
     /** Resolved documentation source */

package/dist/pipeline/provenance.js CHANGED Viewed

@@ -35,6 +35,10 @@ export function buildProvenance(input) {
             sha: input.callerGit.sha ?? "unknown",
         }
         : detectGitMetadata();
+    // Build lineage from explicit relationships
+    const lineage = input.sourceReportId
+        ? { rerunOf: input.sourceReportId }
+        : undefined;
     return {
         areas: input.areas,
         autoScope: input.autoScope,
@@ -42,6 +46,7 @@ export function buildProvenance(input) {
         evalFingerprint: input.evalFingerprint,
         git,
         graderModel: models.grader.id,
+        lineage,
         mode: input.mode,
         models: models.models.map((m) => ({ id: m.id, label: m.label })),
         promptfooUrl: input.promptfooUrl,

package/dist/report-store.d.ts CHANGED Viewed

@@ -16,6 +16,16 @@
  */
 import type { SanityClient } from "@sanity/client";
 import type { ComparisonReport, ISOTimestamp, LineageQuery, Report, ReportId, ReportProvenance, ScoreSummary } from "./pipeline/types.js";
+/**
+ * Result of an auto-comparison, bundling the ComparisonReport with the
+ * baseline report ID so the caller can record lineage (comparedAgainst).
+ */
+export interface AutoCompareResult {
+    /** The report ID of the baseline used for comparison */
+    baselineReportId: ReportId;
+    /** The computed comparison report */
+    comparison: ComparisonReport;
+}
 export interface ReportStoreOptions {
     /** Override the Sanity client (for testing) */
     client?: SanityClient;
@@ -33,9 +43,17 @@ export declare class ReportStore {
      * Auto-compare: find the most recent comparable report and compute
      * a ComparisonReport using the existing compare() primitive.
      *
-     * @returns The comparison report, or null if no baseline found or on error
+     * Baseline selection order:
+     * 1. Explicit lineage — if `provenance.lineage.rerunOf` is set,
+     *    compare against that specific report (deterministic re-run comparison)
+     * 2. Fuzzy matching — most recent report with same mode + source name
+     *
+     * Returns the comparison plus the baseline report ID so the caller
+     * can record `provenance.lineage.comparedAgainst`.
+     *
+     * @returns The comparison result with baseline ID, or null if no baseline found
      */
-    autoCompare(currentSummary: ScoreSummary, provenance: ReportProvenance, completedAt: ISOTimestamp): Promise<ComparisonReport | null>;
+    autoCompare(currentSummary: ScoreSummary, provenance: ReportProvenance, completedAt: ISOTimestamp): Promise<AutoCompareResult | null>;
     /**
      * Find a report by its evaluation fingerprint (cross-environment cache lookup).
      *

package/dist/report-store.js CHANGED Viewed

@@ -41,19 +41,43 @@ export class ReportStore {
      * Auto-compare: find the most recent comparable report and compute
      * a ComparisonReport using the existing compare() primitive.
      *
-     * @returns The comparison report, or null if no baseline found or on error
+     * Baseline selection order:
+     * 1. Explicit lineage — if `provenance.lineage.rerunOf` is set,
+     *    compare against that specific report (deterministic re-run comparison)
+     * 2. Fuzzy matching — most recent report with same mode + source name
+     *
+     * Returns the comparison plus the baseline report ID so the caller
+     * can record `provenance.lineage.comparedAgainst`.
+     *
+     * @returns The comparison result with baseline ID, or null if no baseline found
      */
     async autoCompare(currentSummary, provenance, completedAt) {
-        const baseline = await this.findComparableBaseline({
-            before: completedAt,
-            mode: provenance.mode,
-            source: { name: provenance.source.name },
-        });
+        // 1. Prefer explicit lineage source (deterministic re-run comparison)
+        const rerunSourceId = provenance.lineage?.rerunOf;
+        let baseline = null;
+        if (rerunSourceId) {
+            baseline = await this.read(rerunSourceId);
+            if (baseline) {
+                console.log(`  🔗 Comparing against lineage source: ${rerunSourceId}`);
+            }
+            else {
+                console.warn(`  ⚠️  Lineage source ${rerunSourceId} not found, falling back to fuzzy match`);
+            }
+        }
+        // 2. Fall back to fuzzy matching
+        if (!baseline) {
+            baseline = await this.findComparableBaseline({
+                before: completedAt,
+                mode: provenance.mode,
+                source: { name: provenance.source.name },
+            });
+        }
         if (!baseline) {
             return null;
         }
         try {
-            return compare(baseline.summary, currentSummary);
+            const comparison = compare(baseline.summary, currentSummary);
+            return { baselineReportId: baseline.id, comparison };
         }
         catch (error) {
             console.warn(`  ⚠️  Auto-comparison failed: ${error instanceof Error ? error.message : String(error)}`);

package/dist/webhook/eval-request-handler.d.ts CHANGED Viewed

@@ -68,6 +68,8 @@ export interface EvalRequestPayload {
     requestedAt: string;
     /** User ID who requested */
     requestedBy?: string;
+    /** Report ID that triggered this re-run (if any) */
+    sourceReportId?: string;
     /** Request status */
     status: string;
     /** Publish tag */

package/dist/webhook/eval-request-handler.js CHANGED Viewed

@@ -180,6 +180,9 @@ async function dispatchGitHubEval(repo, payload, config) {
             ...(hasAreas ? { areas: payload.areas } : {}),
             ...(payload.debug ? { debug: true } : {}),
             ...(payload.tag ? { publishTag: payload.tag } : {}),
+            ...(payload.sourceReportId
+                ? { sourceReportId: payload.sourceReportId }
+                : {}),
         },
         event_type: "external-eval",
     };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@sanity/ailf",
-  "version": "0.1.27",
+  "version": "0.1.29",
   "private": false,
   "publishConfig": {
     "access": "restricted"