npm - @sanity/ailf - Versions diffs - 0.1.24 → 0.1.26 - Mend

@sanity/ailf 0.1.24 → 0.1.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/dist/_vendor/ailf-core/ports/context.d.ts +2 -0
package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
package/dist/_vendor/ailf-core/schemas/eval-config.js +2 -0
package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -0
package/dist/_vendor/ailf-core/types/index.d.ts +45 -0
package/dist/adapters/config-sources/file-config-adapter.js +1 -0
package/dist/commands/calculate-scores.js +1 -0
package/dist/commands/explain-handler.js +1 -0
package/dist/commands/fetch-docs.js +1 -0
package/dist/commands/generate-configs.js +1 -0
package/dist/commands/pipeline-action.d.ts +1 -0
package/dist/commands/pipeline-action.js +1 -0
package/dist/commands/pipeline.d.ts +1 -0
package/dist/commands/pipeline.js +1 -0
package/dist/commands/pr-comment.js +1 -0
package/dist/commands/publish.js +1 -0
package/dist/orchestration/build-app-context.js +1 -0
package/dist/orchestration/steps/fetch-docs-step.d.ts +7 -2
package/dist/orchestration/steps/fetch-docs-step.js +130 -1
package/dist/orchestration/steps/generate-configs-step.d.ts +2 -2
package/dist/orchestration/steps/generate-configs-step.js +12 -1
package/dist/orchestration/steps/publish-report-step.js +17 -2
package/dist/orchestration/steps/run-eval-step.js +6 -1
package/dist/pipeline/map-request-to-config.js +1 -0
package/dist/pipeline/provenance.d.ts +3 -1
package/dist/pipeline/provenance.js +1 -0
package/dist/webhook/eval-request-handler.d.ts +37 -10
package/dist/webhook/eval-request-handler.js +97 -62
package/package.json +1 -1

package/dist/_vendor/ailf-core/ports/context.d.ts CHANGED Viewed

@@ -89,6 +89,8 @@ export interface ResolvedConfig {
     studioOriginOverride?: string;
     /** Sanity document filter args */
     sanityDocumentArgs?: string[];
+    /** Disable release-aware auto-scoping (evaluate all tasks even when perspective is set) */
+    noAutoScope: boolean;
     /** Before option for comparison */
     beforeOption?: string;
     /** Task source adapter selection */

package/dist/_vendor/ailf-core/schemas/eval-config.d.ts CHANGED Viewed

@@ -34,6 +34,7 @@ export declare const EvalConfigSchema: z.ZodObject<{
         full: "full";
         observed: "observed";
     }>>;
+    noAutoScope: z.ZodOptional<z.ZodBoolean>;
     noCache: z.ZodOptional<z.ZodBoolean>;
     noRemoteCache: z.ZodOptional<z.ZodBoolean>;
     publish: z.ZodOptional<z.ZodBoolean>;

package/dist/_vendor/ailf-core/schemas/eval-config.js CHANGED Viewed

@@ -48,6 +48,8 @@ export const EvalConfigSchema = z
     headers: z.record(z.string(), z.string()).optional(),
     /** Evaluation mode */
     mode: z.enum(["baseline", "agentic", "observed", "full"]).optional(),
+    /** Disable release-aware auto-scoping */
+    noAutoScope: z.boolean().optional(),
     /** Disable local cache */
     noCache: z.boolean().optional(),
     /** Disable remote cache */

package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts CHANGED Viewed

@@ -54,6 +54,7 @@ export declare const PipelineRequestSchema: z.ZodObject<{
         full: "full";
         observed: "observed";
     }>>;
+    noAutoScope: z.ZodOptional<z.ZodBoolean>;
     noCache: z.ZodOptional<z.ZodBoolean>;
     noRemoteCache: z.ZodOptional<z.ZodBoolean>;
     perspective: z.ZodOptional<z.ZodString>;

package/dist/_vendor/ailf-core/schemas/pipeline-request.js CHANGED Viewed

@@ -70,6 +70,7 @@ export const PipelineRequestSchema = z.object({
     inlineTasks: z.array(z.record(z.string(), z.unknown())).optional(),
     jobId: z.string().optional(),
     mode: z.enum(["baseline", "agentic", "observed", "full"]).optional(),
+    noAutoScope: z.boolean().optional(),
     noCache: z.boolean().optional(),
     noRemoteCache: z.boolean().optional(),
     perspective: z.string().optional(),

package/dist/_vendor/ailf-core/types/index.d.ts CHANGED Viewed

@@ -438,6 +438,32 @@ export interface PipelineState {
     evalFingerprint?: string;
     /** Promptfoo share URLs produced by RunEvalStep, consumed by PublishReportStep */
     promptfooUrls?: PromptfooUrlEntry[];
+    /**
+     * Release auto-scope metadata. Set by FetchDocsStep only when a perspective is
+     * active, release impact exists, auto-scoping is on, and some task is unaffected.
+     * Consumed by GenerateConfigsStep and RunEvalStep to narrow scope.
+     */
+    releaseAutoScope?: ReleaseAutoScope;
+}
+/**
+ * Release auto-scope metadata — which tasks are affected by a content
+ * release's document changes. Produced by FetchDocsStep, consumed by
+ * GenerateConfigsStep and RunEvalStep to narrow the evaluation scope, and
+ * summarised into report provenance by PublishReportStep.
+ */
+export interface ReleaseAutoScope {
+    /** Task IDs with at least one canonical doc slug in the release's added or modified lists */
+    affectedTaskIds: string[];
+    /** Remaining task IDs from the considered task list (none of their doc slugs were added or modified) */
+    skippedTaskIds: string[];
+    /** The perspective ID that triggered scoping */
+    perspective: string;
+    /**
+     * The release impact data (doc slugs by change type).
+     * Only `added` and `modified` drive task matching; `removed` and
+     * `unchanged` are carried along for reporting.
+     */
+    impact: {
+        added: string[];
+        modified: string[];
+        removed: string[];
+        unchanged: string[];
+    };
 }
 /** Result of a full pipeline run */
 export interface PipelineResult {
@@ -983,10 +1009,29 @@ export interface Report {
 export type ReportId = string & {
     readonly __brand: "ReportId";
 };
+/**
+ * Auto-scope metadata recorded in report provenance.
+ * A condensed form of ReleaseAutoScope: task ID lists are kept, while the
+ * per-change-type slug lists are reduced to counts.
+ */
+export interface ReportAutoScope {
+    /**
+     * Whether auto-scoping was active for this evaluation.
+     * NOTE(review): the PublishReportStep code visible in this diff always
+     * writes `true` when it records this entry — confirm whether `false` is ever emitted.
+     */
+    enabled: boolean;
+    /** Task IDs whose canonical docs are affected by the release */
+    affectedTaskIds: string[];
+    /** Task IDs that were skipped (no affected docs) */
+    skippedTaskIds: string[];
+    /** The perspective ID that triggered scoping */
+    perspective: string;
+    /** Summary of release impact (counts, not full slug lists); the `unchanged` count is not recorded */
+    impactSummary: {
+        added: number;
+        modified: number;
+        removed: number;
+    };
+}
 /** Full provenance metadata for an evaluation report */
 export interface ReportProvenance {
     /** Which feature areas were evaluated */
     areas: string[];
+    /** Release auto-scope metadata (when perspective evaluation was scoped to affected tasks) */
+    autoScope?: ReportAutoScope;
     /** Content hash of the documentation context at eval time */
     contextHash?: string;
     /**

package/dist/adapters/config-sources/file-config-adapter.js CHANGED Viewed

@@ -69,6 +69,7 @@ function mapEvalConfigToResolvedConfig(config, rootDir) {
     return {
         rootDir,
         mode: config.mode ?? "full",
+        noAutoScope: config.noAutoScope ?? false,
         debug,
         areas: config.areas,
         tasks: config.tasks,

package/dist/commands/calculate-scores.js CHANGED Viewed

@@ -21,6 +21,7 @@ export function createCalculateScoresCommand() {
             const ctx = createAppContext({
                 rootDir: ROOT,
                 mode: "baseline",
+                noAutoScope: false,
                 skipFetch: true,
                 skipEval: true,
                 compareEnabled: false,

package/dist/commands/explain-handler.js CHANGED Viewed

@@ -653,6 +653,7 @@ async function buildPipelineExplainPlan(actionCommand, rootDir) {
         allowedOrigin: raw.allowedOrigin ?? [],
         allowedOrigins: raw.allowedOrigins ?? [],
         area: raw.area,
+        autoScope: raw.autoScope ?? true,
         before: raw.before,
         cache: raw.cache ?? true,
         changedDocs: raw.changedDocs,

package/dist/commands/fetch-docs.js CHANGED Viewed

@@ -42,6 +42,7 @@ async function executeFetchDocs(opts) {
     const ctx = createAppContext({
         rootDir: ROOT,
         mode: "baseline",
+        noAutoScope: false,
         skipFetch: false,
         skipEval: true,
         compareEnabled: false,

package/dist/commands/generate-configs.js CHANGED Viewed

@@ -20,6 +20,7 @@ export function createGenerateConfigsCommand() {
             const ctx = createAppContext({
                 rootDir: ROOT,
                 mode: "baseline",
+                noAutoScope: false,
                 skipFetch: true,
                 skipEval: true,
                 compareEnabled: false,

package/dist/commands/pipeline-action.d.ts CHANGED Viewed

@@ -31,6 +31,7 @@ export interface ResolvedOptions {
     headerArgs: string[];
     impactSummary?: ImpactSummary;
     mode: EvalMode;
+    noAutoScope: boolean;
     noCache: boolean;
     noRemoteCache: boolean;
     outputPath?: string;

package/dist/commands/pipeline-action.js CHANGED Viewed

@@ -213,6 +213,7 @@ export function computeResolvedOptions(opts) {
         headerArgs,
         impactSummary,
         mode,
+        noAutoScope: opts.autoScope === false,
         noCache: !opts.cache,
         noRemoteCache: opts.remoteCache === false,
         outputPath: opts.output,

package/dist/commands/pipeline.d.ts CHANGED Viewed

@@ -16,6 +16,7 @@ export interface PipelineCliOptions {
     allowedOrigin: string[];
     allowedOrigins: string[];
     area?: string;
+    autoScope: boolean;
     before?: string;
     cache: boolean;
     changedDocs?: string;

package/dist/commands/pipeline.js CHANGED Viewed

@@ -19,6 +19,7 @@ export function createPipelineCommand() {
         .option("--skip-eval", "Recalculate from existing eval results", false)
         .option("--no-cache", "Bypass all pipeline-level caching")
         .option("--no-remote-cache", "Disable Content Lake cache lookup (local cache still active)")
+        .option("--no-auto-scope", "Disable release-aware auto-scoping (evaluate all tasks even when a perspective is set)")
         .option("-a, --area <areas>", "Scope to feature areas (comma-separated)")
         .option("-t, --task <id>", "Scope to specific task ID")
         .option("--changed-docs <slugs>", "Auto-scope to tasks affected by these document slugs")

package/dist/commands/pr-comment.js CHANGED Viewed

@@ -21,6 +21,7 @@ export function createPrCommentCommand() {
             const ctx = createAppContext({
                 rootDir: ROOT,
                 mode: "baseline",
+                noAutoScope: false,
                 skipFetch: true,
                 skipEval: true,
                 compareEnabled: false,

package/dist/commands/publish.js CHANGED Viewed

@@ -83,6 +83,7 @@ async function runPublishCommand(summaryPath, opts) {
         discoveryReportEnabled: false,
         gapAnalysisEnabled: false,
         mode: "baseline",
+        noAutoScope: false,
         noCache: true,
         noRemoteCache: true,
         publishEnabled: true,

package/dist/orchestration/build-app-context.js CHANGED Viewed

@@ -20,6 +20,7 @@ export function mapToResolvedConfig(opts, rootDir) {
     return {
         rootDir,
         mode: opts.mode,
+        noAutoScope: opts.noAutoScope ?? false,
         debug: opts.debug,
         areas: opts.areaOption
             ?.split(",")

package/dist/orchestration/steps/fetch-docs-step.d.ts CHANGED Viewed

@@ -4,11 +4,16 @@
  * Uses ctx.docFetcher (the DocFetcher port) for all fetching. The adapter
  * handles GROQ queries, perspective diffing, document overlays, and URL
  * fetching. This step orchestrates the call and writes metadata files.
+ *
+ * When a source-level perspective is active and release impact data is
+ * available, this step computes which tasks are affected by the release
+ * and stores a `releaseAutoScope` entry in PipelineState. Downstream
+ * steps (GenerateConfigsStep, RunEvalStep) use this to narrow scope.
  */
-import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
+import { type AppContext, type PipelineState, type PipelineStep, type StepResult, type ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
 export declare class FetchDocsStep implements PipelineStep {
     readonly name = "fetch-docs";
     check(): ValidationIssue[];
-    execute(ctx: AppContext): Promise<StepResult>;
+    execute(ctx: AppContext, state: PipelineState): Promise<StepResult>;
     cacheInputs(ctx: AppContext): string[];
 }

package/dist/orchestration/steps/fetch-docs-step.js CHANGED Viewed

@@ -4,9 +4,15 @@
  * Uses ctx.docFetcher (the DocFetcher port) for all fetching. The adapter
  * handles GROQ queries, perspective diffing, document overlays, and URL
  * fetching. This step orchestrates the call and writes metadata files.
+ *
+ * When a source-level perspective is active and release impact data is
+ * available, this step computes which tasks are affected by the release
+ * and stores a `releaseAutoScope` entry in PipelineState. Downstream
+ * steps (GenerateConfigsStep, RunEvalStep) use this to narrow scope.
  */
 import { mkdirSync, writeFileSync } from "fs";
 import { join } from "path";
+import { isIdRef, isPathRef, isSlugRef, } from "../../_vendor/ailf-core/index.js";
 import { getStepInputPaths } from "../../pipeline/cache.js";
 import { checkCanonicalContextsExist } from "../../pipeline/checks.js";
 import { loadSource } from "../../sources.js";
@@ -16,7 +22,7 @@ export class FetchDocsStep {
     check() {
         return [];
     }
-    async execute(ctx) {
+    async execute(ctx, state) {
         if (ctx.config.skipFetch) {
             return { status: "skipped", reason: "--skip-fetch" };
         }
@@ -55,8 +61,11 @@ export class FetchDocsStep {
             };
         }
         // Execute the fetch via the DocFetcher port
+        let releaseImpact;
         try {
             const result = await ctx.docFetcher.fetch(tasksWithDocs, resolvedSource);
+            // Capture release impact for auto-scoping
+            releaseImpact = result.metadata?.releaseImpact;
             // Write metadata files for downstream pipeline consumption
             if (result.metadata) {
                 writeMetadataFiles(ctx.config.rootDir, result.metadata);
@@ -80,6 +89,23 @@ export class FetchDocsStep {
                 status: "failed",
             };
         }
+        // -----------------------------------------------------------------
+        // Release auto-scope: compute which tasks are affected by the
+        // content release. This only activates when:
+        //   1. A source-level perspective is active
+        //   2. Release impact data was computed (docs were fetched)
+        //   3. Auto-scoping was not explicitly disabled (--no-auto-scope)
+        // Explicit area/task filters do not disable it; downstream steps intersect with them.
+        // -----------------------------------------------------------------
+        if (resolvedSource.perspective &&
+            releaseImpact &&
+            !ctx.config.noAutoScope) {
+            const autoScope = computeAutoScope(tasks, releaseImpact, resolvedSource.perspective);
+            if (autoScope) {
+                state.releaseAutoScope = autoScope;
+                logAutoScope(autoScope);
+            }
+        }
         const durationMs = Date.now() - start;
         return {
             durationMs,
@@ -133,3 +159,106 @@ function writeMetadataFiles(rootDir, metadata) {
         console.log("  📄 URL fetch metadata written to contexts/url-fetch.json");
     }
 }
+// ---------------------------------------------------------------------------
+// Release auto-scope computation
+// ---------------------------------------------------------------------------
+/**
+ * Extract display slugs from a task's canonical doc refs.
+ *
+ * Uses slug, path (last segment), or id-based slug annotation.
+ * Perspective refs are excluded — they resolve at fetch time, not here.
+ */
+function extractSlugsFromTask(task) {
+    const slugs = [];
+    for (const ref of task.canonicalDocs) {
+        if (isSlugRef(ref)) {
+            slugs.push(ref.slug);
+        }
+        else if (isPathRef(ref)) {
+            // Path refs use the last segment as slug (e.g., "groq/groq-introduction" → "groq-introduction")
+            const segments = ref.path.split("/");
+            slugs.push(segments[segments.length - 1]);
+        }
+        else if (isIdRef(ref) && ref.slug) {
+            // IdDocRef may carry an optional slug annotation for display
+            slugs.push(ref.slug);
+        }
+        // PerspectiveDocRef — one-to-many, slugs unknown at this point; skip
+    }
+    return slugs;
+}
+/**
+ * Compute which tasks are affected by a content release.
+ *
+ * Builds a reverse mapping (slug → task IDs) from all tasks' canonical
+ * doc refs, then intersects with the release impact's added/modified slugs.
+ *
+ * Returns null if auto-scoping should not apply (e.g., all tasks affected).
+ */
+function computeAutoScope(tasks, releaseImpact, perspective) {
+    // Build reverse mapping: slug → task IDs
+    const slugToTaskIds = new Map();
+    for (const task of tasks) {
+        for (const slug of extractSlugsFromTask(task)) {
+            let taskIds = slugToTaskIds.get(slug);
+            if (!taskIds) {
+                taskIds = new Set();
+                slugToTaskIds.set(slug, taskIds);
+            }
+            taskIds.add(task.id);
+        }
+    }
+    // Compute affected slugs (added + modified — these have changed content)
+    const affectedSlugs = new Set([
+        ...releaseImpact.added,
+        ...releaseImpact.modified,
+    ]);
+    // Find tasks affected by the release
+    const affectedTaskIds = new Set();
+    for (const slug of affectedSlugs) {
+        const taskIds = slugToTaskIds.get(slug);
+        if (taskIds) {
+            for (const id of taskIds) {
+                affectedTaskIds.add(id);
+            }
+        }
+    }
+    // If all tasks are affected, auto-scoping adds no value
+    if (affectedTaskIds.size >= tasks.length)
+        return null;
+    // If no tasks are affected, still return the scope info so downstream
+    // steps know nothing needs evaluation (avoids wasting eval budget)
+    const allTaskIds = new Set(tasks.map((t) => t.id));
+    const skippedTaskIds = [...allTaskIds].filter((id) => !affectedTaskIds.has(id));
+    return {
+        affectedTaskIds: [...affectedTaskIds],
+        skippedTaskIds,
+        perspective,
+        impact: {
+            added: releaseImpact.added,
+            modified: releaseImpact.modified,
+            removed: releaseImpact.removed,
+            unchanged: releaseImpact.unchanged,
+        },
+    };
+}
+/**
+ * Log auto-scope results to the console.
+ */
+function logAutoScope(autoScope) {
+    const { affectedTaskIds, skippedTaskIds, impact } = autoScope;
+    const totalTasks = affectedTaskIds.length + skippedTaskIds.length;
+    const affectedDocs = impact.added.length + impact.modified.length;
+    console.log(`  🎯 Release auto-scope: ${affectedTaskIds.length} of ${totalTasks} tasks affected`);
+    if (affectedDocs > 0) {
+        console.log(`     ${affectedDocs} doc(s) changed → ${affectedTaskIds.length} task(s) to evaluate`);
+    }
+    if (affectedTaskIds.length > 0) {
+        for (const id of affectedTaskIds) {
+            console.log(`     ✓ ${id}`);
+        }
+    }
+    if (skippedTaskIds.length > 0) {
+        console.log(`     ⏭ ${skippedTaskIds.length} task(s) skipped (no docs affected by release)`);
+    }
+}

package/dist/orchestration/steps/generate-configs-step.d.ts CHANGED Viewed

@@ -5,10 +5,10 @@
  * derived from AppContext. No env bridge needed — source is resolved and
  * passed directly.
  */
-import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
+import type { AppContext, PipelineState, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
 export declare class GenerateConfigsStep implements PipelineStep {
     readonly name = "generate-configs";
     check(ctx: AppContext): ValidationIssue[];
-    execute(ctx: AppContext): Promise<StepResult>;
+    execute(ctx: AppContext, state: PipelineState): Promise<StepResult>;
     cacheInputs(ctx: AppContext): string[];
 }

package/dist/orchestration/steps/generate-configs-step.js CHANGED Viewed

@@ -17,7 +17,7 @@ export class GenerateConfigsStep {
         const issues = validateModelsYaml(ctx.config.rootDir);
         return issues.filter((i) => i.severity === "error");
     }
-    async execute(ctx) {
+    async execute(ctx, state) {
         const start = Date.now();
         // Resolve source once with typed overrides
         const overrides = configToSourceOverrides(ctx.config);
@@ -40,6 +40,17 @@ export class GenerateConfigsStep {
                 status: "failed",
             };
         }
+        // Release auto-scope: narrow tasks to those affected by the release.
+        // When explicit area/task filters are also active, this produces the
+        // intersection (only tasks matching BOTH the explicit filter AND the
+        // release impact are included).
+        if (state.releaseAutoScope && !ctx.config.noAutoScope) {
+            const scopedIds = new Set(state.releaseAutoScope.affectedTaskIds);
+            const beforeCount = tasks.length;
+            tasks = tasks.filter((t) => scopedIds.has(t.id));
+            console.log(`  🎯 Auto-scoped to ${tasks.length} of ${beforeCount} task(s) affected by release` +
+                ` (${beforeCount - tasks.length} skipped, --no-auto-scope to override)`);
+        }
         try {
             generateConfigs({
                 allowedOrigins: ctx.config.allowedOrigins,

package/dist/orchestration/steps/publish-report-step.js CHANGED Viewed

@@ -60,7 +60,21 @@ export class PublishReportStep {
             evalFingerprint: state.evalFingerprint ?? this.options.evalFingerprint,
             promptfooUrls: state.promptfooUrls ?? this.options.promptfooUrls,
         };
-        const provenanceInput = buildProvenanceInput(summary, ctx, provenanceOptions);
+        // Build auto-scope provenance from pipeline state
+        const autoScope = state.releaseAutoScope
+            ? {
+                enabled: true,
+                affectedTaskIds: state.releaseAutoScope.affectedTaskIds,
+                skippedTaskIds: state.releaseAutoScope.skippedTaskIds,
+                perspective: state.releaseAutoScope.perspective,
+                impactSummary: {
+                    added: state.releaseAutoScope.impact.added.length,
+                    modified: state.releaseAutoScope.impact.modified.length,
+                    removed: state.releaseAutoScope.impact.removed.length,
+                },
+            }
+            : undefined;
+        const provenanceInput = buildProvenanceInput(summary, ctx, provenanceOptions, autoScope);
         const provenance = buildProvenance(provenanceInput);
         // Create report
         const now = new Date().toISOString();
@@ -118,7 +132,7 @@ export class PublishReportStep {
 /**
  * Assemble provenance input from the score summary and pipeline context.
  */
-function buildProvenanceInput(summary, ctx, options) {
+function buildProvenanceInput(summary, ctx, options, autoScope) {
     const areas = summary.scores.map((s) => s.feature);
     const mode = ctx.config.mode;
     // Read document IDs from config
@@ -146,6 +160,7 @@ function buildProvenanceInput(summary, ctx, options) {
         : undefined;
     return {
         areas,
+        autoScope,
         callerGit: ctx.config.callerGit,
         evalFingerprint,
         mode,

package/dist/orchestration/steps/run-eval-step.js CHANGED Viewed

@@ -46,7 +46,12 @@ export class RunEvalStep {
                 ...(ctx.config.tasks ? { taskIds: ctx.config.tasks } : {}),
             }
             : undefined;
-        const tasks = await ctx.taskSource.loadTasks(filter);
+        let tasks = await ctx.taskSource.loadTasks(filter);
+        // Release auto-scope: narrow to affected tasks (mirrors GenerateConfigsStep)
+        if (state.releaseAutoScope && !ctx.config.noAutoScope) {
+            const scopedIds = new Set(state.releaseAutoScope.affectedTaskIds);
+            tasks = tasks.filter((t) => scopedIds.has(t.id));
+        }
         // Only check context files for tasks that have canonical docs.
         // Tasks without canonical docs are skipped by FetchDocsStep (they
         // have no docs to fetch), so no context file is written for them.

package/dist/pipeline/map-request-to-config.js CHANGED Viewed

@@ -38,6 +38,7 @@ export function mapRequestToConfig(request, rootDir) {
         discoveryReportEnabled: request.discoveryReport ?? false,
         publishEnabled: request.publish ?? publishDefault,
         publishTag: request.publishTag,
+        noAutoScope: request.noAutoScope ?? false,
         noCache: request.noCache ?? false,
         noRemoteCache: request.noRemoteCache ?? false,
         graderReplications: request.graderReplications,

package/dist/pipeline/provenance.d.ts CHANGED Viewed

@@ -12,10 +12,12 @@
  * @see docs/design-docs/report-store/architecture.md — Provenance collection
  */
 import type { ResolvedSourceConfig } from "../sources.js";
-import type { EvalMode, PromptfooUrlEntry, ReportProvenance } from "./types.js";
+import type { EvalMode, PromptfooUrlEntry, ReportAutoScope, ReportProvenance } from "./types.js";
 export interface ProvenanceInput {
     /** Feature areas that were evaluated */
     areas: string[];
+    /** Release auto-scope metadata (when perspective evaluation was scoped) */
+    autoScope?: ReportAutoScope;
     /**
      * Git metadata from the *calling* repository (cross-repo evaluations).
      * When provided, overrides CI env var detection so provenance attributes

package/dist/pipeline/provenance.js CHANGED Viewed

@@ -37,6 +37,7 @@ export function buildProvenance(input) {
         : detectGitMetadata();
     return {
         areas: input.areas,
+        autoScope: input.autoScope,
         contextHash: input.contextHash,
         evalFingerprint: input.evalFingerprint,
         git,

package/dist/webhook/eval-request-handler.d.ts CHANGED Viewed

@@ -3,23 +3,44 @@
  *
  * Server-side handler for `ailf.evalRequest` documents from the Sanity
  * Content Lake. This is the counterpart to the Studio's "Request Evaluation"
- * action — when a content editor creates an eval request document via the
- * Studio UI, a Sanity webhook fires and calls this handler.
+ * and "Run Task Eval" actions — when a content editor creates an eval
+ * request document via the Studio UI, a Sanity webhook fires and calls
+ * this handler.
  *
  * Designed to run in any HTTP environment: Cloudflare Workers, Vercel
  * functions, Express, Hono, etc.
  *
+ * Supports two scoping modes:
+ * - **Release-scoped** — requires `perspective` field
+ * - **Task-scoped** — requires `tasks` array (optionally with `areas`)
+ *
+ * At least one of `perspective` or `tasks` must be present.
+ *
  * Flow:
  * 1. Receive eval request payload (from Sanity webhook projection)
  * 2. Validate: must be `ailf.evalRequest` type, `pending` status,
- *    non-empty `perspective`
- * 3. Dispatch a full evaluation to GitHub Actions via `repository_dispatch`
- *    with `external-eval` event type and release-scoped client payload
+ *    with either `perspective` or `tasks`
+ * 3. Dispatch evaluation to GitHub Actions via `repository_dispatch`
+ *    with `external-eval` event type and scoped client payload
  * 4. On success: PATCH the eval request document → `status: "dispatched"`
  * 5. On failure: PATCH the eval request document → `status: "failed"` + error
  * 6. Return a structured result
  *
- * @see packages/studio/src/actions/dispatch.ts — Studio-side dispatch
+ * ## Sanity Manage Webhook Configuration
+ *
+ * The Sanity webhook projection MUST include all fields consumed by
+ * EvalRequestPayload. The recommended projection is `{...}` (spread),
+ * which passes the entire document and avoids silently dropping fields
+ * when the schema evolves.
+ *
+ * Recommended projection:
+ * ```
+ * {...}
+ * ```
+ *
+ * @see packages/api/src/routes/webhooks.ts — API gateway webhook handler
+ * @see packages/studio/src/actions/RunEvaluationAction.tsx — release eval
+ * @see packages/studio/src/actions/RunTaskEvaluationAction.tsx — task eval
  * @see .github/workflows/external-eval.yml — receiving workflow
  * @see docs/design-docs/report-store/visibility-workflows.md
  */
@@ -29,24 +50,30 @@ export interface EvalRequestPayload {
     _id: string;
     /** The Sanity document _type (should be "ailf.evalRequest") */
     _type: string;
+    /** Feature areas to scope the evaluation (task-scoped evals) */
+    areas?: string[];
     /** Sanity dataset */
     dataset: string;
+    /** Run in debug mode */
+    debug?: boolean;
     /** Error message (only if status is "failed") */
     error?: string;
     /** Evaluation mode */
     mode: string;
-    /** Content release perspective ID */
-    perspective: string;
+    /** Content release perspective ID (release-scoped evals) */
+    perspective?: string;
     /** Sanity project ID */
     projectId: string;
     /** ISO datetime of when the request was created */
     requestedAt: string;
     /** User ID who requested */
     requestedBy?: string;
-    /** Publish tag */
-    tag?: string;
     /** Request status */
     status: string;
+    /** Publish tag */
+    tag?: string;
+    /** Specific task IDs to evaluate (task-scoped evals) */
+    tasks?: string[];
 }
 /** Configuration for the eval request handler. */
 export interface EvalRequestHandlerConfig {

package/dist/webhook/eval-request-handler.js CHANGED Viewed

@@ -3,23 +3,44 @@
  *
  * Server-side handler for `ailf.evalRequest` documents from the Sanity
  * Content Lake. This is the counterpart to the Studio's "Request Evaluation"
- * action — when a content editor creates an eval request document via the
- * Studio UI, a Sanity webhook fires and calls this handler.
+ * and "Run Task Eval" actions — when a content editor creates an eval
+ * request document via the Studio UI, a Sanity webhook fires and calls
+ * this handler.
  *
  * Designed to run in any HTTP environment: Cloudflare Workers, Vercel
  * functions, Express, Hono, etc.
  *
+ * Supports two scoping modes:
+ * - **Release-scoped** — requires `perspective` field
+ * - **Task-scoped** — requires `tasks` array (optionally with `areas`)
+ *
+ * At least one of `perspective` or `tasks` must be present.
+ *
  * Flow:
  * 1. Receive eval request payload (from Sanity webhook projection)
  * 2. Validate: must be `ailf.evalRequest` type, `pending` status,
- *    non-empty `perspective`
- * 3. Dispatch a full evaluation to GitHub Actions via `repository_dispatch`
- *    with `external-eval` event type and release-scoped client payload
+ *    with either `perspective` or `tasks`
+ * 3. Dispatch evaluation to GitHub Actions via `repository_dispatch`
+ *    with `external-eval` event type and scoped client payload
  * 4. On success: PATCH the eval request document → `status: "dispatched"`
  * 5. On failure: PATCH the eval request document → `status: "failed"` + error
  * 6. Return a structured result
  *
- * @see packages/studio/src/actions/dispatch.ts — Studio-side dispatch
+ * ## Sanity Manage Webhook Configuration
+ *
+ * The Sanity webhook projection MUST include all fields consumed by
+ * EvalRequestPayload. The recommended projection is `{...}` (spread),
+ * which passes the entire document and avoids silently dropping fields
+ * when the schema evolves.
+ *
+ * Recommended projection:
+ * ```
+ * {...}
+ * ```
+ *
+ * @see packages/api/src/routes/webhooks.ts — API gateway webhook handler
+ * @see packages/studio/src/actions/RunEvaluationAction.tsx — release eval
+ * @see packages/studio/src/actions/RunTaskEvaluationAction.tsx — task eval
  * @see .github/workflows/external-eval.yml — receiving workflow
  * @see docs/design-docs/report-store/visibility-workflows.md
  */
@@ -53,98 +74,112 @@ const GITHUB_API = "https://api.github.com";
 export async function handleEvalRequest(payload, config) {
     const requestId = payload._id ?? "unknown";
     // -------------------------------------------------------------------------
-    // 1. Validate payload
+    // 1. Create Sanity client early so validation failures can mark the
+    //    document as "failed" instead of leaving it stuck at "pending".
+    // -------------------------------------------------------------------------
+    const client = payload.projectId && payload.dataset
+        ? createClient({
+            apiVersion: "2026-03-11",
+            dataset: payload.dataset,
+            projectId: payload.projectId,
+            token: config.sanityToken,
+            useCdn: false,
+        })
+        : null;
+    // Helper: mark the eval request as failed in the Content Lake so the
+    // Studio UI can show the error instead of polling forever.
+    async function markFailed(errorMessage) {
+        if (client && payload._id) {
+            try {
+                await client
+                    .patch(payload._id)
+                    .set({ error: errorMessage, status: "failed" })
+                    .commit();
+            }
+            catch (err) {
+                console.warn(`  ⚠️  Failed to update document with error status: ${err instanceof Error ? err.message : String(err)}`);
+            }
+        }
+        return { error: errorMessage, ok: false, requestId };
+    }
+    // -------------------------------------------------------------------------
+    // 2. Validate payload
     // -------------------------------------------------------------------------
     if (payload._type !== "ailf.evalRequest") {
-        return {
-            error: `Unexpected document type: "${payload._type}" (expected "ailf.evalRequest")`,
-            ok: false,
-            requestId,
-        };
+        return markFailed(`Unexpected document type: "${payload._type}" (expected "ailf.evalRequest")`);
     }
     if (payload.status !== "pending") {
+        // Don't mark as failed — it's already in a non-pending state
         return {
             error: `Eval request is not pending (status: "${payload.status}")`,
             ok: false,
             requestId,
         };
     }
-    if (!payload.perspective) {
-        return {
-            error: "Missing required field: perspective",
-            ok: false,
-            requestId,
-        };
+    const hasPerspective = !!payload.perspective;
+    const hasTasks = Array.isArray(payload.tasks) && payload.tasks.length > 0;
+    if (!hasPerspective && !hasTasks) {
+        return markFailed("Missing required field: perspective or tasks. " +
+            "Provide a content release perspective for release evals, " +
+            "or a tasks array for task-scoped evals.");
     }
     // -------------------------------------------------------------------------
-    // 2. Dispatch evaluation via GitHub Actions
+    // 3. Dispatch evaluation via GitHub Actions
     // -------------------------------------------------------------------------
     const repo = config.githubRepo ?? DEFAULT_REPO;
     const dispatchResult = await dispatchGitHubEval(repo, payload, config);
     // -------------------------------------------------------------------------
-    // 3. Update eval request document status
+    // 4. Update eval request document status
     // -------------------------------------------------------------------------
-    const client = createClient({
-        apiVersion: "2026-03-11",
-        dataset: payload.dataset,
-        projectId: payload.projectId,
-        token: config.sanityToken,
-        useCdn: false,
-    });
     if (dispatchResult.ok) {
-        try {
-            await client
-                .patch(payload._id)
-                .set({
-                dispatchedAt: new Date().toISOString(),
-                status: "dispatched",
-            })
-                .commit();
-        }
-        catch (err) {
-            // Dispatch succeeded but status update failed — log and still return ok
-            console.warn(`  ⚠️  Dispatch succeeded but failed to update document status: ${err instanceof Error ? err.message : String(err)}`);
+        if (client) {
+            try {
+                await client
+                    .patch(payload._id)
+                    .set({
+                    dispatchedAt: new Date().toISOString(),
+                    status: "dispatched",
+                })
+                    .commit();
+            }
+            catch (err) {
+                // Dispatch succeeded but status update failed — log and still return ok
+                console.warn(`  ⚠️  Dispatch succeeded but failed to update document status: ${err instanceof Error ? err.message : String(err)}`);
+            }
         }
         return { ok: true, requestId };
     }
     // Dispatch failed — mark the document as failed
-    try {
-        await client
-            .patch(payload._id)
-            .set({
-            error: dispatchResult.error ?? "Unknown dispatch error",
-            status: "failed",
-        })
-            .commit();
-    }
-    catch (err) {
-        console.warn(`  ⚠️  Failed to update document with error status: ${err instanceof Error ? err.message : String(err)}`);
-    }
-    return {
-        error: dispatchResult.error,
-        ok: false,
-        requestId,
-    };
+    return markFailed(dispatchResult.error ?? "Unknown dispatch error");
 }
 /**
- * Dispatch a release-scoped evaluation via GitHub Actions repository_dispatch.
+ * Dispatch an evaluation via GitHub Actions repository_dispatch.
  *
- * Uses the `external-eval` event type with a client_payload conforming to
- * PipelineRequestSchema. The workflow passes it directly to the CLI via
- * `--config` without field translation.
+ * Supports both release-scoped (perspective) and task-scoped (tasks/areas)
+ * evaluations. Uses the `external-eval` event type with a client_payload
+ * conforming to PipelineRequestSchema. The workflow passes it directly to
+ * the CLI via `--config` without field translation.
  */
 async function dispatchGitHubEval(repo, payload, config) {
     const url = `${GITHUB_API}/repos/${repo}/dispatches`;
+    const hasPerspective = !!payload.perspective;
+    const hasTasks = Array.isArray(payload.tasks) && payload.tasks.length > 0;
+    const hasAreas = Array.isArray(payload.areas) && payload.areas.length > 0;
     const body = {
         client_payload: {
             caller_repo: "sanity-io/www-sanity-io",
             dataset: payload.dataset,
             mode: payload.mode,
-            perspective: payload.perspective,
             projectId: payload.projectId,
             publish: true,
-            ...(payload.tag ? { publishTag: payload.tag } : {}),
             source: "production",
+            // Release-scoped fields
+            ...(hasPerspective ? { perspective: payload.perspective } : {}),
+            // Task-scoped fields
+            ...(hasTasks ? { tasks: payload.tasks } : {}),
+            ...(hasAreas ? { areas: payload.areas } : {}),
+            ...(payload.debug ? { debug: true } : {}),
+            ...(payload.tag ? { publishTag: payload.tag } : {}),
         },
         event_type: "external-eval",
     };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@sanity/ailf",
-  "version": "0.1.24",
+  "version": "0.1.26",
   "private": false,
   "publishConfig": {
     "access": "restricted"