@sanity/ailf 0.1.24 → 0.1.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/dist/_vendor/ailf-core/ports/context.d.ts +2 -0
  2. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
  3. package/dist/_vendor/ailf-core/schemas/eval-config.js +2 -0
  4. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
  5. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -0
  6. package/dist/_vendor/ailf-core/types/index.d.ts +45 -0
  7. package/dist/adapters/config-sources/file-config-adapter.js +1 -0
  8. package/dist/commands/calculate-scores.js +1 -0
  9. package/dist/commands/explain-handler.js +1 -0
  10. package/dist/commands/fetch-docs.js +1 -0
  11. package/dist/commands/generate-configs.js +1 -0
  12. package/dist/commands/pipeline-action.d.ts +1 -0
  13. package/dist/commands/pipeline-action.js +1 -0
  14. package/dist/commands/pipeline.d.ts +1 -0
  15. package/dist/commands/pipeline.js +1 -0
  16. package/dist/commands/pr-comment.js +1 -0
  17. package/dist/commands/publish.js +1 -0
  18. package/dist/orchestration/build-app-context.js +1 -0
  19. package/dist/orchestration/steps/fetch-docs-step.d.ts +7 -2
  20. package/dist/orchestration/steps/fetch-docs-step.js +130 -1
  21. package/dist/orchestration/steps/generate-configs-step.d.ts +2 -2
  22. package/dist/orchestration/steps/generate-configs-step.js +12 -1
  23. package/dist/orchestration/steps/publish-report-step.js +17 -2
  24. package/dist/orchestration/steps/run-eval-step.js +6 -1
  25. package/dist/pipeline/map-request-to-config.js +1 -0
  26. package/dist/pipeline/provenance.d.ts +3 -1
  27. package/dist/pipeline/provenance.js +1 -0
  28. package/dist/webhook/eval-request-handler.d.ts +37 -10
  29. package/dist/webhook/eval-request-handler.js +97 -62
  30. package/package.json +1 -1
@@ -89,6 +89,8 @@ export interface ResolvedConfig {
89
89
  studioOriginOverride?: string;
90
90
  /** Sanity document filter args */
91
91
  sanityDocumentArgs?: string[];
92
+ /** Disable release-aware auto-scoping (evaluate all tasks even when perspective is set) */
93
+ noAutoScope: boolean;
92
94
  /** Before option for comparison */
93
95
  beforeOption?: string;
94
96
  /** Task source adapter selection */
@@ -34,6 +34,7 @@ export declare const EvalConfigSchema: z.ZodObject<{
34
34
  full: "full";
35
35
  observed: "observed";
36
36
  }>>;
37
+ noAutoScope: z.ZodOptional<z.ZodBoolean>;
37
38
  noCache: z.ZodOptional<z.ZodBoolean>;
38
39
  noRemoteCache: z.ZodOptional<z.ZodBoolean>;
39
40
  publish: z.ZodOptional<z.ZodBoolean>;
@@ -48,6 +48,8 @@ export const EvalConfigSchema = z
48
48
  headers: z.record(z.string(), z.string()).optional(),
49
49
  /** Evaluation mode */
50
50
  mode: z.enum(["baseline", "agentic", "observed", "full"]).optional(),
51
+ /** Disable release-aware auto-scoping */
52
+ noAutoScope: z.boolean().optional(),
51
53
  /** Disable local cache */
52
54
  noCache: z.boolean().optional(),
53
55
  /** Disable remote cache */
@@ -54,6 +54,7 @@ export declare const PipelineRequestSchema: z.ZodObject<{
54
54
  full: "full";
55
55
  observed: "observed";
56
56
  }>>;
57
+ noAutoScope: z.ZodOptional<z.ZodBoolean>;
57
58
  noCache: z.ZodOptional<z.ZodBoolean>;
58
59
  noRemoteCache: z.ZodOptional<z.ZodBoolean>;
59
60
  perspective: z.ZodOptional<z.ZodString>;
@@ -70,6 +70,7 @@ export const PipelineRequestSchema = z.object({
70
70
  inlineTasks: z.array(z.record(z.string(), z.unknown())).optional(),
71
71
  jobId: z.string().optional(),
72
72
  mode: z.enum(["baseline", "agentic", "observed", "full"]).optional(),
73
+ noAutoScope: z.boolean().optional(),
73
74
  noCache: z.boolean().optional(),
74
75
  noRemoteCache: z.boolean().optional(),
75
76
  perspective: z.string().optional(),
@@ -438,6 +438,32 @@ export interface PipelineState {
438
438
  evalFingerprint?: string;
439
439
  /** Promptfoo share URLs produced by RunEvalStep, consumed by PublishReportStep */
440
440
  promptfooUrls?: PromptfooUrlEntry[];
441
+ /**
442
+ * Release auto-scope metadata. Set by FetchDocsStep when a perspective
443
+ * is active and release impact identifies affected documents.
444
+ * Consumed by GenerateConfigsStep and RunEvalStep to narrow scope.
445
+ */
446
+ releaseAutoScope?: ReleaseAutoScope;
447
+ }
448
+ /**
449
+ * Release auto-scope metadata — which tasks are affected by a content
450
+ * release's document changes. Produced by FetchDocsStep, consumed by
451
+ * GenerateConfigsStep and RunEvalStep to narrow the evaluation scope, and by PublishReportStep to record provenance.
452
+ */
453
+ export interface ReleaseAutoScope {
454
+ /** Task IDs with at least one canonical doc slug among the release's added or modified docs */
455
+ affectedTaskIds: string[];
456
+ /** Task IDs that were skipped (no affected docs); the complement of affectedTaskIds within the evaluated task set */
457
+ skippedTaskIds: string[];
458
+ /** The perspective ID that triggered scoping */
459
+ perspective: string;
460
+ /** The release impact data (doc slugs by change type); only added and modified slugs mark a task as affected */
461
+ impact: {
462
+ added: string[];
463
+ modified: string[];
464
+ removed: string[];
465
+ unchanged: string[];
466
+ };
441
467
  }
442
468
  /** Result of a full pipeline run */
443
469
  export interface PipelineResult {
@@ -983,10 +1009,29 @@ export interface Report {
983
1009
  export type ReportId = string & {
984
1010
  readonly __brand: "ReportId";
985
1011
  };
1012
+ /** Auto-scope metadata recorded in report provenance; a count-only summary of the pipeline's ReleaseAutoScope state */
1013
+ export interface ReportAutoScope {
1014
+ /** Whether auto-scoping was active for this evaluation */
1015
+ enabled: boolean;
1016
+ /** Task IDs whose canonical docs are affected by the release */
1017
+ affectedTaskIds: string[];
1018
+ /** Task IDs that were skipped (no affected docs) */
1019
+ skippedTaskIds: string[];
1020
+ /** The perspective ID that triggered scoping */
1021
+ perspective: string;
1022
+ /** Summary of release impact (counts, not full slug lists); unchanged docs are not counted */
1023
+ impactSummary: {
1024
+ added: number;
1025
+ modified: number;
1026
+ removed: number;
1027
+ };
1028
+ }
986
1029
  /** Full provenance metadata for an evaluation report */
987
1030
  export interface ReportProvenance {
988
1031
  /** Which feature areas were evaluated */
989
1032
  areas: string[];
1033
+ /** Release auto-scope metadata (when perspective evaluation was scoped to affected tasks) */
1034
+ autoScope?: ReportAutoScope;
990
1035
  /** Content hash of the documentation context at eval time */
991
1036
  contextHash?: string;
992
1037
  /**
@@ -69,6 +69,7 @@ function mapEvalConfigToResolvedConfig(config, rootDir) {
69
69
  return {
70
70
  rootDir,
71
71
  mode: config.mode ?? "full",
72
+ noAutoScope: config.noAutoScope ?? false,
72
73
  debug,
73
74
  areas: config.areas,
74
75
  tasks: config.tasks,
@@ -21,6 +21,7 @@ export function createCalculateScoresCommand() {
21
21
  const ctx = createAppContext({
22
22
  rootDir: ROOT,
23
23
  mode: "baseline",
24
+ noAutoScope: false,
24
25
  skipFetch: true,
25
26
  skipEval: true,
26
27
  compareEnabled: false,
@@ -653,6 +653,7 @@ async function buildPipelineExplainPlan(actionCommand, rootDir) {
653
653
  allowedOrigin: raw.allowedOrigin ?? [],
654
654
  allowedOrigins: raw.allowedOrigins ?? [],
655
655
  area: raw.area,
656
+ autoScope: raw.autoScope ?? true,
656
657
  before: raw.before,
657
658
  cache: raw.cache ?? true,
658
659
  changedDocs: raw.changedDocs,
@@ -42,6 +42,7 @@ async function executeFetchDocs(opts) {
42
42
  const ctx = createAppContext({
43
43
  rootDir: ROOT,
44
44
  mode: "baseline",
45
+ noAutoScope: false,
45
46
  skipFetch: false,
46
47
  skipEval: true,
47
48
  compareEnabled: false,
@@ -20,6 +20,7 @@ export function createGenerateConfigsCommand() {
20
20
  const ctx = createAppContext({
21
21
  rootDir: ROOT,
22
22
  mode: "baseline",
23
+ noAutoScope: false,
23
24
  skipFetch: true,
24
25
  skipEval: true,
25
26
  compareEnabled: false,
@@ -31,6 +31,7 @@ export interface ResolvedOptions {
31
31
  headerArgs: string[];
32
32
  impactSummary?: ImpactSummary;
33
33
  mode: EvalMode;
34
+ noAutoScope: boolean;
34
35
  noCache: boolean;
35
36
  noRemoteCache: boolean;
36
37
  outputPath?: string;
@@ -213,6 +213,7 @@ export function computeResolvedOptions(opts) {
213
213
  headerArgs,
214
214
  impactSummary,
215
215
  mode,
216
+ noAutoScope: opts.autoScope === false,
216
217
  noCache: !opts.cache,
217
218
  noRemoteCache: opts.remoteCache === false,
218
219
  outputPath: opts.output,
@@ -16,6 +16,7 @@ export interface PipelineCliOptions {
16
16
  allowedOrigin: string[];
17
17
  allowedOrigins: string[];
18
18
  area?: string;
19
+ autoScope: boolean;
19
20
  before?: string;
20
21
  cache: boolean;
21
22
  changedDocs?: string;
@@ -19,6 +19,7 @@ export function createPipelineCommand() {
19
19
  .option("--skip-eval", "Recalculate from existing eval results", false)
20
20
  .option("--no-cache", "Bypass all pipeline-level caching")
21
21
  .option("--no-remote-cache", "Disable Content Lake cache lookup (local cache still active)")
22
+ .option("--no-auto-scope", "Disable release-aware auto-scoping (evaluate all tasks even when a perspective is set)")
22
23
  .option("-a, --area <areas>", "Scope to feature areas (comma-separated)")
23
24
  .option("-t, --task <id>", "Scope to specific task ID")
24
25
  .option("--changed-docs <slugs>", "Auto-scope to tasks affected by these document slugs")
@@ -21,6 +21,7 @@ export function createPrCommentCommand() {
21
21
  const ctx = createAppContext({
22
22
  rootDir: ROOT,
23
23
  mode: "baseline",
24
+ noAutoScope: false,
24
25
  skipFetch: true,
25
26
  skipEval: true,
26
27
  compareEnabled: false,
@@ -83,6 +83,7 @@ async function runPublishCommand(summaryPath, opts) {
83
83
  discoveryReportEnabled: false,
84
84
  gapAnalysisEnabled: false,
85
85
  mode: "baseline",
86
+ noAutoScope: false,
86
87
  noCache: true,
87
88
  noRemoteCache: true,
88
89
  publishEnabled: true,
@@ -20,6 +20,7 @@ export function mapToResolvedConfig(opts, rootDir) {
20
20
  return {
21
21
  rootDir,
22
22
  mode: opts.mode,
23
+ noAutoScope: opts.noAutoScope ?? false,
23
24
  debug: opts.debug,
24
25
  areas: opts.areaOption
25
26
  ?.split(",")
@@ -4,11 +4,16 @@
4
4
  * Uses ctx.docFetcher (the DocFetcher port) for all fetching. The adapter
5
5
  * handles GROQ queries, perspective diffing, document overlays, and URL
6
6
  * fetching. This step orchestrates the call and writes metadata files.
7
+ *
8
+ * When a source-level perspective is active and release impact data is
9
+ * available, this step computes which tasks are affected by the release
10
+ * and stores a `releaseAutoScope` entry in PipelineState. Downstream
11
+ * steps (GenerateConfigsStep, RunEvalStep) use this to narrow scope.
7
12
  */
8
- import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
13
+ import { type AppContext, type PipelineState, type PipelineStep, type StepResult, type ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
9
14
  export declare class FetchDocsStep implements PipelineStep {
10
15
  readonly name = "fetch-docs";
11
16
  check(): ValidationIssue[];
12
- execute(ctx: AppContext): Promise<StepResult>;
17
+ execute(ctx: AppContext, state: PipelineState): Promise<StepResult>;
13
18
  cacheInputs(ctx: AppContext): string[];
14
19
  }
@@ -4,9 +4,15 @@
4
4
  * Uses ctx.docFetcher (the DocFetcher port) for all fetching. The adapter
5
5
  * handles GROQ queries, perspective diffing, document overlays, and URL
6
6
  * fetching. This step orchestrates the call and writes metadata files.
7
+ *
8
+ * When a source-level perspective is active and release impact data is
9
+ * available, this step computes which tasks are affected by the release
10
+ * and stores a `releaseAutoScope` entry in PipelineState. Downstream
11
+ * steps (GenerateConfigsStep, RunEvalStep) use this to narrow scope.
7
12
  */
8
13
  import { mkdirSync, writeFileSync } from "fs";
9
14
  import { join } from "path";
15
+ import { isIdRef, isPathRef, isSlugRef, } from "../../_vendor/ailf-core/index.js";
10
16
  import { getStepInputPaths } from "../../pipeline/cache.js";
11
17
  import { checkCanonicalContextsExist } from "../../pipeline/checks.js";
12
18
  import { loadSource } from "../../sources.js";
@@ -16,7 +22,7 @@ export class FetchDocsStep {
16
22
  check() {
17
23
  return [];
18
24
  }
19
- async execute(ctx) {
25
+ async execute(ctx, state) {
20
26
  if (ctx.config.skipFetch) {
21
27
  return { status: "skipped", reason: "--skip-fetch" };
22
28
  }
@@ -55,8 +61,11 @@ export class FetchDocsStep {
55
61
  };
56
62
  }
57
63
  // Execute the fetch via the DocFetcher port
64
+ let releaseImpact;
58
65
  try {
59
66
  const result = await ctx.docFetcher.fetch(tasksWithDocs, resolvedSource);
67
+ // Capture release impact for auto-scoping
68
+ releaseImpact = result.metadata?.releaseImpact;
60
69
  // Write metadata files for downstream pipeline consumption
61
70
  if (result.metadata) {
62
71
  writeMetadataFiles(ctx.config.rootDir, result.metadata);
@@ -80,6 +89,23 @@ export class FetchDocsStep {
80
89
  status: "failed",
81
90
  };
82
91
  }
92
+ // -----------------------------------------------------------------
93
+ // Release auto-scope: compute which tasks are affected by the
94
+ // content release. This only activates when:
95
+ // 1. A source-level perspective is active
96
+ // 2. Release impact data was computed (docs were fetched)
97
+ // 3. Auto-scoping was not explicitly disabled (--no-auto-scope)
98
+ // Note: explicit area/task filters are not checked by this condition; GenerateConfigsStep intersects them with the auto-scope.
99
+ // -----------------------------------------------------------------
100
+ if (resolvedSource.perspective &&
101
+ releaseImpact &&
102
+ !ctx.config.noAutoScope) {
103
+ const autoScope = computeAutoScope(tasks, releaseImpact, resolvedSource.perspective);
104
+ if (autoScope) {
105
+ state.releaseAutoScope = autoScope;
106
+ logAutoScope(autoScope);
107
+ }
108
+ }
83
109
  const durationMs = Date.now() - start;
84
110
  return {
85
111
  durationMs,
@@ -133,3 +159,106 @@ function writeMetadataFiles(rootDir, metadata) {
133
159
  console.log(" 📄 URL fetch metadata written to contexts/url-fetch.json");
134
160
  }
135
161
  }
162
+ // ---------------------------------------------------------------------------
163
+ // Release auto-scope computation
164
+ // ---------------------------------------------------------------------------
/**
 * Extract display slugs from a task's canonical doc refs.
 *
 * - Slug refs contribute their `slug` as-is.
 * - Path refs contribute their last non-empty path segment
 *   (e.g., "groq/groq-introduction" → "groq-introduction"); a trailing
 *   slash is ignored and a path with no segments contributes nothing.
 * - Id refs contribute their optional `slug` annotation when present.
 * - Any other ref kind (e.g., perspective refs, which resolve one-to-many
 *   at fetch time) is skipped because its slugs are unknown here.
 *
 * @param task - Task definition; `canonicalDocs` may be absent for tasks
 *   that have no canonical docs, in which case no slugs are returned.
 * @returns Slugs in ref order. Duplicates are not removed.
 */
function extractSlugsFromTask(task) {
    const slugs = [];
    // Tasks without canonical docs are valid inputs; treat them as having no refs
    // instead of throwing on iteration of `undefined`.
    for (const ref of task.canonicalDocs ?? []) {
        if (isSlugRef(ref)) {
            slugs.push(ref.slug);
        }
        else if (isPathRef(ref)) {
            // Drop empty segments so "a/b/" resolves to "b" rather than "".
            const lastSegment = ref.path.split("/").filter(Boolean).pop();
            if (lastSegment) {
                slugs.push(lastSegment);
            }
        }
        else if (isIdRef(ref) && ref.slug) {
            // IdDocRef may carry an optional slug annotation for display
            slugs.push(ref.slug);
        }
    }
    return slugs;
}
/**
 * Compute which tasks are affected by a content release.
 *
 * Builds a reverse mapping (slug → task IDs) from every task's canonical
 * doc slugs, then intersects it with the release impact's added and
 * modified slugs. Removed and unchanged slugs never mark a task as affected.
 *
 * @param tasks - Tasks to consider; each needs an `id`.
 * @param releaseImpact - Doc slugs grouped by change type. Missing groups
 *   are treated as empty so downstream consumers can rely on arrays.
 * @param perspective - Perspective ID that triggered scoping.
 * @returns The scope description, or `null` when every task is affected
 *   (scoping would not narrow anything). When no task is affected the
 *   scope is still returned, with an empty `affectedTaskIds`, so
 *   downstream steps know nothing needs evaluation.
 */
function computeAutoScope(tasks, releaseImpact, perspective) {
    // Normalise once: the returned `impact` is read with `.length` downstream.
    const impact = {
        added: releaseImpact.added ?? [],
        modified: releaseImpact.modified ?? [],
        removed: releaseImpact.removed ?? [],
        unchanged: releaseImpact.unchanged ?? [],
    };
    // Build reverse mapping (slug → task IDs) and the set of unique task IDs
    const slugToTaskIds = new Map();
    const allTaskIds = new Set();
    for (const task of tasks) {
        allTaskIds.add(task.id);
        for (const slug of extractSlugsFromTask(task)) {
            const taskIds = slugToTaskIds.get(slug) ?? new Set();
            taskIds.add(task.id);
            slugToTaskIds.set(slug, taskIds);
        }
    }
    // Added + modified slugs are the ones whose content changed
    const affectedSlugs = new Set([...impact.added, ...impact.modified]);
    const affectedTaskIds = new Set();
    for (const slug of affectedSlugs) {
        for (const id of slugToTaskIds.get(slug) ?? []) {
            affectedTaskIds.add(id);
        }
    }
    // Compare unique IDs with unique IDs; if all tasks are affected,
    // auto-scoping adds no value.
    if (affectedTaskIds.size >= allTaskIds.size) {
        return null;
    }
    return {
        affectedTaskIds: [...affectedTaskIds],
        skippedTaskIds: [...allTaskIds].filter((id) => !affectedTaskIds.has(id)),
        perspective,
        impact,
    };
}
/**
 * Print a human-readable summary of an auto-scope result.
 *
 * Emits, in order: the affected/total task headline, a changed-doc line
 * (only when at least one doc was added or modified), one line per
 * affected task ID, and a skipped-count line (only when tasks were skipped).
 *
 * @param autoScope - Scope result with `affectedTaskIds`, `skippedTaskIds` and `impact`.
 */
function logAutoScope(autoScope) {
    const affected = autoScope.affectedTaskIds;
    const skipped = autoScope.skippedTaskIds;
    const changedDocCount = autoScope.impact.added.length + autoScope.impact.modified.length;
    // Collect the report first, then print it line by line.
    const report = [
        ` 🎯 Release auto-scope: ${affected.length} of ${affected.length + skipped.length} tasks affected`,
    ];
    if (changedDocCount > 0) {
        report.push(` ${changedDocCount} doc(s) changed → ${affected.length} task(s) to evaluate`);
    }
    affected.forEach((taskId) => {
        report.push(` ✓ ${taskId}`);
    });
    if (skipped.length > 0) {
        report.push(` ⏭ ${skipped.length} task(s) skipped (no docs affected by release)`);
    }
    report.forEach((line) => {
        console.log(line);
    });
}
@@ -5,10 +5,10 @@
5
5
  * derived from AppContext. No env bridge needed — source is resolved and
6
6
  * passed directly.
7
7
  */
8
- import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
8
+ import type { AppContext, PipelineState, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
9
9
  export declare class GenerateConfigsStep implements PipelineStep {
10
10
  readonly name = "generate-configs";
11
11
  check(ctx: AppContext): ValidationIssue[];
12
- execute(ctx: AppContext): Promise<StepResult>;
12
+ execute(ctx: AppContext, state: PipelineState): Promise<StepResult>;
13
13
  cacheInputs(ctx: AppContext): string[];
14
14
  }
@@ -17,7 +17,7 @@ export class GenerateConfigsStep {
17
17
  const issues = validateModelsYaml(ctx.config.rootDir);
18
18
  return issues.filter((i) => i.severity === "error");
19
19
  }
20
- async execute(ctx) {
20
+ async execute(ctx, state) {
21
21
  const start = Date.now();
22
22
  // Resolve source once with typed overrides
23
23
  const overrides = configToSourceOverrides(ctx.config);
@@ -40,6 +40,17 @@ export class GenerateConfigsStep {
40
40
  status: "failed",
41
41
  };
42
42
  }
43
+ // Release auto-scope: narrow tasks to those affected by the release.
44
+ // When explicit area/task filters are also active, this produces the
45
+ // intersection (only tasks matching BOTH the explicit filter AND the
46
+ // release impact are included).
47
+ if (state.releaseAutoScope && !ctx.config.noAutoScope) {
48
+ const scopedIds = new Set(state.releaseAutoScope.affectedTaskIds);
49
+ const beforeCount = tasks.length;
50
+ tasks = tasks.filter((t) => scopedIds.has(t.id));
51
+ console.log(` 🎯 Auto-scoped to ${tasks.length} of ${beforeCount} task(s) affected by release` +
52
+ ` (${beforeCount - tasks.length} skipped, --no-auto-scope to override)`);
53
+ }
43
54
  try {
44
55
  generateConfigs({
45
56
  allowedOrigins: ctx.config.allowedOrigins,
@@ -60,7 +60,21 @@ export class PublishReportStep {
60
60
  evalFingerprint: state.evalFingerprint ?? this.options.evalFingerprint,
61
61
  promptfooUrls: state.promptfooUrls ?? this.options.promptfooUrls,
62
62
  };
63
- const provenanceInput = buildProvenanceInput(summary, ctx, provenanceOptions);
63
+ // Build auto-scope provenance from pipeline state
64
+ const autoScope = state.releaseAutoScope
65
+ ? {
66
+ enabled: true,
67
+ affectedTaskIds: state.releaseAutoScope.affectedTaskIds,
68
+ skippedTaskIds: state.releaseAutoScope.skippedTaskIds,
69
+ perspective: state.releaseAutoScope.perspective,
70
+ impactSummary: {
71
+ added: state.releaseAutoScope.impact.added.length,
72
+ modified: state.releaseAutoScope.impact.modified.length,
73
+ removed: state.releaseAutoScope.impact.removed.length,
74
+ },
75
+ }
76
+ : undefined;
77
+ const provenanceInput = buildProvenanceInput(summary, ctx, provenanceOptions, autoScope);
64
78
  const provenance = buildProvenance(provenanceInput);
65
79
  // Create report
66
80
  const now = new Date().toISOString();
@@ -118,7 +132,7 @@ export class PublishReportStep {
118
132
  /**
119
133
  * Assemble provenance input from the score summary and pipeline context.
120
134
  */
121
- function buildProvenanceInput(summary, ctx, options) {
135
+ function buildProvenanceInput(summary, ctx, options, autoScope) {
122
136
  const areas = summary.scores.map((s) => s.feature);
123
137
  const mode = ctx.config.mode;
124
138
  // Read document IDs from config
@@ -146,6 +160,7 @@ function buildProvenanceInput(summary, ctx, options) {
146
160
  : undefined;
147
161
  return {
148
162
  areas,
163
+ autoScope,
149
164
  callerGit: ctx.config.callerGit,
150
165
  evalFingerprint,
151
166
  mode,
@@ -46,7 +46,12 @@ export class RunEvalStep {
46
46
  ...(ctx.config.tasks ? { taskIds: ctx.config.tasks } : {}),
47
47
  }
48
48
  : undefined;
49
- const tasks = await ctx.taskSource.loadTasks(filter);
49
+ let tasks = await ctx.taskSource.loadTasks(filter);
50
+ // Release auto-scope: narrow to affected tasks (mirrors GenerateConfigsStep)
51
+ if (state.releaseAutoScope && !ctx.config.noAutoScope) {
52
+ const scopedIds = new Set(state.releaseAutoScope.affectedTaskIds);
53
+ tasks = tasks.filter((t) => scopedIds.has(t.id));
54
+ }
50
55
  // Only check context files for tasks that have canonical docs.
51
56
  // Tasks without canonical docs are skipped by FetchDocsStep (they
52
57
  // have no docs to fetch), so no context file is written for them.
@@ -38,6 +38,7 @@ export function mapRequestToConfig(request, rootDir) {
38
38
  discoveryReportEnabled: request.discoveryReport ?? false,
39
39
  publishEnabled: request.publish ?? publishDefault,
40
40
  publishTag: request.publishTag,
41
+ noAutoScope: request.noAutoScope ?? false,
41
42
  noCache: request.noCache ?? false,
42
43
  noRemoteCache: request.noRemoteCache ?? false,
43
44
  graderReplications: request.graderReplications,
@@ -12,10 +12,12 @@
12
12
  * @see docs/design-docs/report-store/architecture.md — Provenance collection
13
13
  */
14
14
  import type { ResolvedSourceConfig } from "../sources.js";
15
- import type { EvalMode, PromptfooUrlEntry, ReportProvenance } from "./types.js";
15
+ import type { EvalMode, PromptfooUrlEntry, ReportAutoScope, ReportProvenance } from "./types.js";
16
16
  export interface ProvenanceInput {
17
17
  /** Feature areas that were evaluated */
18
18
  areas: string[];
19
+ /** Release auto-scope metadata (when perspective evaluation was scoped) */
20
+ autoScope?: ReportAutoScope;
19
21
  /**
20
22
  * Git metadata from the *calling* repository (cross-repo evaluations).
21
23
  * When provided, overrides CI env var detection so provenance attributes
@@ -37,6 +37,7 @@ export function buildProvenance(input) {
37
37
  : detectGitMetadata();
38
38
  return {
39
39
  areas: input.areas,
40
+ autoScope: input.autoScope,
40
41
  contextHash: input.contextHash,
41
42
  evalFingerprint: input.evalFingerprint,
42
43
  git,
@@ -3,23 +3,44 @@
3
3
  *
4
4
  * Server-side handler for `ailf.evalRequest` documents from the Sanity
5
5
  * Content Lake. This is the counterpart to the Studio's "Request Evaluation"
6
- * action — when a content editor creates an eval request document via the
7
- * Studio UI, a Sanity webhook fires and calls this handler.
6
+ * and "Run Task Eval" actions — when a content editor creates an eval
7
+ * request document via the Studio UI, a Sanity webhook fires and calls
8
+ * this handler.
8
9
  *
9
10
  * Designed to run in any HTTP environment: Cloudflare Workers, Vercel
10
11
  * functions, Express, Hono, etc.
11
12
  *
13
+ * Supports two scoping modes:
14
+ * - **Release-scoped** — requires `perspective` field
15
+ * - **Task-scoped** — requires `tasks` array (optionally with `areas`)
16
+ *
17
+ * At least one of `perspective` or `tasks` must be present.
18
+ *
12
19
  * Flow:
13
20
  * 1. Receive eval request payload (from Sanity webhook projection)
14
21
  * 2. Validate: must be `ailf.evalRequest` type, `pending` status,
15
- * non-empty `perspective`
16
- * 3. Dispatch a full evaluation to GitHub Actions via `repository_dispatch`
17
- * with `external-eval` event type and release-scoped client payload
22
+ * with either `perspective` or `tasks`
23
+ * 3. Dispatch evaluation to GitHub Actions via `repository_dispatch`
24
+ * with `external-eval` event type and scoped client payload
18
25
  * 4. On success: PATCH the eval request document → `status: "dispatched"`
19
26
  * 5. On failure: PATCH the eval request document → `status: "failed"` + error
20
27
  * 6. Return a structured result
21
28
  *
22
- * @see packages/studio/src/actions/dispatch.ts Studio-side dispatch
29
+ * ## Sanity Manage Webhook Configuration
30
+ *
31
+ * The Sanity webhook projection MUST include all fields consumed by
32
+ * EvalRequestPayload. The recommended projection is `{...}` (spread),
33
+ * which passes the entire document and avoids silently dropping fields
34
+ * when the schema evolves.
35
+ *
36
+ * Recommended projection:
37
+ * ```
38
+ * {...}
39
+ * ```
40
+ *
41
+ * @see packages/api/src/routes/webhooks.ts — API gateway webhook handler
42
+ * @see packages/studio/src/actions/RunEvaluationAction.tsx — release eval
43
+ * @see packages/studio/src/actions/RunTaskEvaluationAction.tsx — task eval
23
44
  * @see .github/workflows/external-eval.yml — receiving workflow
24
45
  * @see docs/design-docs/report-store/visibility-workflows.md
25
46
  */
@@ -29,24 +50,30 @@ export interface EvalRequestPayload {
29
50
  _id: string;
30
51
  /** The Sanity document _type (should be "ailf.evalRequest") */
31
52
  _type: string;
53
+ /** Feature areas to scope the evaluation (task-scoped evals) */
54
+ areas?: string[];
32
55
  /** Sanity dataset */
33
56
  dataset: string;
57
+ /** Run in debug mode */
58
+ debug?: boolean;
34
59
  /** Error message (only if status is "failed") */
35
60
  error?: string;
36
61
  /** Evaluation mode */
37
62
  mode: string;
38
- /** Content release perspective ID */
39
- perspective: string;
63
+ /** Content release perspective ID (release-scoped evals) */
64
+ perspective?: string;
40
65
  /** Sanity project ID */
41
66
  projectId: string;
42
67
  /** ISO datetime of when the request was created */
43
68
  requestedAt: string;
44
69
  /** User ID who requested */
45
70
  requestedBy?: string;
46
- /** Publish tag */
47
- tag?: string;
48
71
  /** Request status */
49
72
  status: string;
73
+ /** Publish tag */
74
+ tag?: string;
75
+ /** Specific task IDs to evaluate (task-scoped evals) */
76
+ tasks?: string[];
50
77
  }
51
78
  /** Configuration for the eval request handler. */
52
79
  export interface EvalRequestHandlerConfig {
@@ -3,23 +3,44 @@
3
3
  *
4
4
  * Server-side handler for `ailf.evalRequest` documents from the Sanity
5
5
  * Content Lake. This is the counterpart to the Studio's "Request Evaluation"
6
- * action — when a content editor creates an eval request document via the
7
- * Studio UI, a Sanity webhook fires and calls this handler.
6
+ * and "Run Task Eval" actions — when a content editor creates an eval
7
+ * request document via the Studio UI, a Sanity webhook fires and calls
8
+ * this handler.
8
9
  *
9
10
  * Designed to run in any HTTP environment: Cloudflare Workers, Vercel
10
11
  * functions, Express, Hono, etc.
11
12
  *
13
+ * Supports two scoping modes:
14
+ * - **Release-scoped** — requires `perspective` field
15
+ * - **Task-scoped** — requires `tasks` array (optionally with `areas`)
16
+ *
17
+ * At least one of `perspective` or `tasks` must be present.
18
+ *
12
19
  * Flow:
13
20
  * 1. Receive eval request payload (from Sanity webhook projection)
14
21
  * 2. Validate: must be `ailf.evalRequest` type, `pending` status,
15
- * non-empty `perspective`
16
- * 3. Dispatch a full evaluation to GitHub Actions via `repository_dispatch`
17
- * with `external-eval` event type and release-scoped client payload
22
+ * with either `perspective` or `tasks`
23
+ * 3. Dispatch evaluation to GitHub Actions via `repository_dispatch`
24
+ * with `external-eval` event type and scoped client payload
18
25
  * 4. On success: PATCH the eval request document → `status: "dispatched"`
19
26
  * 5. On failure: PATCH the eval request document → `status: "failed"` + error
20
27
  * 6. Return a structured result
21
28
  *
22
- * @see packages/studio/src/actions/dispatch.ts Studio-side dispatch
29
+ * ## Sanity Manage Webhook Configuration
30
+ *
31
+ * The Sanity webhook projection MUST include all fields consumed by
32
+ * EvalRequestPayload. The recommended projection is `{...}` (spread),
33
+ * which passes the entire document and avoids silently dropping fields
34
+ * when the schema evolves.
35
+ *
36
+ * Recommended projection:
37
+ * ```
38
+ * {...}
39
+ * ```
40
+ *
41
+ * @see packages/api/src/routes/webhooks.ts — API gateway webhook handler
42
+ * @see packages/studio/src/actions/RunEvaluationAction.tsx — release eval
43
+ * @see packages/studio/src/actions/RunTaskEvaluationAction.tsx — task eval
23
44
  * @see .github/workflows/external-eval.yml — receiving workflow
24
45
  * @see docs/design-docs/report-store/visibility-workflows.md
25
46
  */
@@ -53,98 +74,112 @@ const GITHUB_API = "https://api.github.com";
53
74
  export async function handleEvalRequest(payload, config) {
54
75
  const requestId = payload._id ?? "unknown";
55
76
  // -------------------------------------------------------------------------
56
- // 1. Validate payload
77
+ // 1. Create Sanity client early so validation failures can mark the
78
+ // document as "failed" instead of leaving it stuck at "pending".
79
+ // -------------------------------------------------------------------------
80
+ const client = payload.projectId && payload.dataset
81
+ ? createClient({
82
+ apiVersion: "2026-03-11",
83
+ dataset: payload.dataset,
84
+ projectId: payload.projectId,
85
+ token: config.sanityToken,
86
+ useCdn: false,
87
+ })
88
+ : null;
89
+ // Helper: mark the eval request as failed in the Content Lake so the
90
+ // Studio UI can show the error instead of polling forever.
91
+ async function markFailed(errorMessage) {
92
+ if (client && payload._id) {
93
+ try {
94
+ await client
95
+ .patch(payload._id)
96
+ .set({ error: errorMessage, status: "failed" })
97
+ .commit();
98
+ }
99
+ catch (err) {
100
+ console.warn(` ⚠️ Failed to update document with error status: ${err instanceof Error ? err.message : String(err)}`);
101
+ }
102
+ }
103
+ return { error: errorMessage, ok: false, requestId };
104
+ }
105
+ // -------------------------------------------------------------------------
106
+ // 2. Validate payload
57
107
  // -------------------------------------------------------------------------
58
108
  if (payload._type !== "ailf.evalRequest") {
59
- return {
60
- error: `Unexpected document type: "${payload._type}" (expected "ailf.evalRequest")`,
61
- ok: false,
62
- requestId,
63
- };
109
+ return markFailed(`Unexpected document type: "${payload._type}" (expected "ailf.evalRequest")`);
64
110
  }
65
111
  if (payload.status !== "pending") {
112
+ // Don't mark as failed — it's already in a non-pending state
66
113
  return {
67
114
  error: `Eval request is not pending (status: "${payload.status}")`,
68
115
  ok: false,
69
116
  requestId,
70
117
  };
71
118
  }
72
- if (!payload.perspective) {
73
- return {
74
- error: "Missing required field: perspective",
75
- ok: false,
76
- requestId,
77
- };
119
+ const hasPerspective = !!payload.perspective;
120
+ const hasTasks = Array.isArray(payload.tasks) && payload.tasks.length > 0;
121
+ if (!hasPerspective && !hasTasks) {
122
+ return markFailed("Missing required field: perspective or tasks. " +
123
+ "Provide a content release perspective for release evals, " +
124
+ "or a tasks array for task-scoped evals.");
78
125
  }
79
126
  // -------------------------------------------------------------------------
80
- // 2. Dispatch evaluation via GitHub Actions
127
+ // 3. Dispatch evaluation via GitHub Actions
81
128
  // -------------------------------------------------------------------------
82
129
  const repo = config.githubRepo ?? DEFAULT_REPO;
83
130
  const dispatchResult = await dispatchGitHubEval(repo, payload, config);
84
131
  // -------------------------------------------------------------------------
85
- // 3. Update eval request document status
132
+ // 4. Update eval request document status
86
133
  // -------------------------------------------------------------------------
87
- const client = createClient({
88
- apiVersion: "2026-03-11",
89
- dataset: payload.dataset,
90
- projectId: payload.projectId,
91
- token: config.sanityToken,
92
- useCdn: false,
93
- });
94
134
  if (dispatchResult.ok) {
95
- try {
96
- await client
97
- .patch(payload._id)
98
- .set({
99
- dispatchedAt: new Date().toISOString(),
100
- status: "dispatched",
101
- })
102
- .commit();
103
- }
104
- catch (err) {
105
- // Dispatch succeeded but status update failed — log and still return ok
106
- console.warn(` ⚠️ Dispatch succeeded but failed to update document status: ${err instanceof Error ? err.message : String(err)}`);
135
+ if (client) {
136
+ try {
137
+ await client
138
+ .patch(payload._id)
139
+ .set({
140
+ dispatchedAt: new Date().toISOString(),
141
+ status: "dispatched",
142
+ })
143
+ .commit();
144
+ }
145
+ catch (err) {
146
+ // Dispatch succeeded but status update failed — log and still return ok
147
+ console.warn(` ⚠️ Dispatch succeeded but failed to update document status: ${err instanceof Error ? err.message : String(err)}`);
148
+ }
107
149
  }
108
150
  return { ok: true, requestId };
109
151
  }
110
152
  // Dispatch failed — mark the document as failed
111
- try {
112
- await client
113
- .patch(payload._id)
114
- .set({
115
- error: dispatchResult.error ?? "Unknown dispatch error",
116
- status: "failed",
117
- })
118
- .commit();
119
- }
120
- catch (err) {
121
- console.warn(` ⚠️ Failed to update document with error status: ${err instanceof Error ? err.message : String(err)}`);
122
- }
123
- return {
124
- error: dispatchResult.error,
125
- ok: false,
126
- requestId,
127
- };
153
+ return markFailed(dispatchResult.error ?? "Unknown dispatch error");
128
154
  }
129
155
  /**
130
- * Dispatch a release-scoped evaluation via GitHub Actions repository_dispatch.
156
+ * Dispatch an evaluation via GitHub Actions repository_dispatch.
131
157
  *
132
- * Uses the `external-eval` event type with a client_payload conforming to
133
- * PipelineRequestSchema. The workflow passes it directly to the CLI via
134
- * `--config` without field translation.
158
+ * Supports both release-scoped (perspective) and task-scoped (tasks/areas)
159
+ * evaluations. Uses the `external-eval` event type with a client_payload
160
+ * conforming to PipelineRequestSchema. The workflow passes it directly to
161
+ * the CLI via `--config` without field translation.
135
162
  */
136
163
  async function dispatchGitHubEval(repo, payload, config) {
137
164
  const url = `${GITHUB_API}/repos/${repo}/dispatches`;
165
+ const hasPerspective = !!payload.perspective;
166
+ const hasTasks = Array.isArray(payload.tasks) && payload.tasks.length > 0;
167
+ const hasAreas = Array.isArray(payload.areas) && payload.areas.length > 0;
138
168
  const body = {
139
169
  client_payload: {
140
170
  caller_repo: "sanity-io/www-sanity-io",
141
171
  dataset: payload.dataset,
142
172
  mode: payload.mode,
143
- perspective: payload.perspective,
144
173
  projectId: payload.projectId,
145
174
  publish: true,
146
- ...(payload.tag ? { publishTag: payload.tag } : {}),
147
175
  source: "production",
176
+ // Release-scoped fields
177
+ ...(hasPerspective ? { perspective: payload.perspective } : {}),
178
+ // Task-scoped fields
179
+ ...(hasTasks ? { tasks: payload.tasks } : {}),
180
+ ...(hasAreas ? { areas: payload.areas } : {}),
181
+ ...(payload.debug ? { debug: true } : {}),
182
+ ...(payload.tag ? { publishTag: payload.tag } : {}),
148
183
  },
149
184
  event_type: "external-eval",
150
185
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "0.1.24",
3
+ "version": "0.1.26",
4
4
  "private": false,
5
5
  "publishConfig": {
6
6
  "access": "restricted"