@sanity/ailf 0.1.23 → 0.1.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_vendor/ailf-core/ports/context.d.ts +2 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +2 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +45 -0
- package/dist/adapters/config-sources/file-config-adapter.js +1 -0
- package/dist/commands/calculate-scores.js +1 -0
- package/dist/commands/explain-handler.js +1 -0
- package/dist/commands/fetch-docs.js +1 -0
- package/dist/commands/generate-configs.js +1 -0
- package/dist/commands/pipeline-action.d.ts +1 -0
- package/dist/commands/pipeline-action.js +1 -0
- package/dist/commands/pipeline.d.ts +1 -0
- package/dist/commands/pipeline.js +1 -0
- package/dist/commands/pr-comment.js +1 -0
- package/dist/commands/publish.js +1 -0
- package/dist/orchestration/build-app-context.js +1 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +7 -2
- package/dist/orchestration/steps/fetch-docs-step.js +130 -1
- package/dist/orchestration/steps/generate-configs-step.d.ts +2 -2
- package/dist/orchestration/steps/generate-configs-step.js +12 -1
- package/dist/orchestration/steps/publish-report-step.js +17 -2
- package/dist/orchestration/steps/run-eval-step.js +14 -2
- package/dist/pipeline/expand-tasks.js +8 -1
- package/dist/pipeline/map-request-to-config.js +1 -0
- package/dist/pipeline/provenance.d.ts +3 -1
- package/dist/pipeline/provenance.js +1 -0
- package/package.json +1 -1
|
@@ -89,6 +89,8 @@ export interface ResolvedConfig {
|
|
|
89
89
|
studioOriginOverride?: string;
|
|
90
90
|
/** Sanity document filter args */
|
|
91
91
|
sanityDocumentArgs?: string[];
|
|
92
|
+
/** Disable release-aware auto-scoping (evaluate all tasks even when perspective is set) */
|
|
93
|
+
noAutoScope: boolean;
|
|
92
94
|
/** Before option for comparison */
|
|
93
95
|
beforeOption?: string;
|
|
94
96
|
/** Task source adapter selection */
|
|
@@ -34,6 +34,7 @@ export declare const EvalConfigSchema: z.ZodObject<{
|
|
|
34
34
|
full: "full";
|
|
35
35
|
observed: "observed";
|
|
36
36
|
}>>;
|
|
37
|
+
noAutoScope: z.ZodOptional<z.ZodBoolean>;
|
|
37
38
|
noCache: z.ZodOptional<z.ZodBoolean>;
|
|
38
39
|
noRemoteCache: z.ZodOptional<z.ZodBoolean>;
|
|
39
40
|
publish: z.ZodOptional<z.ZodBoolean>;
|
|
@@ -48,6 +48,8 @@ export const EvalConfigSchema = z
|
|
|
48
48
|
headers: z.record(z.string(), z.string()).optional(),
|
|
49
49
|
/** Evaluation mode */
|
|
50
50
|
mode: z.enum(["baseline", "agentic", "observed", "full"]).optional(),
|
|
51
|
+
/** Disable release-aware auto-scoping */
|
|
52
|
+
noAutoScope: z.boolean().optional(),
|
|
51
53
|
/** Disable local cache */
|
|
52
54
|
noCache: z.boolean().optional(),
|
|
53
55
|
/** Disable remote cache */
|
|
@@ -54,6 +54,7 @@ export declare const PipelineRequestSchema: z.ZodObject<{
|
|
|
54
54
|
full: "full";
|
|
55
55
|
observed: "observed";
|
|
56
56
|
}>>;
|
|
57
|
+
noAutoScope: z.ZodOptional<z.ZodBoolean>;
|
|
57
58
|
noCache: z.ZodOptional<z.ZodBoolean>;
|
|
58
59
|
noRemoteCache: z.ZodOptional<z.ZodBoolean>;
|
|
59
60
|
perspective: z.ZodOptional<z.ZodString>;
|
|
@@ -70,6 +70,7 @@ export const PipelineRequestSchema = z.object({
|
|
|
70
70
|
inlineTasks: z.array(z.record(z.string(), z.unknown())).optional(),
|
|
71
71
|
jobId: z.string().optional(),
|
|
72
72
|
mode: z.enum(["baseline", "agentic", "observed", "full"]).optional(),
|
|
73
|
+
noAutoScope: z.boolean().optional(),
|
|
73
74
|
noCache: z.boolean().optional(),
|
|
74
75
|
noRemoteCache: z.boolean().optional(),
|
|
75
76
|
perspective: z.string().optional(),
|
|
@@ -438,6 +438,32 @@ export interface PipelineState {
|
|
|
438
438
|
evalFingerprint?: string;
|
|
439
439
|
/** Promptfoo share URLs produced by RunEvalStep, consumed by PublishReportStep */
|
|
440
440
|
promptfooUrls?: PromptfooUrlEntry[];
|
|
441
|
+
/**
|
|
442
|
+
* Release auto-scope metadata. Set by FetchDocsStep when a perspective
|
|
443
|
+
* is active and release impact identifies affected documents.
|
|
444
|
+
* Consumed by GenerateConfigsStep and RunEvalStep to narrow scope.
|
|
445
|
+
*/
|
|
446
|
+
releaseAutoScope?: ReleaseAutoScope;
|
|
447
|
+
}
|
|
448
|
+
/**
 * Describes how a content release narrows an evaluation run.
 *
 * FetchDocsStep writes this record into the pipeline state; GenerateConfigsStep
 * and RunEvalStep read it to restrict the run to the tasks listed in
 * `affectedTaskIds`.
 */
export interface ReleaseAutoScope {
    /** IDs of tasks whose canonical docs are touched by the release. */
    affectedTaskIds: string[];
    /** IDs of tasks left out because none of their docs are affected. */
    skippedTaskIds: string[];
    /** ID of the perspective that caused scoping to be computed. */
    perspective: string;
    /** Release impact: document slugs grouped by the kind of change. */
    impact: Record<"added" | "modified" | "removed" | "unchanged", string[]>;
}
|
|
442
468
|
/** Result of a full pipeline run */
|
|
443
469
|
export interface PipelineResult {
|
|
@@ -983,10 +1009,29 @@ export interface Report {
|
|
|
983
1009
|
export type ReportId = string & {
|
|
984
1010
|
readonly __brand: "ReportId";
|
|
985
1011
|
};
|
|
1012
|
+
/**
 * Auto-scope record stored in a report's provenance.
 *
 * Carries the same task lists and perspective as the pipeline-level
 * auto-scope data, but keeps only per-change-type counts rather than the
 * full slug lists.
 */
export interface ReportAutoScope {
    /** True when auto-scoping was active for the evaluation. */
    enabled: boolean;
    /** IDs of tasks whose canonical docs are touched by the release. */
    affectedTaskIds: string[];
    /** IDs of tasks left out because none of their docs are affected. */
    skippedTaskIds: string[];
    /** ID of the perspective that caused scoping to be computed. */
    perspective: string;
    /** Release impact reduced to counts per change type. */
    impactSummary: Record<"added" | "modified" | "removed", number>;
}
|
|
986
1029
|
/** Full provenance metadata for an evaluation report */
|
|
987
1030
|
export interface ReportProvenance {
|
|
988
1031
|
/** Which feature areas were evaluated */
|
|
989
1032
|
areas: string[];
|
|
1033
|
+
/** Release auto-scope metadata (when perspective evaluation was scoped to affected tasks) */
|
|
1034
|
+
autoScope?: ReportAutoScope;
|
|
990
1035
|
/** Content hash of the documentation context at eval time */
|
|
991
1036
|
contextHash?: string;
|
|
992
1037
|
/**
|
|
@@ -653,6 +653,7 @@ async function buildPipelineExplainPlan(actionCommand, rootDir) {
|
|
|
653
653
|
allowedOrigin: raw.allowedOrigin ?? [],
|
|
654
654
|
allowedOrigins: raw.allowedOrigins ?? [],
|
|
655
655
|
area: raw.area,
|
|
656
|
+
autoScope: raw.autoScope ?? true,
|
|
656
657
|
before: raw.before,
|
|
657
658
|
cache: raw.cache ?? true,
|
|
658
659
|
changedDocs: raw.changedDocs,
|
|
@@ -19,6 +19,7 @@ export function createPipelineCommand() {
|
|
|
19
19
|
.option("--skip-eval", "Recalculate from existing eval results", false)
|
|
20
20
|
.option("--no-cache", "Bypass all pipeline-level caching")
|
|
21
21
|
.option("--no-remote-cache", "Disable Content Lake cache lookup (local cache still active)")
|
|
22
|
+
.option("--no-auto-scope", "Disable release-aware auto-scoping (evaluate all tasks even when a perspective is set)")
|
|
22
23
|
.option("-a, --area <areas>", "Scope to feature areas (comma-separated)")
|
|
23
24
|
.option("-t, --task <id>", "Scope to specific task ID")
|
|
24
25
|
.option("--changed-docs <slugs>", "Auto-scope to tasks affected by these document slugs")
|
package/dist/commands/publish.js
CHANGED
|
@@ -4,11 +4,16 @@
|
|
|
4
4
|
* Uses ctx.docFetcher (the DocFetcher port) for all fetching. The adapter
|
|
5
5
|
* handles GROQ queries, perspective diffing, document overlays, and URL
|
|
6
6
|
* fetching. This step orchestrates the call and writes metadata files.
|
|
7
|
+
*
|
|
8
|
+
* When a source-level perspective is active and release impact data is
|
|
9
|
+
* available, this step computes which tasks are affected by the release
|
|
10
|
+
* and stores a `releaseAutoScope` entry in PipelineState. Downstream
|
|
11
|
+
* steps (GenerateConfigsStep, RunEvalStep) use this to narrow scope.
|
|
7
12
|
*/
|
|
8
|
-
import type
|
|
13
|
+
import { type AppContext, type PipelineState, type PipelineStep, type StepResult, type ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
9
14
|
export declare class FetchDocsStep implements PipelineStep {
|
|
10
15
|
readonly name = "fetch-docs";
|
|
11
16
|
check(): ValidationIssue[];
|
|
12
|
-
execute(ctx: AppContext): Promise<StepResult>;
|
|
17
|
+
execute(ctx: AppContext, state: PipelineState): Promise<StepResult>;
|
|
13
18
|
cacheInputs(ctx: AppContext): string[];
|
|
14
19
|
}
|
|
@@ -4,9 +4,15 @@
|
|
|
4
4
|
* Uses ctx.docFetcher (the DocFetcher port) for all fetching. The adapter
|
|
5
5
|
* handles GROQ queries, perspective diffing, document overlays, and URL
|
|
6
6
|
* fetching. This step orchestrates the call and writes metadata files.
|
|
7
|
+
*
|
|
8
|
+
* When a source-level perspective is active and release impact data is
|
|
9
|
+
* available, this step computes which tasks are affected by the release
|
|
10
|
+
* and stores a `releaseAutoScope` entry in PipelineState. Downstream
|
|
11
|
+
* steps (GenerateConfigsStep, RunEvalStep) use this to narrow scope.
|
|
7
12
|
*/
|
|
8
13
|
import { mkdirSync, writeFileSync } from "fs";
|
|
9
14
|
import { join } from "path";
|
|
15
|
+
import { isIdRef, isPathRef, isSlugRef, } from "../../_vendor/ailf-core/index.js";
|
|
10
16
|
import { getStepInputPaths } from "../../pipeline/cache.js";
|
|
11
17
|
import { checkCanonicalContextsExist } from "../../pipeline/checks.js";
|
|
12
18
|
import { loadSource } from "../../sources.js";
|
|
@@ -16,7 +22,7 @@ export class FetchDocsStep {
|
|
|
16
22
|
check() {
|
|
17
23
|
return [];
|
|
18
24
|
}
|
|
19
|
-
async execute(ctx) {
|
|
25
|
+
async execute(ctx, state) {
|
|
20
26
|
if (ctx.config.skipFetch) {
|
|
21
27
|
return { status: "skipped", reason: "--skip-fetch" };
|
|
22
28
|
}
|
|
@@ -55,8 +61,11 @@ export class FetchDocsStep {
|
|
|
55
61
|
};
|
|
56
62
|
}
|
|
57
63
|
// Execute the fetch via the DocFetcher port
|
|
64
|
+
let releaseImpact;
|
|
58
65
|
try {
|
|
59
66
|
const result = await ctx.docFetcher.fetch(tasksWithDocs, resolvedSource);
|
|
67
|
+
// Capture release impact for auto-scoping
|
|
68
|
+
releaseImpact = result.metadata?.releaseImpact;
|
|
60
69
|
// Write metadata files for downstream pipeline consumption
|
|
61
70
|
if (result.metadata) {
|
|
62
71
|
writeMetadataFiles(ctx.config.rootDir, result.metadata);
|
|
@@ -80,6 +89,23 @@ export class FetchDocsStep {
|
|
|
80
89
|
status: "failed",
|
|
81
90
|
};
|
|
82
91
|
}
|
|
92
|
+
// -----------------------------------------------------------------
|
|
93
|
+
// Release auto-scope: compute which tasks are affected by the
|
|
94
|
+
// content release. This only activates when:
|
|
95
|
+
// 1. A source-level perspective is active
|
|
96
|
+
// 2. Release impact data was computed (docs were fetched)
|
|
97
|
+
// 3. Auto-scoping was not explicitly disabled (--no-auto-scope)
|
|
98
|
+
// Note: explicit area/task filters are not checked here; GenerateConfigsStep intersects them with the release scope.
|
|
99
|
+
// -----------------------------------------------------------------
|
|
100
|
+
if (resolvedSource.perspective &&
|
|
101
|
+
releaseImpact &&
|
|
102
|
+
!ctx.config.noAutoScope) {
|
|
103
|
+
const autoScope = computeAutoScope(tasks, releaseImpact, resolvedSource.perspective);
|
|
104
|
+
if (autoScope) {
|
|
105
|
+
state.releaseAutoScope = autoScope;
|
|
106
|
+
logAutoScope(autoScope);
|
|
107
|
+
}
|
|
108
|
+
}
|
|
83
109
|
const durationMs = Date.now() - start;
|
|
84
110
|
return {
|
|
85
111
|
durationMs,
|
|
@@ -133,3 +159,106 @@ function writeMetadataFiles(rootDir, metadata) {
|
|
|
133
159
|
console.log(" 📄 URL fetch metadata written to contexts/url-fetch.json");
|
|
134
160
|
}
|
|
135
161
|
}
|
|
162
|
+
// ---------------------------------------------------------------------------
|
|
163
|
+
// Release auto-scope computation
|
|
164
|
+
// ---------------------------------------------------------------------------
|
|
165
|
+
/**
 * Collect the slugs that a task's canonical doc refs can be matched on.
 *
 * - slug refs contribute their `slug`
 * - path refs contribute the final "/"-separated segment of `path`
 *   (e.g. "groq/groq-introduction" → "groq-introduction")
 * - id refs contribute their optional `slug` annotation, when it is set
 *
 * Any other ref kind (such as perspective refs, whose slugs are not known
 * at this point) contributes nothing.
 *
 * @param task - Task whose `canonicalDocs` refs are inspected.
 * @returns Slugs in ref order; duplicates are not removed.
 */
function extractSlugsFromTask(task) {
    const collected = [];
    for (const docRef of task.canonicalDocs) {
        if (isSlugRef(docRef)) {
            collected.push(docRef.slug);
            continue;
        }
        if (isPathRef(docRef)) {
            // split() always yields at least one element, so pop() is the last segment.
            collected.push(docRef.path.split("/").pop());
            continue;
        }
        if (isIdRef(docRef) && docRef.slug) {
            collected.push(docRef.slug);
        }
    }
    return collected;
}
|
|
190
|
+
/**
 * Work out which tasks a content release touches.
 *
 * Every task is indexed under the slugs returned by extractSlugsFromTask().
 * The release's added and modified slugs are then looked up in that index
 * to collect the affected task IDs. Removed and unchanged slugs do not mark
 * a task as affected.
 *
 * @param tasks - All candidate tasks (each with `id` and `canonicalDocs`).
 * @param releaseImpact - Slug lists: `added`, `modified`, `removed`, `unchanged`.
 * @param perspective - Perspective ID copied into the result.
 * @returns `null` when the affected count is at least `tasks.length`
 *   (every task affected, or no tasks at all); otherwise the scope record.
 *   A record with an empty `affectedTaskIds` is still returned so callers
 *   can tell that nothing needs evaluating.
 */
function computeAutoScope(tasks, releaseImpact, perspective) {
    // Index: slug → set of IDs of the tasks that reference it.
    const tasksBySlug = new Map();
    for (const task of tasks) {
        for (const slug of extractSlugsFromTask(task)) {
            const bucket = tasksBySlug.get(slug) ?? new Set();
            bucket.add(task.id);
            tasksBySlug.set(slug, bucket);
        }
    }
    // Only added and modified documents count as changed content.
    const changedSlugs = new Set([...releaseImpact.added, ...releaseImpact.modified]);
    const affected = new Set();
    changedSlugs.forEach((slug) => {
        tasksBySlug.get(slug)?.forEach((id) => affected.add(id));
    });
    // Narrowing is pointless when nothing would be left out.
    if (affected.size >= tasks.length) {
        return null;
    }
    const uniqueTaskIds = [...new Set(tasks.map((t) => t.id))];
    const { added, modified, removed, unchanged } = releaseImpact;
    return {
        affectedTaskIds: [...affected],
        skippedTaskIds: uniqueTaskIds.filter((id) => !affected.has(id)),
        perspective,
        impact: { added, modified, removed, unchanged },
    };
}
|
|
245
|
+
/**
|
|
246
|
+
* Log auto-scope results to the console.
|
|
247
|
+
*/
|
|
248
|
+
function logAutoScope(autoScope) {
|
|
249
|
+
const { affectedTaskIds, skippedTaskIds, impact } = autoScope;
|
|
250
|
+
const totalTasks = affectedTaskIds.length + skippedTaskIds.length;
|
|
251
|
+
const affectedDocs = impact.added.length + impact.modified.length;
|
|
252
|
+
console.log(` 🎯 Release auto-scope: ${affectedTaskIds.length} of ${totalTasks} tasks affected`);
|
|
253
|
+
if (affectedDocs > 0) {
|
|
254
|
+
console.log(` ${affectedDocs} doc(s) changed → ${affectedTaskIds.length} task(s) to evaluate`);
|
|
255
|
+
}
|
|
256
|
+
if (affectedTaskIds.length > 0) {
|
|
257
|
+
for (const id of affectedTaskIds) {
|
|
258
|
+
console.log(` ✓ ${id}`);
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
if (skippedTaskIds.length > 0) {
|
|
262
|
+
console.log(` ⏭ ${skippedTaskIds.length} task(s) skipped (no docs affected by release)`);
|
|
263
|
+
}
|
|
264
|
+
}
|
|
@@ -5,10 +5,10 @@
|
|
|
5
5
|
* derived from AppContext. No env bridge needed — source is resolved and
|
|
6
6
|
* passed directly.
|
|
7
7
|
*/
|
|
8
|
-
import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
8
|
+
import type { AppContext, PipelineState, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
9
9
|
export declare class GenerateConfigsStep implements PipelineStep {
|
|
10
10
|
readonly name = "generate-configs";
|
|
11
11
|
check(ctx: AppContext): ValidationIssue[];
|
|
12
|
-
execute(ctx: AppContext): Promise<StepResult>;
|
|
12
|
+
execute(ctx: AppContext, state: PipelineState): Promise<StepResult>;
|
|
13
13
|
cacheInputs(ctx: AppContext): string[];
|
|
14
14
|
}
|
|
@@ -17,7 +17,7 @@ export class GenerateConfigsStep {
|
|
|
17
17
|
const issues = validateModelsYaml(ctx.config.rootDir);
|
|
18
18
|
return issues.filter((i) => i.severity === "error");
|
|
19
19
|
}
|
|
20
|
-
async execute(ctx) {
|
|
20
|
+
async execute(ctx, state) {
|
|
21
21
|
const start = Date.now();
|
|
22
22
|
// Resolve source once with typed overrides
|
|
23
23
|
const overrides = configToSourceOverrides(ctx.config);
|
|
@@ -40,6 +40,17 @@ export class GenerateConfigsStep {
|
|
|
40
40
|
status: "failed",
|
|
41
41
|
};
|
|
42
42
|
}
|
|
43
|
+
// Release auto-scope: narrow tasks to those affected by the release.
|
|
44
|
+
// When explicit area/task filters are also active, this produces the
|
|
45
|
+
// intersection (only tasks matching BOTH the explicit filter AND the
|
|
46
|
+
// release impact are included).
|
|
47
|
+
if (state.releaseAutoScope && !ctx.config.noAutoScope) {
|
|
48
|
+
const scopedIds = new Set(state.releaseAutoScope.affectedTaskIds);
|
|
49
|
+
const beforeCount = tasks.length;
|
|
50
|
+
tasks = tasks.filter((t) => scopedIds.has(t.id));
|
|
51
|
+
console.log(` 🎯 Auto-scoped to ${tasks.length} of ${beforeCount} task(s) affected by release` +
|
|
52
|
+
` (${beforeCount - tasks.length} skipped, --no-auto-scope to override)`);
|
|
53
|
+
}
|
|
43
54
|
try {
|
|
44
55
|
generateConfigs({
|
|
45
56
|
allowedOrigins: ctx.config.allowedOrigins,
|
|
@@ -60,7 +60,21 @@ export class PublishReportStep {
|
|
|
60
60
|
evalFingerprint: state.evalFingerprint ?? this.options.evalFingerprint,
|
|
61
61
|
promptfooUrls: state.promptfooUrls ?? this.options.promptfooUrls,
|
|
62
62
|
};
|
|
63
|
-
|
|
63
|
+
// Build auto-scope provenance from pipeline state
|
|
64
|
+
const autoScope = state.releaseAutoScope
|
|
65
|
+
? {
|
|
66
|
+
enabled: true,
|
|
67
|
+
affectedTaskIds: state.releaseAutoScope.affectedTaskIds,
|
|
68
|
+
skippedTaskIds: state.releaseAutoScope.skippedTaskIds,
|
|
69
|
+
perspective: state.releaseAutoScope.perspective,
|
|
70
|
+
impactSummary: {
|
|
71
|
+
added: state.releaseAutoScope.impact.added.length,
|
|
72
|
+
modified: state.releaseAutoScope.impact.modified.length,
|
|
73
|
+
removed: state.releaseAutoScope.impact.removed.length,
|
|
74
|
+
},
|
|
75
|
+
}
|
|
76
|
+
: undefined;
|
|
77
|
+
const provenanceInput = buildProvenanceInput(summary, ctx, provenanceOptions, autoScope);
|
|
64
78
|
const provenance = buildProvenance(provenanceInput);
|
|
65
79
|
// Create report
|
|
66
80
|
const now = new Date().toISOString();
|
|
@@ -118,7 +132,7 @@ export class PublishReportStep {
|
|
|
118
132
|
/**
|
|
119
133
|
* Assemble provenance input from the score summary and pipeline context.
|
|
120
134
|
*/
|
|
121
|
-
function buildProvenanceInput(summary, ctx, options) {
|
|
135
|
+
function buildProvenanceInput(summary, ctx, options, autoScope) {
|
|
122
136
|
const areas = summary.scores.map((s) => s.feature);
|
|
123
137
|
const mode = ctx.config.mode;
|
|
124
138
|
// Read document IDs from config
|
|
@@ -146,6 +160,7 @@ function buildProvenanceInput(summary, ctx, options) {
|
|
|
146
160
|
: undefined;
|
|
147
161
|
return {
|
|
148
162
|
areas,
|
|
163
|
+
autoScope,
|
|
149
164
|
callerGit: ctx.config.callerGit,
|
|
150
165
|
evalFingerprint,
|
|
151
166
|
mode,
|
|
@@ -46,8 +46,20 @@ export class RunEvalStep {
|
|
|
46
46
|
...(ctx.config.tasks ? { taskIds: ctx.config.tasks } : {}),
|
|
47
47
|
}
|
|
48
48
|
: undefined;
|
|
49
|
-
|
|
50
|
-
|
|
49
|
+
let tasks = await ctx.taskSource.loadTasks(filter);
|
|
50
|
+
// Release auto-scope: narrow to affected tasks (mirrors GenerateConfigsStep)
|
|
51
|
+
if (state.releaseAutoScope && !ctx.config.noAutoScope) {
|
|
52
|
+
const scopedIds = new Set(state.releaseAutoScope.affectedTaskIds);
|
|
53
|
+
tasks = tasks.filter((t) => scopedIds.has(t.id));
|
|
54
|
+
}
|
|
55
|
+
// Only check context files for tasks that have canonical docs.
|
|
56
|
+
// Tasks without canonical docs are skipped by FetchDocsStep (they
|
|
57
|
+
// have no docs to fetch), so no context file is written for them.
|
|
58
|
+
// The generated Promptfoo config still includes their "without-docs"
|
|
59
|
+
// variant (testing model knowledge alone), which doesn't need a
|
|
60
|
+
// context file.
|
|
61
|
+
const tasksWithDocs = tasks.filter((t) => t.canonicalDocs.length > 0);
|
|
62
|
+
const taskIds = tasksWithDocs.map((t) => t.id);
|
|
51
63
|
const contextIssues = checkCanonicalContextsExist(rootDir, taskIds);
|
|
52
64
|
const contextErrors = contextIssues.filter((i) => i.severity === "error");
|
|
53
65
|
if (contextErrors.length > 0) {
|
|
@@ -198,8 +198,15 @@ export function expandTask(task, rubricConfig, mode = "baseline") {
|
|
|
198
198
|
/**
|
|
199
199
|
* Convert a TaskDefinition (from @sanity/ailf-core) to the local
|
|
200
200
|
* SingleTaskDefinition format used by expandTask().
|
|
201
|
+
*
|
|
202
|
+
* When a task has no canonical docs, the `docs` var is set to empty string
|
|
203
|
+
* instead of a file path. This prevents Promptfoo from trying to read a
|
|
204
|
+
* context file that was never created by FetchDocsStep (which skips tasks
|
|
205
|
+
* without canonical docs). The gold entry still runs — it just tests model
|
|
206
|
+
* knowledge alone, same as the baseline variant.
|
|
201
207
|
*/
|
|
202
208
|
function taskDefinitionToSingle(task) {
|
|
209
|
+
const hasDocs = task.canonicalDocs.length > 0;
|
|
203
210
|
return {
|
|
204
211
|
assert: task.assertions.map((a) => ({ ...a })),
|
|
205
212
|
baseline: task.baseline,
|
|
@@ -208,7 +215,7 @@ function taskDefinitionToSingle(task) {
|
|
|
208
215
|
featureArea: task.featureArea,
|
|
209
216
|
id: task.id,
|
|
210
217
|
vars: {
|
|
211
|
-
docs: `file://contexts/canonical/${task.id}.md
|
|
218
|
+
docs: hasDocs ? `file://contexts/canonical/${task.id}.md` : "",
|
|
212
219
|
task: task.taskPrompt,
|
|
213
220
|
...task.extraVars,
|
|
214
221
|
},
|
|
@@ -38,6 +38,7 @@ export function mapRequestToConfig(request, rootDir) {
|
|
|
38
38
|
discoveryReportEnabled: request.discoveryReport ?? false,
|
|
39
39
|
publishEnabled: request.publish ?? publishDefault,
|
|
40
40
|
publishTag: request.publishTag,
|
|
41
|
+
noAutoScope: request.noAutoScope ?? false,
|
|
41
42
|
noCache: request.noCache ?? false,
|
|
42
43
|
noRemoteCache: request.noRemoteCache ?? false,
|
|
43
44
|
graderReplications: request.graderReplications,
|
|
@@ -12,10 +12,12 @@
|
|
|
12
12
|
* @see docs/design-docs/report-store/architecture.md — Provenance collection
|
|
13
13
|
*/
|
|
14
14
|
import type { ResolvedSourceConfig } from "../sources.js";
|
|
15
|
-
import type { EvalMode, PromptfooUrlEntry, ReportProvenance } from "./types.js";
|
|
15
|
+
import type { EvalMode, PromptfooUrlEntry, ReportAutoScope, ReportProvenance } from "./types.js";
|
|
16
16
|
export interface ProvenanceInput {
|
|
17
17
|
/** Feature areas that were evaluated */
|
|
18
18
|
areas: string[];
|
|
19
|
+
/** Release auto-scope metadata (when perspective evaluation was scoped) */
|
|
20
|
+
autoScope?: ReportAutoScope;
|
|
19
21
|
/**
|
|
20
22
|
* Git metadata from the *calling* repository (cross-repo evaluations).
|
|
21
23
|
* When provided, overrides CI env var detection so provenance attributes
|