@sanity/ailf 2.3.0 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/task-sources/content-lake-task-source.js +15 -7
- package/dist/orchestration/steps/fetch-docs-step.js +23 -9
- package/dist/orchestration/steps/generate-configs-step.d.ts +15 -0
- package/dist/orchestration/steps/generate-configs-step.js +44 -0
- package/dist/orchestration/steps/run-eval-step.js +14 -0
- package/dist/webhook/eval-request-handler.js +4 -0
- package/package.json +1 -1
|
@@ -28,7 +28,13 @@
|
|
|
28
28
|
*/
|
|
29
29
|
const TASKS_QUERY = /* groq */ `
|
|
30
30
|
*[_type == "ailf.task"
|
|
31
|
-
&& (
|
|
31
|
+
&& (
|
|
32
|
+
!defined($areas)
|
|
33
|
+
// Current field name
|
|
34
|
+
|| area->areaId.current in $areas
|
|
35
|
+
// Legacy field name (pre-schema-rename documents)
|
|
36
|
+
|| featureArea->areaId.current in $areas
|
|
37
|
+
)
|
|
32
38
|
&& (!defined($taskIds) || id.current in $taskIds)
|
|
33
39
|
&& (
|
|
34
40
|
// Status-based filtering (unified — replaces execution.enabled)
|
|
@@ -39,13 +45,15 @@ const TASKS_QUERY = /* groq */ `
|
|
|
39
45
|
|| (defined($taskIds) && status != "archived")
|
|
40
46
|
)
|
|
41
47
|
&& (!defined($tags) || count((tags)[@ in $tags]) > 0)
|
|
42
|
-
] | order(area->areaId.current asc, id.current asc) {
|
|
48
|
+
] | order(coalesce(area->areaId.current, featureArea->areaId.current) asc, id.current asc) {
|
|
43
49
|
"taskId": id.current,
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
50
|
+
// Coalesce current and legacy field names so documents created before
|
|
51
|
+
// the schema rename are still readable.
|
|
52
|
+
"title": coalesce(title, description),
|
|
53
|
+
"areaId": coalesce(area->areaId.current, featureArea->areaId.current),
|
|
54
|
+
"promptText": coalesce(promptText, taskPrompt),
|
|
47
55
|
docCoverage,
|
|
48
|
-
"contextDocs": contextDocs[] {
|
|
56
|
+
"contextDocs": coalesce(contextDocs, canonicalDocs)[] {
|
|
49
57
|
refType,
|
|
50
58
|
"slug": doc->slug.current,
|
|
51
59
|
"docRefId": doc->_id,
|
|
@@ -55,7 +63,7 @@ const TASKS_QUERY = /* groq */ `
|
|
|
55
63
|
perspective,
|
|
56
64
|
reason
|
|
57
65
|
},
|
|
58
|
-
assertions,
|
|
66
|
+
"assertions": coalesce(assertions, assert),
|
|
59
67
|
rawAssert,
|
|
60
68
|
baseline,
|
|
61
69
|
tags,
|
|
@@ -29,15 +29,29 @@ export class FetchDocsStep {
|
|
|
29
29
|
return { status: "skipped", reason: "--skip-fetch" };
|
|
30
30
|
}
|
|
31
31
|
const start = Date.now();
|
|
32
|
-
// Load tasks
|
|
33
|
-
//
|
|
34
|
-
//
|
|
35
|
-
//
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
32
|
+
// Load tasks — use the same source as GenerateConfigsStep to avoid
|
|
33
|
+
// a mismatch where configs reference context files that were never
|
|
34
|
+
// fetched.
|
|
35
|
+
//
|
|
36
|
+
// Content Lake path: use ctx.taskSource (ContentLakeTaskSource) which
|
|
37
|
+
// loads Studio-owned ailf.task documents via GROQ.
|
|
38
|
+
// Filesystem path: load from .task.ts files (repo/inline tasks).
|
|
39
|
+
let allTasks;
|
|
40
|
+
if (ctx.config.taskSourceType === "content-lake") {
|
|
41
|
+
const filter = {
|
|
42
|
+
...(ctx.config.areas?.length ? { areas: ctx.config.areas } : {}),
|
|
43
|
+
...(ctx.config.tasks?.length ? { taskIds: ctx.config.tasks } : {}),
|
|
44
|
+
...(ctx.config.tags?.length ? { tags: ctx.config.tags } : {}),
|
|
45
|
+
};
|
|
46
|
+
allTasks = await ctx.taskSource.loadTasks(Object.keys(filter).length > 0 ? filter : undefined);
|
|
47
|
+
}
|
|
48
|
+
else {
|
|
49
|
+
allTasks = await loadPipelineTasks({
|
|
50
|
+
rootDir: ctx.config.rootDir,
|
|
51
|
+
mode: ctx.config.mode,
|
|
52
|
+
repoTasksPath: ctx.config.repoTasksPath,
|
|
53
|
+
});
|
|
54
|
+
}
|
|
41
55
|
// Bridge: narrow to literacy tasks for canonical doc access
|
|
42
56
|
const literacyTasks = allTasks.filter((t) => t.mode === "literacy");
|
|
43
57
|
const tasksWithDocs = literacyTasks.filter((t) => (t.context?.docs?.length ?? 0) > 0);
|
|
@@ -18,6 +18,21 @@ export declare class GenerateConfigsStep implements PipelineStep {
|
|
|
18
18
|
private compileLiteracyVariants;
|
|
19
19
|
private compileSingleMode;
|
|
20
20
|
private loadTasks;
|
|
21
|
+
/**
|
|
22
|
+
* Load tasks from the Content Lake via ctx.taskSource.
|
|
23
|
+
*
|
|
24
|
+
* The ContentLakeTaskSource adapter handles area/task/tag filtering
|
|
25
|
+
* in the GROQ query itself, so we build a FilterOptions and pass it
|
|
26
|
+
* through rather than filtering in-memory after loading.
|
|
27
|
+
*/
|
|
28
|
+
private loadTasksFromContentLake;
|
|
29
|
+
/**
|
|
30
|
+
* Load tasks from filesystem .task.ts files.
|
|
31
|
+
*
|
|
32
|
+
* This is the original path used for repo-based and inline tasks.
|
|
33
|
+
* It scans tasks/{mode}/ and optionally --repo-tasks-path.
|
|
34
|
+
*/
|
|
35
|
+
private loadTasksFromFilesystem;
|
|
21
36
|
private applyFilters;
|
|
22
37
|
/**
|
|
23
38
|
* Build a descriptive error message when no tasks match the current filters.
|
|
@@ -209,6 +209,50 @@ export class GenerateConfigsStep {
|
|
|
209
209
|
// Task loading — unified for all modes
|
|
210
210
|
// ---------------------------------------------------------------------------
|
|
211
211
|
async loadTasks(ctx, mode, state) {
|
|
212
|
+
// Content Lake path — use ctx.taskSource (ContentLakeTaskSource) which
|
|
213
|
+
// loads ailf.task documents via GROQ. This is the only path that sees
|
|
214
|
+
// Studio-owned tasks (ownership: "studio").
|
|
215
|
+
if (ctx.config.taskSourceType === "content-lake") {
|
|
216
|
+
return this.loadTasksFromContentLake(ctx, state);
|
|
217
|
+
}
|
|
218
|
+
// Filesystem path — load from .task.ts files (repo tasks, inline tasks).
|
|
219
|
+
return this.loadTasksFromFilesystem(ctx, mode, state);
|
|
220
|
+
}
|
|
221
|
+
/**
|
|
222
|
+
* Load tasks from the Content Lake via ctx.taskSource.
|
|
223
|
+
*
|
|
224
|
+
* The ContentLakeTaskSource adapter handles area/task/tag filtering
|
|
225
|
+
* in the GROQ query itself, so we build a FilterOptions and pass it
|
|
226
|
+
* through rather than filtering in-memory after loading.
|
|
227
|
+
*/
|
|
228
|
+
async loadTasksFromContentLake(ctx, state) {
|
|
229
|
+
const filter = {
|
|
230
|
+
...(ctx.config.areas?.length ? { areas: ctx.config.areas } : {}),
|
|
231
|
+
...(ctx.config.tasks?.length ? { taskIds: ctx.config.tasks } : {}),
|
|
232
|
+
...(ctx.config.tags?.length ? { tags: ctx.config.tags } : {}),
|
|
233
|
+
};
|
|
234
|
+
const tasks = await ctx.taskSource.loadTasks(Object.keys(filter).length > 0 ? filter : undefined);
|
|
235
|
+
// Capture loaded IDs for error messages (same as filesystem path)
|
|
236
|
+
this.lastLoadedTaskIds = tasks
|
|
237
|
+
.map((t) => t.id)
|
|
238
|
+
.filter((id) => !!id);
|
|
239
|
+
// Release auto-scope
|
|
240
|
+
if (state.releaseAutoScope && !ctx.config.noAutoScope) {
|
|
241
|
+
const scopedIds = new Set(state.releaseAutoScope.affectedTaskIds);
|
|
242
|
+
const beforeCount = tasks.length;
|
|
243
|
+
const scoped = tasks.filter((t) => "id" in t && scopedIds.has(t.id));
|
|
244
|
+
ctx.logger.info(` 🎯 Auto-scoped to ${scoped.length} of ${beforeCount} task(s) affected by release`);
|
|
245
|
+
return scoped;
|
|
246
|
+
}
|
|
247
|
+
return tasks;
|
|
248
|
+
}
|
|
249
|
+
/**
|
|
250
|
+
* Load tasks from filesystem .task.ts files.
|
|
251
|
+
*
|
|
252
|
+
* This is the original path used for repo-based and inline tasks.
|
|
253
|
+
* It scans tasks/{mode}/ and optionally --repo-tasks-path.
|
|
254
|
+
*/
|
|
255
|
+
async loadTasksFromFilesystem(ctx, mode, state) {
|
|
212
256
|
const { resolve } = await import("path");
|
|
213
257
|
const { discoverTsTaskFiles, loadTsTaskFile } = await import("../../adapters/task-sources/task-file-loader.js");
|
|
214
258
|
const { resolveVendoredSubdir } = await import("../../pipeline/compiler/config-loader.js");
|
|
@@ -113,6 +113,11 @@ export class RunEvalStep {
|
|
|
113
113
|
// required eval modes were satisfied from the remote cache.
|
|
114
114
|
state.remoteCacheHits ??= new Set();
|
|
115
115
|
state.remoteCacheHits.add(this.mode);
|
|
116
|
+
// Carry forward Promptfoo share URLs from the cached report
|
|
117
|
+
if (remoteCacheResult.promptfooUrls?.length) {
|
|
118
|
+
state.promptfooUrls ??= [];
|
|
119
|
+
state.promptfooUrls.push(...remoteCacheResult.promptfooUrls);
|
|
120
|
+
}
|
|
116
121
|
// Capture the restored score-summary from remote cache
|
|
117
122
|
const cachedSummaryPath = resolve(rootDir, "results", "latest", "score-summary.json");
|
|
118
123
|
if (existsSync(cachedSummaryPath)) {
|
|
@@ -189,6 +194,14 @@ export class RunEvalStep {
|
|
|
189
194
|
mode: this.mode,
|
|
190
195
|
});
|
|
191
196
|
}
|
|
197
|
+
// Extract Promptfoo share URL from eval results (Step 3b)
|
|
198
|
+
if (ctx.evalRunner.extractShareUrl) {
|
|
199
|
+
const shareUrl = ctx.evalRunner.extractShareUrl(resolve(rootDir, resultsFileForMode(this.mode)));
|
|
200
|
+
if (shareUrl) {
|
|
201
|
+
state.promptfooUrls ??= [];
|
|
202
|
+
state.promptfooUrls.push({ mode: this.mode, url: shareUrl });
|
|
203
|
+
}
|
|
204
|
+
}
|
|
192
205
|
const durationMs = Date.now() - start;
|
|
193
206
|
return {
|
|
194
207
|
durationMs,
|
|
@@ -224,6 +237,7 @@ async function checkRemoteCache(fingerprint, reportStore, rootDir) {
|
|
|
224
237
|
console.log(` ℹ️ Fingerprint: ${fingerprint.slice(0, 16)}... (${queryMs}ms)`);
|
|
225
238
|
return {
|
|
226
239
|
completedAt: cachedReport.completedAt,
|
|
240
|
+
promptfooUrls: cachedReport.provenance?.promptfooUrls,
|
|
227
241
|
reportId: cachedReport.id,
|
|
228
242
|
};
|
|
229
243
|
}
|
|
@@ -173,6 +173,10 @@ async function dispatchGitHubEval(repo, payload, config) {
|
|
|
173
173
|
projectId: payload.projectId,
|
|
174
174
|
publish: true,
|
|
175
175
|
source: "production",
|
|
176
|
+
// Studio-initiated evals always use Content Lake as the task source.
|
|
177
|
+
// Without this, the pipeline only loads filesystem .task.ts files and
|
|
178
|
+
// Studio-owned tasks are invisible.
|
|
179
|
+
taskMode: "content-lake",
|
|
176
180
|
// Release-scoped fields
|
|
177
181
|
...(hasPerspective ? { perspective: payload.perspective } : {}),
|
|
178
182
|
// Task-scoped fields
|