@sanity/ailf 2.2.0 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/rubrics.ts +3 -3
- package/dist/_vendor/ailf-core/types/index.d.ts +25 -0
- package/dist/adapters/task-sources/content-lake-task-source.js +15 -7
- package/dist/commands/calculate-scores.js +7 -2
- package/dist/commands/capture-list.d.ts +1 -1
- package/dist/commands/capture-list.js +6 -3
- package/dist/commands/compare.js +11 -7
- package/dist/commands/explain-handler.js +22 -24
- package/dist/commands/fetch-docs.js +4 -2
- package/dist/commands/generate-configs.js +6 -2
- package/dist/commands/pipeline-action.js +8 -24
- package/dist/commands/pipeline.js +1 -1
- package/dist/commands/pr-comment.js +6 -2
- package/dist/commands/publish.d.ts +1 -0
- package/dist/commands/publish.js +12 -8
- package/dist/commands/remote-pipeline.js +1 -1
- package/dist/commands/remote-results.d.ts +8 -8
- package/dist/commands/remote-results.js +7 -7
- package/dist/commands/shared/options.d.ts +8 -0
- package/dist/commands/shared/options.js +10 -0
- package/dist/commands/shared/resolve-output-dir.d.ts +27 -0
- package/dist/commands/shared/resolve-output-dir.js +36 -0
- package/dist/composition-root.js +1 -1
- package/dist/config/rubrics.ts +3 -3
- package/dist/orchestration/build-app-context.js +1 -1
- package/dist/orchestration/steps/fetch-docs-step.js +23 -9
- package/dist/orchestration/steps/gap-analysis-step.js +86 -75
- package/dist/orchestration/steps/generate-configs-step.d.ts +15 -0
- package/dist/orchestration/steps/generate-configs-step.js +56 -0
- package/dist/orchestration/steps/run-eval-step.js +14 -0
- package/dist/pipeline/calculate-scores.js +113 -2
- package/dist/pipeline/compare.js +50 -19
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +64 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +6 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +14 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +3 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +1 -27
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +2 -9
- package/dist/pipeline/compiler/rubric-resolution.d.ts +40 -0
- package/dist/pipeline/compiler/rubric-resolution.js +52 -0
- package/dist/pipeline/compiler/scoring-bridge.js +59 -7
- package/dist/pipeline/provenance.js +7 -1
- package/dist/pipeline/validate.d.ts +5 -4
- package/dist/pipeline/validate.js +34 -113
- package/dist/webhook/eval-request-handler.js +4 -0
- package/package.json +1 -1
|
@@ -51,10 +51,11 @@ export declare function validateReferenceSolutions(rootDir: string): ValidationI
|
|
|
51
51
|
*/
|
|
52
52
|
export declare function validateRubricsYaml(rootDir: string): ValidationIssue[];
|
|
53
53
|
/**
|
|
54
|
-
* Check that
|
|
55
|
-
*
|
|
56
|
-
*
|
|
57
|
-
*
|
|
54
|
+
* Check that task definition files exist.
|
|
55
|
+
*
|
|
56
|
+
* Tasks live as `*.task.ts` files in mode subdirectories (e.g.
|
|
57
|
+
* `tasks/literacy/groq.task.ts`). Legacy YAML task files are no longer
|
|
58
|
+
* used. Warns only if no task files are found at all.
|
|
58
59
|
*/
|
|
59
60
|
export declare function validateTaskFiles(rootDir: string): ValidationIssue[];
|
|
60
61
|
/**
|
|
@@ -9,10 +9,9 @@
|
|
|
9
9
|
*/
|
|
10
10
|
import fs from "fs";
|
|
11
11
|
import path from "path";
|
|
12
|
-
import { load } from "js-yaml";
|
|
13
12
|
import { tryLoadConfigFile } from "./compiler/config-loader.js";
|
|
14
13
|
import { resolveMappings } from "./resolve-mappings.js";
|
|
15
|
-
import { FeatureRegistrySchema, formatZodErrors, RubricConfigSchema,
|
|
14
|
+
import { FeatureRegistrySchema, formatZodErrors, RubricConfigSchema, ThresholdConfigSchema, } from "./schemas.js";
|
|
16
15
|
// ---------------------------------------------------------------------------
|
|
17
16
|
// Helpers
|
|
18
17
|
// ---------------------------------------------------------------------------
|
|
@@ -248,10 +247,11 @@ export function validateRubricsYaml(rootDir) {
|
|
|
248
247
|
return issues;
|
|
249
248
|
}
|
|
250
249
|
/**
|
|
251
|
-
* Check that
|
|
252
|
-
*
|
|
253
|
-
*
|
|
254
|
-
*
|
|
250
|
+
* Check that task definition files exist.
|
|
251
|
+
*
|
|
252
|
+
* Tasks live as `*.task.ts` files in mode subdirectories (e.g.
|
|
253
|
+
* `tasks/literacy/groq.task.ts`). Legacy YAML task files are no longer
|
|
254
|
+
* used. Warns only if no task files are found at all.
|
|
255
255
|
*/
|
|
256
256
|
export function validateTaskFiles(rootDir) {
|
|
257
257
|
const source = "validateTaskFiles";
|
|
@@ -261,70 +261,9 @@ export function validateTaskFiles(rootDir) {
|
|
|
261
261
|
issues.push(warning(source, "tasks/ directory not found (using Content Lake tasks?)", tasksDir));
|
|
262
262
|
return issues;
|
|
263
263
|
}
|
|
264
|
-
const
|
|
265
|
-
|
|
266
|
-
.
|
|
267
|
-
if (yamlFiles.length === 0) {
|
|
268
|
-
issues.push(warning(source, "No task YAML files found in tasks/ (using Content Lake tasks?)", tasksDir));
|
|
269
|
-
return issues;
|
|
270
|
-
}
|
|
271
|
-
const allIds = new Map(); // id → source file
|
|
272
|
-
const templateKeys = loadTemplateKeys(rootDir);
|
|
273
|
-
for (const file of yamlFiles) {
|
|
274
|
-
const filePath = path.join(tasksDir, file);
|
|
275
|
-
// Step 1: Parse YAML
|
|
276
|
-
const result = parseYamlFile(filePath, source);
|
|
277
|
-
if (!result.ok) {
|
|
278
|
-
issues.push(result.issue);
|
|
279
|
-
continue;
|
|
280
|
-
}
|
|
281
|
-
const { data } = result;
|
|
282
|
-
if (!Array.isArray(data)) {
|
|
283
|
-
issues.push(error(source, `${file} did not parse to an array of tasks`, filePath));
|
|
284
|
-
continue;
|
|
285
|
-
}
|
|
286
|
-
// Step 2: Validate each entry with Zod schema
|
|
287
|
-
const zodResult = TaskFileSchema.safeParse(data);
|
|
288
|
-
if (!zodResult.success) {
|
|
289
|
-
const lines = formatZodErrors(zodResult.error);
|
|
290
|
-
for (const line of lines) {
|
|
291
|
-
issues.push(error(source, `${file}: ${line.trim()}`, filePath));
|
|
292
|
-
}
|
|
293
|
-
continue;
|
|
294
|
-
}
|
|
295
|
-
// Step 3: Cross-entry validation (duplicate IDs, docs path consistency)
|
|
296
|
-
for (const entry of zodResult.data) {
|
|
297
|
-
if ("id" in entry && typeof entry.id === "string") {
|
|
298
|
-
// Check for duplicate IDs across all files
|
|
299
|
-
if (allIds.has(entry.id)) {
|
|
300
|
-
issues.push(error(source, `${file}: duplicate id '${entry.id}' (also in ${allIds.get(entry.id)})`, filePath));
|
|
301
|
-
}
|
|
302
|
-
else {
|
|
303
|
-
allIds.set(entry.id, file);
|
|
304
|
-
}
|
|
305
|
-
// Check docs path matches task id
|
|
306
|
-
const vars = entry.vars;
|
|
307
|
-
if (vars.docs && typeof vars.docs === "string") {
|
|
308
|
-
const expectedPath = `file://contexts/canonical/${entry.id}.md`;
|
|
309
|
-
if (vars.docs !== expectedPath) {
|
|
310
|
-
issues.push(warning(source, `${file}: id is '${entry.id}' but docs path is '${vars.docs}' (expected '${expectedPath}')`, filePath));
|
|
311
|
-
}
|
|
312
|
-
}
|
|
313
|
-
// Check that llm-rubric template references exist in config/rubrics
|
|
314
|
-
const asserts = entry.assert;
|
|
315
|
-
if (Array.isArray(asserts) && templateKeys.size > 0) {
|
|
316
|
-
for (const a of asserts) {
|
|
317
|
-
const assertion = a;
|
|
318
|
-
if (assertion.type === "llm-rubric" &&
|
|
319
|
-
typeof assertion.template === "string") {
|
|
320
|
-
if (!templateKeys.has(assertion.template)) {
|
|
321
|
-
issues.push(error(source, `${file}: task '${entry.id}' references unknown rubric template '${assertion.template}' (available: ${[...templateKeys].join(", ")})`, filePath));
|
|
322
|
-
}
|
|
323
|
-
}
|
|
324
|
-
}
|
|
325
|
-
}
|
|
326
|
-
}
|
|
327
|
-
}
|
|
264
|
+
const taskAreas = collectTaskAreas(tasksDir);
|
|
265
|
+
if (taskAreas.size === 0) {
|
|
266
|
+
issues.push(warning(source, "No task files found in tasks/ (using Content Lake tasks?)", tasksDir));
|
|
328
267
|
}
|
|
329
268
|
return issues;
|
|
330
269
|
}
|
|
@@ -355,15 +294,10 @@ export function validateThresholdsYaml(rootDir) {
|
|
|
355
294
|
// Cross-reference: warn if an area override references an area with no task file
|
|
356
295
|
if (zodResult.data.areas) {
|
|
357
296
|
const tasksDir = path.join(rootDir, "tasks");
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
.
|
|
362
|
-
.map((f) => f.replace(/\.(yaml|yml|task\.ts|task\.js)$/, "")));
|
|
363
|
-
for (const areaName of Object.keys(zodResult.data.areas)) {
|
|
364
|
-
if (!taskFiles.has(areaName)) {
|
|
365
|
-
issues.push(warning(source, `config/thresholds: area override '${areaName}' has no matching tasks/${areaName}`, loaded.filePath));
|
|
366
|
-
}
|
|
297
|
+
const taskAreas = collectTaskAreas(tasksDir);
|
|
298
|
+
for (const areaName of Object.keys(zodResult.data.areas)) {
|
|
299
|
+
if (!taskAreas.has(areaName)) {
|
|
300
|
+
issues.push(warning(source, `config/thresholds: area override '${areaName}' has no matching task file`, loaded.filePath));
|
|
367
301
|
}
|
|
368
302
|
}
|
|
369
303
|
}
|
|
@@ -378,44 +312,31 @@ function error(source, message, filePath) {
|
|
|
378
312
|
};
|
|
379
313
|
}
|
|
380
314
|
/**
|
|
381
|
-
*
|
|
382
|
-
*
|
|
315
|
+
* Collect task area names from all subdirectories of `tasksDir`.
|
|
316
|
+
*
|
|
317
|
+
* Task files live in mode subdirectories (e.g. `tasks/literacy/groq.task.ts`).
|
|
318
|
+
* Returns a set of basenames without the `.task.ts`/`.task.js` extension.
|
|
383
319
|
*/
|
|
384
|
-
function
|
|
385
|
-
|
|
386
|
-
if (!loaded)
|
|
320
|
+
function collectTaskAreas(tasksDir) {
|
|
321
|
+
if (!fs.existsSync(tasksDir))
|
|
387
322
|
return new Set();
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
323
|
+
const areas = new Set();
|
|
324
|
+
const taskFilePattern = /\.task\.(ts|js)$/;
|
|
325
|
+
for (const entry of fs.readdirSync(tasksDir, { withFileTypes: true })) {
|
|
326
|
+
if (entry.isDirectory()) {
|
|
327
|
+
const subdir = path.join(tasksDir, entry.name);
|
|
328
|
+
for (const file of fs.readdirSync(subdir)) {
|
|
329
|
+
if (taskFilePattern.test(file)) {
|
|
330
|
+
areas.add(file.replace(taskFilePattern, ""));
|
|
331
|
+
}
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
// Also check top-level task files for backwards compatibility
|
|
335
|
+
if (entry.isFile() && taskFilePattern.test(entry.name)) {
|
|
336
|
+
areas.add(entry.name.replace(taskFilePattern, ""));
|
|
392
337
|
}
|
|
393
338
|
}
|
|
394
|
-
|
|
395
|
-
// Ignore — structural errors are caught by validateRubricsYaml
|
|
396
|
-
}
|
|
397
|
-
return new Set();
|
|
398
|
-
}
|
|
399
|
-
/** Safely parse a YAML file, returning the parsed value or a validation issue. */
|
|
400
|
-
function parseYamlFile(filePath, source) {
|
|
401
|
-
if (!fs.existsSync(filePath)) {
|
|
402
|
-
return {
|
|
403
|
-
issue: error(source, `File not found: ${filePath}`, filePath),
|
|
404
|
-
ok: false,
|
|
405
|
-
};
|
|
406
|
-
}
|
|
407
|
-
try {
|
|
408
|
-
const raw = fs.readFileSync(filePath, "utf-8");
|
|
409
|
-
const data = load(raw);
|
|
410
|
-
return { data, ok: true };
|
|
411
|
-
}
|
|
412
|
-
catch (err) {
|
|
413
|
-
const message = err instanceof Error ? err.message : "Unknown YAML parse error";
|
|
414
|
-
return {
|
|
415
|
-
issue: error(source, `Failed to parse YAML: ${message}`, filePath),
|
|
416
|
-
ok: false,
|
|
417
|
-
};
|
|
418
|
-
}
|
|
339
|
+
return areas;
|
|
419
340
|
}
|
|
420
341
|
// ---------------------------------------------------------------------------
|
|
421
342
|
// Main entry point
|
|
@@ -173,6 +173,10 @@ async function dispatchGitHubEval(repo, payload, config) {
|
|
|
173
173
|
projectId: payload.projectId,
|
|
174
174
|
publish: true,
|
|
175
175
|
source: "production",
|
|
176
|
+
// Studio-initiated evals always use Content Lake as the task source.
|
|
177
|
+
// Without this, the pipeline only loads filesystem .task.ts files and
|
|
178
|
+
// Studio-owned tasks are invisible.
|
|
179
|
+
taskMode: "content-lake",
|
|
176
180
|
// Release-scoped fields
|
|
177
181
|
...(hasPerspective ? { perspective: payload.perspective } : {}),
|
|
178
182
|
// Task-scoped fields
|