@sanity/ailf 2.2.0 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/config/rubrics.ts +3 -3
  2. package/dist/_vendor/ailf-core/types/index.d.ts +25 -0
  3. package/dist/adapters/task-sources/content-lake-task-source.js +15 -7
  4. package/dist/commands/calculate-scores.js +7 -2
  5. package/dist/commands/capture-list.d.ts +1 -1
  6. package/dist/commands/capture-list.js +6 -3
  7. package/dist/commands/compare.js +11 -7
  8. package/dist/commands/explain-handler.js +22 -24
  9. package/dist/commands/fetch-docs.js +4 -2
  10. package/dist/commands/generate-configs.js +6 -2
  11. package/dist/commands/pipeline-action.js +8 -24
  12. package/dist/commands/pipeline.js +1 -1
  13. package/dist/commands/pr-comment.js +6 -2
  14. package/dist/commands/publish.d.ts +1 -0
  15. package/dist/commands/publish.js +12 -8
  16. package/dist/commands/remote-pipeline.js +1 -1
  17. package/dist/commands/remote-results.d.ts +8 -8
  18. package/dist/commands/remote-results.js +7 -7
  19. package/dist/commands/shared/options.d.ts +8 -0
  20. package/dist/commands/shared/options.js +10 -0
  21. package/dist/commands/shared/resolve-output-dir.d.ts +27 -0
  22. package/dist/commands/shared/resolve-output-dir.js +36 -0
  23. package/dist/composition-root.js +1 -1
  24. package/dist/config/rubrics.ts +3 -3
  25. package/dist/orchestration/build-app-context.js +1 -1
  26. package/dist/orchestration/steps/fetch-docs-step.js +23 -9
  27. package/dist/orchestration/steps/gap-analysis-step.js +86 -75
  28. package/dist/orchestration/steps/generate-configs-step.d.ts +15 -0
  29. package/dist/orchestration/steps/generate-configs-step.js +56 -0
  30. package/dist/orchestration/steps/run-eval-step.js +14 -0
  31. package/dist/pipeline/calculate-scores.js +113 -2
  32. package/dist/pipeline/compare.js +50 -19
  33. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +64 -0
  34. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +6 -0
  35. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +14 -0
  36. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -0
  37. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +3 -0
  38. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +1 -27
  39. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +2 -9
  40. package/dist/pipeline/compiler/rubric-resolution.d.ts +40 -0
  41. package/dist/pipeline/compiler/rubric-resolution.js +52 -0
  42. package/dist/pipeline/compiler/scoring-bridge.js +59 -7
  43. package/dist/pipeline/provenance.js +7 -1
  44. package/dist/pipeline/validate.d.ts +5 -4
  45. package/dist/pipeline/validate.js +34 -113
  46. package/dist/webhook/eval-request-handler.js +4 -0
  47. package/package.json +1 -1
@@ -51,10 +51,11 @@ export declare function validateReferenceSolutions(rootDir: string): ValidationI
51
51
  */
52
52
  export declare function validateRubricsYaml(rootDir: string): ValidationIssue[];
53
53
  /**
54
- * Check that tasks/*.yaml files exist, parse, and conform to the Zod schema.
55
- * Validates both the new single-definition format (with `id`) and the legacy
56
- * paired format. Uses `TaskFileSchema` from schemas.ts for structural
57
- * validation, plus cross-entry checks (duplicate IDs, docs path consistency).
54
+ * Check that task definition files exist.
55
+ *
56
+ * Tasks live as `*.task.ts` files in mode subdirectories (e.g.
57
+ * `tasks/literacy/groq.task.ts`). Legacy YAML task files are no longer
58
+ * used. Warns only if no task files are found at all.
58
59
  */
59
60
  export declare function validateTaskFiles(rootDir: string): ValidationIssue[];
60
61
  /**
@@ -9,10 +9,9 @@
9
9
  */
10
10
  import fs from "fs";
11
11
  import path from "path";
12
- import { load } from "js-yaml";
13
12
  import { tryLoadConfigFile } from "./compiler/config-loader.js";
14
13
  import { resolveMappings } from "./resolve-mappings.js";
15
- import { FeatureRegistrySchema, formatZodErrors, RubricConfigSchema, TaskFileSchema, ThresholdConfigSchema, } from "./schemas.js";
14
+ import { FeatureRegistrySchema, formatZodErrors, RubricConfigSchema, ThresholdConfigSchema, } from "./schemas.js";
16
15
  // ---------------------------------------------------------------------------
17
16
  // Helpers
18
17
  // ---------------------------------------------------------------------------
@@ -248,10 +247,11 @@ export function validateRubricsYaml(rootDir) {
248
247
  return issues;
249
248
  }
250
249
  /**
251
- * Check that tasks/*.yaml files exist, parse, and conform to the Zod schema.
252
- * Validates both the new single-definition format (with `id`) and the legacy
253
- * paired format. Uses `TaskFileSchema` from schemas.ts for structural
254
- * validation, plus cross-entry checks (duplicate IDs, docs path consistency).
250
+ * Check that task definition files exist.
251
+ *
252
+ * Tasks live as `*.task.ts` files in mode subdirectories (e.g.
253
+ * `tasks/literacy/groq.task.ts`). Legacy YAML task files are no longer
254
+ * used. Warns only if no task files are found at all.
255
255
  */
256
256
  export function validateTaskFiles(rootDir) {
257
257
  const source = "validateTaskFiles";
@@ -261,70 +261,9 @@ export function validateTaskFiles(rootDir) {
261
261
  issues.push(warning(source, "tasks/ directory not found (using Content Lake tasks?)", tasksDir));
262
262
  return issues;
263
263
  }
264
- const yamlFiles = fs
265
- .readdirSync(tasksDir)
266
- .filter((f) => (f.endsWith(".yaml") || f.endsWith(".yml")) && !f.startsWith("."));
267
- if (yamlFiles.length === 0) {
268
- issues.push(warning(source, "No task YAML files found in tasks/ (using Content Lake tasks?)", tasksDir));
269
- return issues;
270
- }
271
- const allIds = new Map(); // id → source file
272
- const templateKeys = loadTemplateKeys(rootDir);
273
- for (const file of yamlFiles) {
274
- const filePath = path.join(tasksDir, file);
275
- // Step 1: Parse YAML
276
- const result = parseYamlFile(filePath, source);
277
- if (!result.ok) {
278
- issues.push(result.issue);
279
- continue;
280
- }
281
- const { data } = result;
282
- if (!Array.isArray(data)) {
283
- issues.push(error(source, `${file} did not parse to an array of tasks`, filePath));
284
- continue;
285
- }
286
- // Step 2: Validate each entry with Zod schema
287
- const zodResult = TaskFileSchema.safeParse(data);
288
- if (!zodResult.success) {
289
- const lines = formatZodErrors(zodResult.error);
290
- for (const line of lines) {
291
- issues.push(error(source, `${file}: ${line.trim()}`, filePath));
292
- }
293
- continue;
294
- }
295
- // Step 3: Cross-entry validation (duplicate IDs, docs path consistency)
296
- for (const entry of zodResult.data) {
297
- if ("id" in entry && typeof entry.id === "string") {
298
- // Check for duplicate IDs across all files
299
- if (allIds.has(entry.id)) {
300
- issues.push(error(source, `${file}: duplicate id '${entry.id}' (also in ${allIds.get(entry.id)})`, filePath));
301
- }
302
- else {
303
- allIds.set(entry.id, file);
304
- }
305
- // Check docs path matches task id
306
- const vars = entry.vars;
307
- if (vars.docs && typeof vars.docs === "string") {
308
- const expectedPath = `file://contexts/canonical/${entry.id}.md`;
309
- if (vars.docs !== expectedPath) {
310
- issues.push(warning(source, `${file}: id is '${entry.id}' but docs path is '${vars.docs}' (expected '${expectedPath}')`, filePath));
311
- }
312
- }
313
- // Check that llm-rubric template references exist in config/rubrics
314
- const asserts = entry.assert;
315
- if (Array.isArray(asserts) && templateKeys.size > 0) {
316
- for (const a of asserts) {
317
- const assertion = a;
318
- if (assertion.type === "llm-rubric" &&
319
- typeof assertion.template === "string") {
320
- if (!templateKeys.has(assertion.template)) {
321
- issues.push(error(source, `${file}: task '${entry.id}' references unknown rubric template '${assertion.template}' (available: ${[...templateKeys].join(", ")})`, filePath));
322
- }
323
- }
324
- }
325
- }
326
- }
327
- }
264
+ const taskAreas = collectTaskAreas(tasksDir);
265
+ if (taskAreas.size === 0) {
266
+ issues.push(warning(source, "No task files found in tasks/ (using Content Lake tasks?)", tasksDir));
328
267
  }
329
268
  return issues;
330
269
  }
@@ -355,15 +294,10 @@ export function validateThresholdsYaml(rootDir) {
355
294
  // Cross-reference: warn if an area override references an area with no task file
356
295
  if (zodResult.data.areas) {
357
296
  const tasksDir = path.join(rootDir, "tasks");
358
- if (fs.existsSync(tasksDir)) {
359
- const taskFiles = new Set(fs
360
- .readdirSync(tasksDir)
361
- .filter((f) => /\.(yaml|yml|task\.ts|task\.js)$/.test(f))
362
- .map((f) => f.replace(/\.(yaml|yml|task\.ts|task\.js)$/, "")));
363
- for (const areaName of Object.keys(zodResult.data.areas)) {
364
- if (!taskFiles.has(areaName)) {
365
- issues.push(warning(source, `config/thresholds: area override '${areaName}' has no matching tasks/${areaName}`, loaded.filePath));
366
- }
297
+ const taskAreas = collectTaskAreas(tasksDir);
298
+ for (const areaName of Object.keys(zodResult.data.areas)) {
299
+ if (!taskAreas.has(areaName)) {
300
+ issues.push(warning(source, `config/thresholds: area override '${areaName}' has no matching task file`, loaded.filePath));
367
301
  }
368
302
  }
369
303
  }
@@ -378,44 +312,31 @@ function error(source, message, filePath) {
378
312
  };
379
313
  }
380
314
  /**
381
- * Load the set of valid rubric template keys from config/rubrics.
382
- * Returns an empty set if the file is missing or invalid.
315
+ * Collect task area names from all subdirectories of `tasksDir`.
316
+ *
317
+ * Task files live in mode subdirectories (e.g. `tasks/literacy/groq.task.ts`).
318
+ * Returns a set of basenames without the `.task.ts`/`.task.js` extension.
383
319
  */
384
- function loadTemplateKeys(rootDir) {
385
- const loaded = tryLoadConfigFile("rubrics", rootDir);
386
- if (!loaded)
320
+ function collectTaskAreas(tasksDir) {
321
+ if (!fs.existsSync(tasksDir))
387
322
  return new Set();
388
- try {
389
- const templates = loaded.data?.templates;
390
- if (templates && typeof templates === "object") {
391
- return new Set(Object.keys(templates));
323
+ const areas = new Set();
324
+ const taskFilePattern = /\.task\.(ts|js)$/;
325
+ for (const entry of fs.readdirSync(tasksDir, { withFileTypes: true })) {
326
+ if (entry.isDirectory()) {
327
+ const subdir = path.join(tasksDir, entry.name);
328
+ for (const file of fs.readdirSync(subdir)) {
329
+ if (taskFilePattern.test(file)) {
330
+ areas.add(file.replace(taskFilePattern, ""));
331
+ }
332
+ }
333
+ }
334
+ // Also check top-level task files for backwards compatibility
335
+ if (entry.isFile() && taskFilePattern.test(entry.name)) {
336
+ areas.add(entry.name.replace(taskFilePattern, ""));
392
337
  }
393
338
  }
394
- catch {
395
- // Ignore — structural errors are caught by validateRubricsYaml
396
- }
397
- return new Set();
398
- }
399
- /** Safely parse a YAML file, returning the parsed value or a validation issue. */
400
- function parseYamlFile(filePath, source) {
401
- if (!fs.existsSync(filePath)) {
402
- return {
403
- issue: error(source, `File not found: ${filePath}`, filePath),
404
- ok: false,
405
- };
406
- }
407
- try {
408
- const raw = fs.readFileSync(filePath, "utf-8");
409
- const data = load(raw);
410
- return { data, ok: true };
411
- }
412
- catch (err) {
413
- const message = err instanceof Error ? err.message : "Unknown YAML parse error";
414
- return {
415
- issue: error(source, `Failed to parse YAML: ${message}`, filePath),
416
- ok: false,
417
- };
418
- }
339
+ return areas;
419
340
  }
420
341
  // ---------------------------------------------------------------------------
421
342
  // Main entry point
@@ -173,6 +173,10 @@ async function dispatchGitHubEval(repo, payload, config) {
173
173
  projectId: payload.projectId,
174
174
  publish: true,
175
175
  source: "production",
176
+ // Studio-initiated evals always use Content Lake as the task source.
177
+ // Without this, the pipeline only loads filesystem .task.ts files and
178
+ // Studio-owned tasks are invisible.
179
+ taskMode: "content-lake",
176
180
  // Release-scoped fields
177
181
  ...(hasPerspective ? { perspective: payload.perspective } : {}),
178
182
  // Task-scoped fields
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "2.2.0",
3
+ "version": "2.3.1",
4
4
  "private": false,
5
5
  "publishConfig": {
6
6
  "access": "public"