@sanity/ailf 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. package/README.md +0 -1
  2. package/config/models.ts +15 -3
  3. package/dist/_vendor/ailf-core/config-helpers.d.ts +14 -17
  4. package/dist/_vendor/ailf-core/config-helpers.js +22 -2
  5. package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
  6. package/dist/_vendor/ailf-core/examples/index.js +25 -0
  7. package/dist/_vendor/ailf-core/index.d.ts +2 -2
  8. package/dist/_vendor/ailf-core/index.js +1 -1
  9. package/dist/_vendor/ailf-core/ports/context.d.ts +2 -0
  10. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
  11. package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
  12. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
  13. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +2 -0
  14. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
  15. package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
  16. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +13 -0
  17. package/dist/_vendor/ailf-core/types/index.d.ts +1 -3
  18. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +78 -23
  19. package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
  20. package/dist/adapters/config-sources/file-config-adapter.js +1 -0
  21. package/dist/adapters/config-sources/ts-config-loader.js +21 -13
  22. package/dist/adapters/task-sources/content-lake-task-source.js +17 -20
  23. package/dist/adapters/task-sources/index.d.ts +2 -2
  24. package/dist/adapters/task-sources/index.js +2 -2
  25. package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
  26. package/dist/adapters/task-sources/repo-schemas.js +227 -19
  27. package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
  28. package/dist/adapters/task-sources/repo-task-source.js +81 -122
  29. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  30. package/dist/adapters/task-sources/repo-validation.js +126 -5
  31. package/dist/adapters/task-sources/task-file-loader.d.ts +2 -2
  32. package/dist/adapters/task-sources/task-file-loader.js +2 -2
  33. package/dist/commands/coverage-audit.js +3 -1
  34. package/dist/commands/init.d.ts +6 -4
  35. package/dist/commands/init.js +302 -23
  36. package/dist/commands/validate-tasks.d.ts +2 -2
  37. package/dist/commands/validate-tasks.js +26 -15
  38. package/dist/composition-root.d.ts +13 -1
  39. package/dist/composition-root.js +73 -41
  40. package/dist/index.d.ts +41 -0
  41. package/dist/index.js +48 -0
  42. package/dist/orchestration/build-step-sequence.js +4 -2
  43. package/dist/orchestration/steps/fetch-docs-step.js +2 -3
  44. package/dist/orchestration/steps/generate-configs-step.js +28 -12
  45. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
  46. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
  47. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
  48. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +105 -68
  49. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
  50. package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
  51. package/dist/pipeline/compiler/literacy-bridge.js +1 -1
  52. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  53. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  54. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  55. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  56. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  57. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  58. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
  59. package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
  60. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  61. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  62. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  63. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  64. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  65. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  66. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  67. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  68. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  69. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  70. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  71. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  72. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  73. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  74. package/dist/pipeline/compiler/mode-handlers/{agent-harness-handler.d.ts → agent-harness/types.d.ts} +3 -24
  75. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  76. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  77. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  78. package/dist/pipeline/compiler/mode-handlers/index.d.ts +4 -5
  79. package/dist/pipeline/compiler/mode-handlers/index.js +4 -6
  80. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  81. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  82. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  83. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  84. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  85. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  86. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  87. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  88. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  89. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  90. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  91. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  92. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  93. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  94. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  95. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  96. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  97. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  98. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  99. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  100. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  101. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  102. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  103. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  104. package/dist/pipeline/compiler/mode-handlers/{mcp-assertions.d.ts → mcp-server/assertions.d.ts} +2 -10
  105. package/dist/pipeline/compiler/mode-handlers/{mcp-assertions.js → mcp-server/assertions.js} +63 -6
  106. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  107. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  108. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  109. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  110. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  111. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  112. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  113. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
  114. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  115. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  116. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  117. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  118. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  119. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
  120. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  121. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
  122. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  123. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  124. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
  125. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
  126. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
  127. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  128. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  129. package/dist/pipeline/compiler/preset-loader.js +99 -0
  130. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +6 -9
  131. package/dist/pipeline/compiler/presets/sanity-literacy.js +10 -156
  132. package/dist/pipeline/expand-tasks.d.ts +2 -2
  133. package/dist/pipeline/expand-tasks.js +2 -2
  134. package/dist/pipeline/generate-configs.js +1 -1
  135. package/dist/pipeline/map-request-to-config.js +1 -0
  136. package/dist/pipeline/mirror-repo-tasks.d.ts +7 -7
  137. package/dist/pipeline/mirror-repo-tasks.js +9 -9
  138. package/dist/pipeline/plan.js +1 -1
  139. package/package.json +11 -3
  140. package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
  141. package/dist/_vendor/ailf-tasks/cli.js +0 -61
  142. package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
  143. package/dist/_vendor/ailf-tasks/index.js +0 -16
  144. package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
  145. package/dist/_vendor/ailf-tasks/parser.js +0 -73
  146. package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
  147. package/dist/_vendor/ailf-tasks/schemas.js +0 -180
  148. package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
  149. package/dist/_vendor/ailf-tasks/validation.js +0 -162
  150. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +0 -485
  151. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +0 -76
  152. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +0 -245
  153. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +0 -89
  154. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +0 -379
  155. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +0 -67
  156. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +0 -309
@@ -1,12 +1,12 @@
1
1
  /**
2
2
  * Plugin registry — typed extension points for AILF evaluation capabilities.
3
3
  *
4
- * Twelve extension points: evaluation modes, providers, assertions,
5
- * rubric templates, fixture resolvers, report sinks, dashboard renderers,
6
- * prompt templates, scoring profiles, doc fetcher factory, source defs,
7
- * and feature defs.
8
- *
9
- * Presets bundle multiple extensions into a single installable unit.
4
+ * Three-tier architecture:
5
+ * - **Mode bases** define evaluation methodology (rubrics, scoring, prompts)
6
+ * - **Domain presets** target a mode base and add domain config (sources,
7
+ * features, doc fetcher)
8
+ * - **Framework assertions** are generic evaluation primitives available to
9
+ * all modes
10
10
  *
11
11
  * @see docs/design-docs/architecture-overhaul/extensibility-plugins.md
12
12
  */
@@ -19,6 +19,7 @@ export class InMemoryPluginRegistry {
19
19
  rubricTemplates_ = new Map();
20
20
  fixtureResolvers_ = new Map();
21
21
  reportSinks_ = new Map();
22
+ modeBases_ = new Map();
22
23
  presets_ = new Map();
23
24
  promptTemplates_ = {};
24
25
  scoringProfiles_ = {};
@@ -26,19 +27,56 @@ export class InMemoryPluginRegistry {
26
27
  sourceDefs_ = [];
27
28
  featureDefs_;
28
29
  registerPreset(preset) {
30
+ // Skip archived presets entirely
31
+ if (preset.status === "archived")
32
+ return;
33
+ // Store draft/paused presets in the map (for later activation via
34
+ // --preset flag) but skip all side-effect registrations. This prevents
35
+ // a draft preset from silently overwriting the doc fetcher, merging
36
+ // scoring profiles, etc.
29
37
  this.presets_.set(preset.name, preset);
30
- if (preset.modes) {
31
- for (const mode of preset.modes)
32
- this.registerMode(mode);
38
+ if (preset.status === "draft" || preset.status === "paused")
39
+ return;
40
+ // Resolve mode base defaults
41
+ const base = this.modeBases_.get(preset.mode);
42
+ if (!base) {
43
+ throw new Error(`Preset "${preset.name}" targets mode "${preset.mode}" ` +
44
+ `but no mode base is registered for it. ` +
45
+ `Available mode bases: ${[...this.modeBases_.keys()].join(", ") || "(none)"}`);
46
+ }
47
+ // Mode is already registered by registerModeBase() — no need to re-register.
48
+ // Merge rubric templates: base defaults + preset overrides (by ID)
49
+ const baseRubrics = new Map((base.rubricTemplates ?? []).map((r) => [r.id, r]));
50
+ for (const r of preset.rubricTemplates ?? []) {
51
+ baseRubrics.set(r.id, r);
52
+ }
53
+ for (const r of baseRubrics.values()) {
54
+ this.registerRubricTemplate(r);
55
+ }
56
+ // Merge scoring profiles: base defaults + preset overrides (by name)
57
+ const profiles = {
58
+ ...base.scoringProfiles,
59
+ ...preset.scoringProfiles,
60
+ };
61
+ if (Object.keys(profiles).length > 0) {
62
+ this.registerScoringProfiles(profiles);
33
63
  }
64
+ // Merge prompt templates: base defaults + preset overrides (by name)
65
+ const prompts = {
66
+ ...base.promptTemplates,
67
+ ...preset.promptTemplates,
68
+ };
69
+ if (Object.keys(prompts).length > 0) {
70
+ this.registerPromptTemplates(prompts);
71
+ }
72
+ // Merge assertions: base + preset (preset overrides by type)
73
+ for (const a of base.assertions ?? [])
74
+ this.registerAssertion(a);
34
75
  if (preset.assertions) {
35
76
  for (const a of preset.assertions)
36
77
  this.registerAssertion(a);
37
78
  }
38
- if (preset.rubricTemplates) {
39
- for (const t of preset.rubricTemplates)
40
- this.registerRubricTemplate(t);
41
- }
79
+ // Register domain-specific fields
42
80
  if (preset.fixtureResolvers) {
43
81
  for (const r of preset.fixtureResolvers)
44
82
  this.registerFixtureResolver(r);
@@ -47,12 +85,6 @@ export class InMemoryPluginRegistry {
47
85
  for (const s of preset.reportSinks)
48
86
  this.registerReportSink(s);
49
87
  }
50
- if (preset.promptTemplates) {
51
- this.registerPromptTemplates(preset.promptTemplates);
52
- }
53
- if (preset.scoringProfiles) {
54
- this.registerScoringProfiles(preset.scoringProfiles);
55
- }
56
88
  if (preset.docFetcher) {
57
89
  this.registerDocFetcherFactory(preset.docFetcher);
58
90
  }
@@ -124,9 +156,30 @@ export class InMemoryPluginRegistry {
124
156
  return this.sourceDefs_;
125
157
  }
126
158
  registerFeatureDefs(features) {
127
- this.featureDefs_ = features;
159
+ if (!this.featureDefs_) {
160
+ this.featureDefs_ = features;
161
+ return;
162
+ }
163
+ // Merge by feature ID: new features override existing on ID collision,
164
+ // existing features not in new set are preserved.
165
+ const merged = new Map(this.featureDefs_.features.map((f) => [f.id, f]));
166
+ for (const f of features.features) {
167
+ merged.set(f.id, f);
168
+ }
169
+ this.featureDefs_ = { features: [...merged.values()] };
128
170
  }
129
171
  getFeatureDefs() {
130
172
  return this.featureDefs_;
131
173
  }
174
+ registerModeBase(base) {
175
+ this.modeBases_.set(base.mode.id, base);
176
+ // Also register the mode itself so getMode() works
177
+ this.registerMode(base.mode);
178
+ }
179
+ getModeBase(modeId) {
180
+ return this.modeBases_.get(modeId);
181
+ }
182
+ getModeBases() {
183
+ return [...this.modeBases_.values()];
184
+ }
132
185
  }
@@ -121,5 +121,6 @@ function mapEvalConfigToResolvedConfig(config, rootDir) {
121
121
  concurrency: config.concurrency,
122
122
  remote: false,
123
123
  apiUrl: "https://ailf-api.sanity.build",
124
+ presets: config.presets,
124
125
  };
125
126
  }
@@ -13,21 +13,29 @@
13
13
  * @see docs/design-docs/architecture-overhaul/typescript-configuration.md
14
14
  */
15
15
  import { existsSync } from "fs";
16
+ import { pathToFileURL } from "node:url";
16
17
  import { createJiti } from "jiti";
17
18
  // ---------------------------------------------------------------------------
18
- // Singleton jiti instance — reused across all loads for caching
19
+ // jiti instance factory — resolves imports relative to the loaded file
19
20
  // ---------------------------------------------------------------------------
20
- let _jiti = null;
21
- function getJiti() {
22
- if (!_jiti) {
23
- _jiti = createJiti(import.meta.url, {
24
- // Interop: handle both `export default` and `module.exports`
25
- interopDefault: true,
26
- // Don't require file extensions in imports
27
- requireCache: true,
28
- });
29
- }
30
- return _jiti;
21
+ /**
22
+ * Create a jiti instance that resolves bare-specifier imports relative to
23
+ * the given file path, not relative to this loader module.
24
+ *
25
+ * This is critical for pnpm workspaces: a task file at `.ailf/tasks/foo.task.ts`
26
+ * importing `@sanity/ailf` must resolve through the dependency graph
27
+ * visible from the task file's directory, not from deep inside packages/eval/.
28
+ *
29
+ * We pass a `file://` URL (not a bare path) so jiti uses ESM resolution,
30
+ * which matches the `"import"` condition in package.json exports maps.
31
+ */
32
+ function createJitiForFile(filePath) {
33
+ return createJiti(pathToFileURL(filePath).href, {
34
+ // Interop: handle both `export default` and `module.exports`
35
+ interopDefault: true,
36
+ // Don't require file extensions in imports
37
+ requireCache: true,
38
+ });
31
39
  }
32
40
  /**
33
41
  * Load a TypeScript or JavaScript config file and return its default export.
@@ -43,7 +51,7 @@ export async function loadTsConfig(filePath) {
43
51
  return { ok: false, error: `File not found: ${filePath}`, path: filePath };
44
52
  }
45
53
  try {
46
- const jiti = getJiti();
54
+ const jiti = createJitiForFile(filePath);
47
55
  const mod = await jiti.import(filePath);
48
56
  const value = extractDefault(mod);
49
57
  if (value === undefined || value === null) {
@@ -20,8 +20,8 @@
20
20
  * mapping to LiteracyTaskDefinition.
21
21
  *
22
22
  * Key projections:
23
- * - featureArea reference → dereferenced areaId string
24
- * - canonicalDocs[] → dereferenced article slugs with reason
23
+ * - area reference → dereferenced areaId string
24
+ * - contextDocs[] → dereferenced article slugs with reason
25
25
  * - referenceSolution → title (for identification, not full content)
26
26
  *
27
27
  * Filter parameters:
@@ -30,7 +30,7 @@
30
30
  */
31
31
  const TASKS_QUERY = /* groq */ `
32
32
  *[_type == "ailf.task"
33
- && (!defined($areas) || featureArea->areaId.current in $areas)
33
+ && (!defined($areas) || area->areaId.current in $areas)
34
34
  && (!defined($taskIds) || id.current in $taskIds)
35
35
  && (
36
36
  // Status-based filtering (unified — replaces execution.enabled)
@@ -41,13 +41,13 @@ const TASKS_QUERY = /* groq */ `
41
41
  || (defined($taskIds) && status != "archived")
42
42
  )
43
43
  && (!defined($tags) || count((tags)[@ in $tags]) > 0)
44
- ] | order(featureArea->areaId.current asc, id.current asc) {
44
+ ] | order(area->areaId.current asc, id.current asc) {
45
45
  "taskId": id.current,
46
- description,
47
- "featureAreaId": featureArea->areaId.current,
48
- taskPrompt,
46
+ title,
47
+ "areaId": area->areaId.current,
48
+ promptText,
49
49
  docCoverage,
50
- "canonicalDocs": canonicalDocs[] {
50
+ "contextDocs": contextDocs[] {
51
51
  refType,
52
52
  "slug": doc->slug.current,
53
53
  "docRefId": doc->_id,
@@ -57,7 +57,7 @@ const TASKS_QUERY = /* groq */ `
57
57
  perspective,
58
58
  reason
59
59
  },
60
- assert,
60
+ assertions,
61
61
  rawAssert,
62
62
  baseline,
63
63
  tags,
@@ -112,22 +112,19 @@ function buildGroqParams(filter) {
112
112
  * Map a Content Lake ailf.task document directly to a LiteracyTaskDefinition.
113
113
  *
114
114
  * Returns null if the document is missing required fields (taskId,
115
- * description, featureAreaId, taskPrompt). These are required by the
115
+ * title, areaId, promptText). These are required by the
116
116
  * Studio schema, but defensive coding handles edge cases (drafts,
117
117
  * partially-created documents, etc.).
118
118
  */
119
119
  function mapToLiteracyTask(raw) {
120
120
  // Required fields — skip malformed documents
121
- if (!raw.taskId ||
122
- !raw.description ||
123
- !raw.featureAreaId ||
124
- !raw.taskPrompt) {
121
+ if (!raw.taskId || !raw.title || !raw.areaId || !raw.promptText) {
125
122
  return null;
126
123
  }
127
- const docs = (raw.canonicalDocs ?? [])
124
+ const docs = (raw.contextDocs ?? [])
128
125
  .map(mapCanonicalDocRef)
129
126
  .filter((d) => d !== null);
130
- const assertions = mapAssertions(raw.assert ?? []);
127
+ const assertions = mapAssertions(raw.assertions ?? []);
131
128
  // Append raw pass-through assertions (escape hatch for arbitrary Promptfoo
132
129
  // assertion types that aren't in the curated list). These bypass template
133
130
  // resolution and flow directly into the expanded Promptfoo test case as
@@ -158,9 +155,9 @@ function mapToLiteracyTask(raw) {
158
155
  return {
159
156
  mode: "literacy",
160
157
  id: raw.taskId,
161
- title: raw.description,
162
- area: raw.featureAreaId,
163
- prompt: { text: raw.taskPrompt },
158
+ title: raw.title,
159
+ area: raw.areaId,
160
+ prompt: { text: raw.promptText },
164
161
  context: { docs },
165
162
  assertions: allAssertions,
166
163
  docCoverage: raw.docCoverage ?? false,
@@ -172,7 +169,7 @@ function mapToLiteracyTask(raw) {
172
169
  };
173
170
  }
174
171
  /**
175
- * Map a Content Lake canonical doc entry to the polymorphic CanonicalDocRef.
172
+ * Map a Content Lake context doc entry to the polymorphic CanonicalDocRef.
176
173
  *
177
174
  * Uses `refType` to determine which value field to read. Falls back to
178
175
  * slug-based resolution for backward compatibility (documents created
@@ -1,8 +1,8 @@
1
1
  export { CompositeTaskSource } from "./composite-task-source.js";
2
2
  export { ContentLakeTaskSource } from "./content-lake-task-source.js";
3
- export { parseRepoConfig, parseRepoTaskFile, RepoConfigSchema, RepoTaskSchema, CURATED_ASSERTION_TYPES, RUBRIC_TEMPLATE_NAMES, type RepoConfig, type RepoTask, } from "./repo-schemas.js";
3
+ export { CanonicalTaskFileSchema, CanonicalTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, parseRepoConfig, RepoConfigSchema, RUBRIC_TEMPLATE_NAMES, type CanonicalTask, type CuratedAssertionType, type RepoConfig, type RubricTemplateName, } from "./repo-schemas.js";
4
4
  export { RepoTaskSource } from "./repo-task-source.js";
5
5
  export { detectTriggerContext, resolveTrigger, type ResolvedTrigger, type TriggerContext, } from "./repo-trigger.js";
6
- export { formatValidationResult, validateRepoTasks, type ValidationMessage, type ValidationResult, } from "./repo-validation.js";
6
+ export { formatValidationResult, validateCanonicalTasks, type ValidationMessage, type ValidationResult, } from "./repo-validation.js";
7
7
  export { discoverTsTaskFiles, loadAllTsTaskFiles, loadTsTaskFile, } from "./task-file-loader.js";
8
8
  export { YamlTaskSource } from "./yaml-task-source.js";
@@ -1,8 +1,8 @@
1
1
  export { CompositeTaskSource } from "./composite-task-source.js";
2
2
  export { ContentLakeTaskSource } from "./content-lake-task-source.js";
3
- export { parseRepoConfig, parseRepoTaskFile, RepoConfigSchema, RepoTaskSchema, CURATED_ASSERTION_TYPES, RUBRIC_TEMPLATE_NAMES, } from "./repo-schemas.js";
3
+ export { CanonicalTaskFileSchema, CanonicalTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, parseRepoConfig, RepoConfigSchema, RUBRIC_TEMPLATE_NAMES, } from "./repo-schemas.js";
4
4
  export { RepoTaskSource } from "./repo-task-source.js";
5
5
  export { detectTriggerContext, resolveTrigger, } from "./repo-trigger.js";
6
- export { formatValidationResult, validateRepoTasks, } from "./repo-validation.js";
6
+ export { formatValidationResult, validateCanonicalTasks, } from "./repo-validation.js";
7
7
  export { discoverTsTaskFiles, loadAllTsTaskFiles, loadTsTaskFile, } from "./task-file-loader.js";
8
8
  export { YamlTaskSource } from "./yaml-task-source.js";
@@ -1,29 +1,231 @@
1
1
  /**
2
- * repo-schemas.ts — Re-exports task schemas + defines config schemas.
2
+ * repo-schemas.ts — Canonical Zod schemas for task and config validation.
3
3
  *
4
- * Task schemas (RepoTaskSchema, assertions, etc.) are the single source
5
- * of truth in @sanity/ailf-tasks. This file re-exports them so existing
6
- * importers within the eval package don't need to change their paths.
4
+ * Task schemas validate .ailf/tasks/*.yaml and .task.ts files against the
5
+ * canonical GeneralizedTaskDefinition shape. Field names match the internal
6
+ * domain model: `area` (not featureArea), `assertions` (not assert),
7
+ * `context.docs` (not canonicalDocs), `prompt.text` (not vars.task).
7
8
  *
8
- * Config schemas (RepoConfigSchema, trigger config, etc.) remain here
9
- * because they are specific to the eval pipeline and not needed by
10
- * external tools that only validate task YAML.
9
+ * Previously this file re-exported from @sanity/ailf-tasks. That package
10
+ * has been eliminated — all schema logic now lives here.
11
11
  *
12
- * @see packages/tasks/src/schemas.ts — task schema source of truth
12
+ * Config schemas (RepoConfigSchema, trigger config) are eval-pipeline-
13
+ * specific and remain here unchanged.
14
+ *
15
+ * @see packages/core/src/types/generalized-task.ts — canonical TypeScript types
13
16
  * @see docs/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
14
17
  */
15
- import { RepoTaskFileSchema as _Schema } from "../../_vendor/ailf-tasks/index.d.ts";
16
18
  import { z } from "zod";
17
- export { CURATED_ASSERTION_TYPES, RepoTaskFileSchema, RepoTaskSchema, RUBRIC_TEMPLATE_NAMES, type CuratedAssertionType, type RepoTask, type RubricTemplateName, } from "../../_vendor/ailf-tasks/index.d.ts";
18
- export { loadTaskDir, parseTaskFile } from "../../_vendor/ailf-tasks/index.d.ts";
19
19
  /**
20
- * Parse and validate a repo task file's content. Returns typed tasks or throws
21
- * with a user-friendly Zod error message.
20
+ * The set of assertion types allowed in task files.
21
+ *
22
+ * This is a curated subset of Promptfoo assertion types — we expose only the
23
+ * types that are stable, well-documented, and useful for external authors.
24
+ */
25
+ export declare const CURATED_ASSERTION_TYPES: readonly ["llm-rubric", "contains", "contains-any", "contains-all", "not-contains", "icontains", "icontains-any", "regex", "javascript", "similar", "cost", "latency"];
26
+ export type CuratedAssertionType = (typeof CURATED_ASSERTION_TYPES)[number];
27
+ /**
28
+ * Valid rubric template names — must match keys in config/rubrics.yaml.
29
+ */
30
+ export declare const RUBRIC_TEMPLATE_NAMES: readonly ["task-completion", "code-correctness", "doc-coverage"];
31
+ export type RubricTemplateName = (typeof RUBRIC_TEMPLATE_NAMES)[number];
32
+ /**
33
+ * Zod schema for a single task definition using canonical field names.
34
+ *
35
+ * Uses .passthrough() to allow mode-specific fields (serverConfig, sandbox,
36
+ * handler, etc.) without listing every possible field. Mode-specific
37
+ * validation is deferred to the pipeline's mode handlers.
38
+ */
39
+ export declare const CanonicalTaskSchema: z.ZodObject<{
40
+ id: z.ZodString;
41
+ mode: z.ZodDefault<z.ZodString>;
42
+ title: z.ZodString;
43
+ description: z.ZodOptional<z.ZodString>;
44
+ area: z.ZodOptional<z.ZodString>;
45
+ difficulty: z.ZodOptional<z.ZodEnum<{
46
+ basic: "basic";
47
+ intermediate: "intermediate";
48
+ advanced: "advanced";
49
+ }>>;
50
+ status: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
51
+ active: "active";
52
+ draft: "draft";
53
+ paused: "paused";
54
+ archived: "archived";
55
+ }>>>;
56
+ tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
57
+ prompt: z.ZodOptional<z.ZodObject<{
58
+ template: z.ZodOptional<z.ZodString>;
59
+ text: z.ZodOptional<z.ZodString>;
60
+ systemMessage: z.ZodOptional<z.ZodString>;
61
+ vars: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
62
+ }, z.core.$strip>>;
63
+ context: z.ZodOptional<z.ZodObject<{
64
+ docs: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
65
+ id: z.ZodString;
66
+ reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
67
+ slug: z.ZodOptional<z.ZodString>;
68
+ path: z.ZodOptional<z.ZodString>;
69
+ }, z.core.$strip>, z.ZodObject<{
70
+ slug: z.ZodString;
71
+ reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
72
+ }, z.core.$strip>, z.ZodObject<{
73
+ path: z.ZodString;
74
+ reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
75
+ }, z.core.$strip>, z.ZodObject<{
76
+ perspective: z.ZodString;
77
+ reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
78
+ }, z.core.$strip>]>>>;
79
+ fixtures: z.ZodOptional<z.ZodArray<z.ZodString>>;
80
+ }, z.core.$strip>>;
81
+ assertions: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
82
+ type: z.ZodLiteral<"llm-rubric">;
83
+ template: z.ZodEnum<{
84
+ "task-completion": "task-completion";
85
+ "code-correctness": "code-correctness";
86
+ "doc-coverage": "doc-coverage";
87
+ }>;
88
+ criteria: z.ZodArray<z.ZodString>;
89
+ weight: z.ZodOptional<z.ZodNumber>;
90
+ }, z.core.$strip>, z.ZodObject<{
91
+ type: z.ZodEnum<{
92
+ "llm-rubric": "llm-rubric";
93
+ contains: "contains";
94
+ "contains-any": "contains-any";
95
+ "contains-all": "contains-all";
96
+ "not-contains": "not-contains";
97
+ icontains: "icontains";
98
+ "icontains-any": "icontains-any";
99
+ regex: "regex";
100
+ javascript: "javascript";
101
+ similar: "similar";
102
+ cost: "cost";
103
+ latency: "latency";
104
+ }>;
105
+ value: z.ZodOptional<z.ZodUnknown>;
106
+ threshold: z.ZodOptional<z.ZodNumber>;
107
+ weight: z.ZodOptional<z.ZodNumber>;
108
+ }, z.core.$loose>]>>>;
109
+ referenceSolution: z.ZodOptional<z.ZodString>;
110
+ docCoverage: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
111
+ baseline: z.ZodOptional<z.ZodObject<{
112
+ enabled: z.ZodOptional<z.ZodBoolean>;
113
+ rubric: z.ZodOptional<z.ZodEnum<{
114
+ full: "full";
115
+ abbreviated: "abbreviated";
116
+ none: "none";
117
+ }>>;
118
+ }, z.core.$strip>>;
119
+ rubric: z.ZodOptional<z.ZodUnknown>;
120
+ providers: z.ZodOptional<z.ZodArray<z.ZodUnknown>>;
121
+ options: z.ZodOptional<z.ZodUnknown>;
122
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
123
+ }, z.core.$loose>;
124
+ export type CanonicalTask = z.infer<typeof CanonicalTaskSchema>;
125
+ /**
126
+ * Schema for an array of canonical tasks — what a single .ailf/tasks/*.yaml
127
+ * file contains. Each file must define at least one task.
+ *
+ * The element type is a loose object (`z.core.$loose`); the nested `prompt`,
+ * `context` and `baseline` objects are declared with `z.core.$strip`.
+ * NOTE(review): the "at least one task" rule is not expressed in this
+ * declared type (plain `z.ZodArray`) — confirm it against the runtime schema.
128
+ */
129
+ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodObject<{
130
+ id: z.ZodString;
131
+ mode: z.ZodDefault<z.ZodString>;
132
+ title: z.ZodString;
133
+ description: z.ZodOptional<z.ZodString>;
134
+ area: z.ZodOptional<z.ZodString>;
135
+ difficulty: z.ZodOptional<z.ZodEnum<{
136
+ basic: "basic";
137
+ intermediate: "intermediate";
138
+ advanced: "advanced";
139
+ }>>;
140
+ status: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
141
+ active: "active";
142
+ draft: "draft";
143
+ paused: "paused";
144
+ archived: "archived";
145
+ }>>>;
146
+ tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
147
+ prompt: z.ZodOptional<z.ZodObject<{
148
+ template: z.ZodOptional<z.ZodString>;
149
+ text: z.ZodOptional<z.ZodString>;
150
+ systemMessage: z.ZodOptional<z.ZodString>;
151
+ vars: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
152
+ }, z.core.$strip>>;
153
+ context: z.ZodOptional<z.ZodObject<{
154
+ docs: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
155
+ id: z.ZodString;
156
+ reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
157
+ slug: z.ZodOptional<z.ZodString>;
158
+ path: z.ZodOptional<z.ZodString>;
159
+ }, z.core.$strip>, z.ZodObject<{
160
+ slug: z.ZodString;
161
+ reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
162
+ }, z.core.$strip>, z.ZodObject<{
163
+ path: z.ZodString;
164
+ reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
165
+ }, z.core.$strip>, z.ZodObject<{
166
+ perspective: z.ZodString;
167
+ reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
168
+ }, z.core.$strip>]>>>;
169
+ fixtures: z.ZodOptional<z.ZodArray<z.ZodString>>;
170
+ }, z.core.$strip>>;
171
+ assertions: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
172
+ type: z.ZodLiteral<"llm-rubric">;
173
+ template: z.ZodEnum<{
174
+ "task-completion": "task-completion";
175
+ "code-correctness": "code-correctness";
176
+ "doc-coverage": "doc-coverage";
177
+ }>;
178
+ criteria: z.ZodArray<z.ZodString>;
179
+ weight: z.ZodOptional<z.ZodNumber>;
180
+ }, z.core.$strip>, z.ZodObject<{
181
+ type: z.ZodEnum<{
182
+ "llm-rubric": "llm-rubric";
183
+ contains: "contains";
184
+ "contains-any": "contains-any";
185
+ "contains-all": "contains-all";
186
+ "not-contains": "not-contains";
187
+ icontains: "icontains";
188
+ "icontains-any": "icontains-any";
189
+ regex: "regex";
190
+ javascript: "javascript";
191
+ similar: "similar";
192
+ cost: "cost";
193
+ latency: "latency";
194
+ }>;
195
+ value: z.ZodOptional<z.ZodUnknown>;
196
+ threshold: z.ZodOptional<z.ZodNumber>;
197
+ weight: z.ZodOptional<z.ZodNumber>;
198
+ }, z.core.$loose>]>>>;
199
+ referenceSolution: z.ZodOptional<z.ZodString>;
200
+ docCoverage: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
201
+ baseline: z.ZodOptional<z.ZodObject<{
202
+ enabled: z.ZodOptional<z.ZodBoolean>;
203
+ rubric: z.ZodOptional<z.ZodEnum<{
204
+ full: "full";
205
+ abbreviated: "abbreviated";
206
+ none: "none";
207
+ }>>;
208
+ }, z.core.$strip>>;
209
+ rubric: z.ZodOptional<z.ZodUnknown>;
210
+ providers: z.ZodOptional<z.ZodArray<z.ZodUnknown>>;
211
+ options: z.ZodOptional<z.ZodUnknown>;
212
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
213
+ }, z.core.$loose>>;
214
+ /**
215
+ * Parse and validate a task file's content against the canonical schema.
216
+ * Returns typed tasks or throws with a user-friendly Zod error message.
217
+ *
218
+ * Accepts pre-parsed YAML data (unknown), not a raw string.
+ *
+ * @param raw - Pre-parsed YAML data to validate (not a raw YAML string).
+ * @param filename - Name of the task file; presumably used to label error messages — TODO confirm.
+ * @returns The validated tasks as `CanonicalTask[]`.
+ * @throws When validation fails (the concrete error type is not visible in this declaration).
219
+ */
220
+ export declare function parseCanonicalTaskFile(raw: unknown, filename: string): CanonicalTask[];
221
+ /**
222
+ * Detect legacy field names in raw task data and return helpful messages.
22
223
  *
23
- * NOTE: This accepts pre-parsed YAML data (unknown), not a raw string.
24
- * For raw YAML strings, use `parseTaskFile()` from @sanity/ailf-tasks.
224
+ * Runs BEFORE Zod parsing to catch the most common migration mistake —
225
+ * using old field names from @sanity/ailf-tasks instead of the canonical
226
+ * GeneralizedTaskDefinition shape.
+ *
+ * @param raw - Raw task data of unknown shape, inspected before schema validation.
+ * @param filename - Name of the task file; presumably included in the messages — TODO confirm.
+ * @returns Messages describing detected legacy field names (presumably empty when none are found — TODO confirm).
25
227
  */
26
- export declare function parseRepoTaskFile(raw: unknown, filename: string): z.infer<typeof _Schema>;
228
+ export declare function detectLegacyFieldNames(raw: unknown, filename: string): string[];
27
229
  /**
28
230
  * Zod schema for .ailf/config.yaml — controls documentation source,
29
231
  * report destination, and trigger behavior for evaluations from an