@sanity/ailf 1.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (499)
  1. package/README.md +0 -1
  2. package/canonical/grader-references/README.md +2 -2
  3. package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
  4. package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
  5. package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
  6. package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
  7. package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
  8. package/config/features.ts +1 -1
  9. package/config/models.ts +29 -12
  10. package/config/sources.ts +1 -1
  11. package/config/thresholds.ts +1 -1
  12. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
  13. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
  14. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
  15. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
  16. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
  17. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
  18. package/dist/_vendor/ailf-core/config-helpers.d.ts +20 -17
  19. package/dist/_vendor/ailf-core/config-helpers.js +51 -2
  20. package/dist/_vendor/ailf-core/examples/index.d.ts +166 -80
  21. package/dist/_vendor/ailf-core/examples/index.js +213 -94
  22. package/dist/_vendor/ailf-core/index.d.ts +3 -2
  23. package/dist/_vendor/ailf-core/index.js +2 -1
  24. package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
  25. package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
  26. package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
  27. package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
  28. package/dist/_vendor/ailf-core/ports/context.d.ts +22 -1
  29. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
  30. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
  31. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
  32. package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
  33. package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
  34. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
  35. package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
  36. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +7 -1
  37. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +16 -2
  38. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
  39. package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
  40. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
  41. package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
  42. package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
  43. package/dist/_vendor/ailf-core/services/index.js +1 -1
  44. package/dist/_vendor/ailf-core/services/scoring.js +9 -0
  45. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +25 -1
  46. package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
  47. package/dist/_vendor/ailf-core/types/index.d.ts +48 -7
  48. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +105 -23
  49. package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
  50. package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
  51. package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
  52. package/dist/adapters/api-client/remediation.js +2 -2
  53. package/dist/adapters/config-sources/file-config-adapter.js +7 -1
  54. package/dist/adapters/config-sources/ts-config-loader.js +21 -13
  55. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
  56. package/dist/adapters/index.d.ts +0 -1
  57. package/dist/adapters/index.js +0 -1
  58. package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
  59. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  60. package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
  61. package/dist/adapters/task-sources/content-lake-task-source.js +21 -26
  62. package/dist/adapters/task-sources/index.d.ts +3 -4
  63. package/dist/adapters/task-sources/index.js +3 -4
  64. package/dist/adapters/task-sources/repo-schemas.d.ts +219 -17
  65. package/dist/adapters/task-sources/repo-schemas.js +228 -20
  66. package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
  67. package/dist/adapters/task-sources/repo-task-source.js +81 -122
  68. package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
  69. package/dist/adapters/task-sources/repo-trigger.js +1 -1
  70. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  71. package/dist/adapters/task-sources/repo-validation.js +126 -5
  72. package/dist/adapters/task-sources/task-file-loader.d.ts +10 -7
  73. package/dist/adapters/task-sources/task-file-loader.js +21 -7
  74. package/dist/agent-observer/test-imports.d.ts +7 -0
  75. package/dist/agent-observer/test-imports.js +185 -0
  76. package/dist/artifact-capture/comparator.d.ts +22 -0
  77. package/dist/artifact-capture/comparator.js +493 -0
  78. package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
  79. package/dist/artifact-capture/filesystem-collector.js +237 -0
  80. package/dist/artifact-capture/redact-artifact.d.ts +20 -0
  81. package/dist/artifact-capture/redact-artifact.js +115 -0
  82. package/dist/assertions/source-isolation.d.ts +1 -1
  83. package/dist/assertions/source-isolation.js +1 -1
  84. package/dist/cli.js +4 -0
  85. package/dist/commands/calculate-scores.js +1 -0
  86. package/dist/commands/capture-compare.d.ts +15 -0
  87. package/dist/commands/capture-compare.js +253 -0
  88. package/dist/commands/capture-list.d.ts +12 -0
  89. package/dist/commands/capture-list.js +147 -0
  90. package/dist/commands/capture.d.ts +9 -0
  91. package/dist/commands/capture.js +16 -0
  92. package/dist/commands/chronic-failures.d.ts +8 -0
  93. package/dist/commands/chronic-failures.js +33 -0
  94. package/dist/commands/coverage-audit.js +3 -1
  95. package/dist/commands/explain-handler.d.ts +1 -1
  96. package/dist/commands/explain-handler.js +37 -8
  97. package/dist/commands/fetch-docs.js +1 -0
  98. package/dist/commands/generate-configs.d.ts +3 -3
  99. package/dist/commands/generate-configs.js +20 -8
  100. package/dist/commands/init.d.ts +5 -4
  101. package/dist/commands/init.js +190 -25
  102. package/dist/commands/pipeline-action.d.ts +7 -1
  103. package/dist/commands/pipeline-action.js +43 -19
  104. package/dist/commands/pipeline.d.ts +6 -1
  105. package/dist/commands/pipeline.js +7 -2
  106. package/dist/commands/pr-comment.js +1 -0
  107. package/dist/commands/publish.js +1 -0
  108. package/dist/commands/shared/help.js +2 -2
  109. package/dist/commands/update-quality-scores.d.ts +5 -0
  110. package/dist/commands/update-quality-scores.js +20 -0
  111. package/dist/commands/validate-tasks.d.ts +2 -2
  112. package/dist/commands/validate-tasks.js +26 -15
  113. package/dist/composition-root.d.ts +15 -4
  114. package/dist/composition-root.js +100 -55
  115. package/dist/config/features.ts +23 -0
  116. package/dist/config/models.ts +100 -0
  117. package/dist/config/prompts.ts +16 -0
  118. package/dist/config/rubrics.ts +225 -0
  119. package/dist/config/schedules.ts +47 -0
  120. package/dist/config/sinks.ts +37 -0
  121. package/dist/config/sources.ts +21 -0
  122. package/dist/config/thresholds.ts +61 -0
  123. package/dist/index.d.ts +41 -0
  124. package/dist/index.js +48 -0
  125. package/dist/lib/agent-behavior-report.d.ts +8 -0
  126. package/dist/lib/agent-behavior-report.js +185 -0
  127. package/dist/lib/baseline.d.ts +19 -0
  128. package/dist/lib/baseline.js +153 -0
  129. package/dist/lib/calculate-scores.d.ts +23 -0
  130. package/dist/lib/calculate-scores.js +42 -0
  131. package/dist/lib/compare.d.ts +18 -0
  132. package/dist/lib/compare.js +170 -0
  133. package/dist/lib/coverage-audit.d.ts +4 -0
  134. package/dist/lib/coverage-audit.js +42 -0
  135. package/dist/lib/discovery-report.d.ts +13 -0
  136. package/dist/lib/discovery-report.js +57 -0
  137. package/dist/lib/fetch-docs.d.ts +30 -0
  138. package/dist/lib/fetch-docs.js +171 -0
  139. package/dist/lib/generate-configs.d.ts +25 -0
  140. package/dist/lib/generate-configs.js +42 -0
  141. package/dist/lib/grader-api.d.ts +21 -0
  142. package/dist/lib/grader-api.js +34 -0
  143. package/dist/lib/grader-compare.d.ts +19 -0
  144. package/dist/lib/grader-compare.js +91 -0
  145. package/dist/lib/grader-consistency.d.ts +27 -0
  146. package/dist/lib/grader-consistency.js +79 -0
  147. package/dist/lib/grader-sensitivity.d.ts +19 -0
  148. package/dist/lib/grader-sensitivity.js +75 -0
  149. package/dist/lib/grader-validate.d.ts +19 -0
  150. package/dist/lib/grader-validate.js +78 -0
  151. package/dist/lib/measure-retrieval.d.ts +14 -0
  152. package/dist/lib/measure-retrieval.js +71 -0
  153. package/dist/lib/pr-comment.d.ts +16 -0
  154. package/dist/lib/pr-comment.js +28 -0
  155. package/dist/lib/readiness-report.d.ts +13 -0
  156. package/dist/lib/readiness-report.js +108 -0
  157. package/dist/lib/webhook-server.d.ts +11 -0
  158. package/dist/lib/webhook-server.js +24 -0
  159. package/dist/lib/weekly-digest.d.ts +24 -0
  160. package/dist/lib/weekly-digest.js +148 -0
  161. package/dist/orchestration/build-app-context.js +13 -0
  162. package/dist/orchestration/build-step-sequence.js +4 -2
  163. package/dist/orchestration/cache-context.d.ts +23 -0
  164. package/dist/orchestration/cache-context.js +43 -0
  165. package/dist/orchestration/env-bridge.d.ts +21 -0
  166. package/dist/orchestration/env-bridge.js +66 -0
  167. package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
  168. package/dist/orchestration/load-pipeline-tasks.js +52 -0
  169. package/dist/orchestration/pipeline-orchestrator.js +75 -5
  170. package/dist/orchestration/step-runner.js +5 -1
  171. package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
  172. package/dist/orchestration/steps/calculate-scores-step.js +13 -0
  173. package/dist/orchestration/steps/callback-step.js +10 -1
  174. package/dist/orchestration/steps/compare-step.js +6 -3
  175. package/dist/orchestration/steps/discovery-report-step.js +6 -2
  176. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  177. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  178. package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
  179. package/dist/orchestration/steps/fetch-docs-step.js +32 -19
  180. package/dist/orchestration/steps/gap-analysis-step.js +13 -2
  181. package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
  182. package/dist/orchestration/steps/generate-configs-step.js +77 -26
  183. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
  184. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  185. package/dist/orchestration/steps/publish-report-step.js +19 -0
  186. package/dist/orchestration/steps/readiness-step.js +8 -3
  187. package/dist/orchestration/steps/report-step.js +17 -4
  188. package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
  189. package/dist/orchestration/steps/run-eval-step.js +51 -31
  190. package/dist/pipeline/agent-behavior-report.js +6 -0
  191. package/dist/pipeline/attribution.d.ts +1 -1
  192. package/dist/pipeline/attribution.js +1 -1
  193. package/dist/pipeline/cache.js +29 -15
  194. package/dist/pipeline/calculate-scores.d.ts +2 -0
  195. package/dist/pipeline/calculate-scores.js +70 -33
  196. package/dist/pipeline/chronic-failures.d.ts +55 -0
  197. package/dist/pipeline/chronic-failures.js +110 -0
  198. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
  199. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
  200. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
  201. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +132 -62
  202. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
  203. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
  204. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
  205. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
  206. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
  207. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
  208. package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
  209. package/dist/pipeline/compiler/assertion-mapper.js +1 -1
  210. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
  211. package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
  212. package/dist/pipeline/compiler/config-loader.d.ts +14 -0
  213. package/dist/pipeline/compiler/config-loader.js +42 -2
  214. package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
  215. package/dist/pipeline/compiler/fixture-resolver.js +1 -1
  216. package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
  217. package/dist/pipeline/compiler/ignore-fields.js +1 -1
  218. package/dist/pipeline/compiler/index.d.ts +2 -5
  219. package/dist/pipeline/compiler/index.js +2 -5
  220. package/dist/pipeline/compiler/literacy-bridge.d.ts +2 -2
  221. package/dist/pipeline/compiler/literacy-bridge.js +2 -2
  222. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  223. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  224. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  225. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  226. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  227. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  228. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +23 -0
  229. package/dist/pipeline/compiler/mode-bases/literacy.js +132 -0
  230. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  231. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  232. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  233. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  234. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  235. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  236. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  237. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  238. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  239. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  240. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  241. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  242. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  243. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  244. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
  245. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  246. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  247. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  248. package/dist/pipeline/compiler/mode-handlers/index.d.ts +6 -7
  249. package/dist/pipeline/compiler/mode-handlers/index.js +6 -8
  250. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  251. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  252. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  253. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  254. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  255. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  256. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  257. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  258. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  259. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  260. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  261. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  262. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  263. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  264. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  265. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  266. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  267. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  268. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  269. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  270. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  271. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  272. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  273. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  274. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +63 -6
  275. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
  276. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
  277. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  278. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  279. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  280. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  281. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  282. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  283. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  284. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +108 -0
  285. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  286. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  287. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  288. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  289. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +3 -1
  290. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +65 -67
  291. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  292. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +191 -0
  293. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  294. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +101 -0
  295. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  296. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  297. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +19 -0
  298. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +323 -0
  299. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +103 -0
  300. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  301. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
  302. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
  303. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  304. package/dist/pipeline/compiler/preset-loader.js +99 -0
  305. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +7 -10
  306. package/dist/pipeline/compiler/presets/sanity-literacy.js +11 -157
  307. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
  308. package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
  309. package/dist/pipeline/compiler/provider-assembler.js +13 -7
  310. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
  311. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
  312. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
  313. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
  314. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
  315. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
  316. package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
  317. package/dist/pipeline/compiler/sandbox/index.js +1 -1
  318. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
  319. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
  320. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
  321. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
  322. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
  323. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
  324. package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
  325. package/dist/pipeline/compiler/scoring-bridge.js +1 -1
  326. package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
  327. package/dist/pipeline/compiler/task-bridge.js +92 -0
  328. package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
  329. package/dist/pipeline/compiler/task-graph-builder.js +1 -4
  330. package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
  331. package/dist/pipeline/compiler/telemetry/index.js +1 -1
  332. package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
  333. package/dist/pipeline/compiler/variable-resolver.js +1 -1
  334. package/dist/pipeline/coverage-audit.d.ts +1 -1
  335. package/dist/pipeline/coverage-audit.js +1 -1
  336. package/dist/pipeline/degradations.d.ts +1 -1
  337. package/dist/pipeline/degradations.js +1 -1
  338. package/dist/pipeline/expand-tasks.d.ts +2 -2
  339. package/dist/pipeline/expand-tasks.js +2 -2
  340. package/dist/pipeline/failure-modes.d.ts +1 -1
  341. package/dist/pipeline/failure-modes.js +13 -1
  342. package/dist/pipeline/gap-analysis.d.ts +1 -1
  343. package/dist/pipeline/gap-analysis.js +3 -1
  344. package/dist/pipeline/generate-configs.d.ts +2 -2
  345. package/dist/pipeline/generate-configs.js +16 -9
  346. package/dist/pipeline/grader-compare-runner.d.ts +1 -1
  347. package/dist/pipeline/grader-compare-runner.js +7 -1
  348. package/dist/pipeline/grader-comparison.d.ts +1 -1
  349. package/dist/pipeline/grader-comparison.js +1 -1
  350. package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
  351. package/dist/pipeline/grader-consistency-runner.js +7 -1
  352. package/dist/pipeline/grader-consistency.d.ts +1 -1
  353. package/dist/pipeline/grader-consistency.js +1 -1
  354. package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
  355. package/dist/pipeline/grader-sensitivity-runner.js +1 -1
  356. package/dist/pipeline/grader-sensitivity.d.ts +1 -1
  357. package/dist/pipeline/grader-sensitivity.js +1 -1
  358. package/dist/pipeline/grader-validate-runner.d.ts +1 -1
  359. package/dist/pipeline/grader-validate-runner.js +2 -2
  360. package/dist/pipeline/grader-validation.d.ts +1 -1
  361. package/dist/pipeline/grader-validation.js +1 -1
  362. package/dist/pipeline/map-request-to-config.js +16 -2
  363. package/dist/pipeline/mirror-repo-tasks.d.ts +8 -8
  364. package/dist/pipeline/mirror-repo-tasks.js +10 -10
  365. package/dist/pipeline/plan-format.d.ts +1 -1
  366. package/dist/pipeline/plan-format.js +1 -1
  367. package/dist/pipeline/plan.d.ts +1 -1
  368. package/dist/pipeline/plan.js +68 -30
  369. package/dist/pipeline/probe.d.ts +1 -1
  370. package/dist/pipeline/probe.js +1 -1
  371. package/dist/pipeline/readiness-report.d.ts +2 -2
  372. package/dist/pipeline/readiness-report.js +2 -2
  373. package/dist/pipeline/release-classification.d.ts +1 -1
  374. package/dist/pipeline/release-classification.js +1 -1
  375. package/dist/pipeline/release-report.d.ts +1 -1
  376. package/dist/pipeline/release-report.js +1 -1
  377. package/dist/pipeline/repo-eval-comment.d.ts +1 -1
  378. package/dist/pipeline/repo-eval-comment.js +1 -1
  379. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  380. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  381. package/dist/pipeline/resolve-mappings.d.ts +6 -6
  382. package/dist/pipeline/resolve-mappings.js +44 -44
  383. package/dist/pipeline/retrieval-metrics.d.ts +3 -3
  384. package/dist/pipeline/retrieval-metrics.js +28 -20
  385. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  386. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  387. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  388. package/dist/pipeline/steps/compare-step.js +90 -0
  389. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  390. package/dist/pipeline/steps/eval-step.js +347 -0
  391. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  392. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  393. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  394. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  395. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  396. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  397. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  398. package/dist/pipeline/steps/publish-report-step.js +243 -0
  399. package/dist/pipeline/steps/report-step.d.ts +13 -0
  400. package/dist/pipeline/steps/report-step.js +56 -0
  401. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  402. package/dist/pipeline/steps/update-scores-step.js +42 -0
  403. package/dist/pipeline/targeted-loo.d.ts +1 -1
  404. package/dist/pipeline/targeted-loo.js +1 -1
  405. package/dist/pipeline/thresholds.d.ts +1 -1
  406. package/dist/pipeline/thresholds.js +1 -1
  407. package/dist/pipeline/validate.js +13 -0
  408. package/dist/report-store.d.ts +17 -0
  409. package/dist/report-store.js +24 -0
  410. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  411. package/dist/scripts/agent-behavior-report.js +315 -0
  412. package/dist/scripts/baseline.d.ts +43 -0
  413. package/dist/scripts/baseline.js +267 -0
  414. package/dist/scripts/calculate-scores.d.ts +166 -0
  415. package/dist/scripts/calculate-scores.js +1296 -0
  416. package/dist/scripts/compare.d.ts +22 -0
  417. package/dist/scripts/compare.js +334 -0
  418. package/dist/scripts/coverage-audit.d.ts +44 -0
  419. package/dist/scripts/coverage-audit.js +209 -0
  420. package/dist/scripts/debug-eval.d.ts +19 -0
  421. package/dist/scripts/debug-eval.js +73 -0
  422. package/dist/scripts/discovery-report.d.ts +58 -0
  423. package/dist/scripts/discovery-report.js +250 -0
  424. package/dist/scripts/fetch-docs.d.ts +35 -0
  425. package/dist/scripts/fetch-docs.js +472 -0
  426. package/dist/scripts/generate-configs.d.ts +66 -0
  427. package/dist/scripts/generate-configs.js +459 -0
  428. package/dist/scripts/grader-api.d.ts +27 -0
  429. package/dist/scripts/grader-api.js +206 -0
  430. package/dist/scripts/grader-compare.d.ts +22 -0
  431. package/dist/scripts/grader-compare.js +368 -0
  432. package/dist/scripts/grader-consistency.d.ts +20 -0
  433. package/dist/scripts/grader-consistency.js +313 -0
  434. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  435. package/dist/scripts/grader-sensitivity.js +354 -0
  436. package/dist/scripts/grader-validate.d.ts +19 -0
  437. package/dist/scripts/grader-validate.js +267 -0
  438. package/dist/scripts/measure-retrieval.d.ts +10 -0
  439. package/dist/scripts/measure-retrieval.js +145 -0
  440. package/dist/scripts/migrate-task-mode.d.ts +1 -1
  441. package/dist/scripts/migrate-task-mode.js +1 -1
  442. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
  443. package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
  444. package/dist/scripts/pipeline.d.ts +76 -0
  445. package/dist/scripts/pipeline.js +1031 -0
  446. package/dist/scripts/pr-comment.d.ts +10 -0
  447. package/dist/scripts/pr-comment.js +510 -0
  448. package/dist/scripts/readiness-report.d.ts +88 -0
  449. package/dist/scripts/readiness-report.js +342 -0
  450. package/dist/scripts/update-quality-scores.d.ts +15 -0
  451. package/dist/scripts/update-quality-scores.js +184 -0
  452. package/dist/scripts/validate-task-sources.d.ts +1 -1
  453. package/dist/scripts/validate-task-sources.js +1 -1
  454. package/dist/scripts/validate.d.ts +13 -0
  455. package/dist/scripts/validate.js +79 -0
  456. package/dist/scripts/webhook-server.d.ts +26 -0
  457. package/dist/scripts/webhook-server.js +147 -0
  458. package/dist/scripts/weekly-digest.d.ts +24 -0
  459. package/dist/scripts/weekly-digest.js +144 -0
  460. package/dist/sinks/format-slack.d.ts +64 -0
  461. package/dist/sinks/format-slack.js +306 -0
  462. package/dist/sinks/slack-sink.d.ts +27 -0
  463. package/dist/sinks/slack-sink.js +78 -0
  464. package/dist/sinks/types.d.ts +1 -1
  465. package/dist/sinks/types.js +1 -1
  466. package/dist/sinks/webhook-sink.d.ts +19 -0
  467. package/dist/sinks/webhook-sink.js +50 -0
  468. package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
  469. package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
  470. package/dist/tasks/literacy/content-lake.task.ts +181 -0
  471. package/dist/tasks/literacy/frameworks.task.ts +129 -0
  472. package/dist/tasks/literacy/functions.task.ts +70 -0
  473. package/dist/tasks/literacy/groq.task.ts +259 -0
  474. package/dist/tasks/literacy/image-handling.task.ts +95 -0
  475. package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
  476. package/dist/tasks/literacy/portable-text.task.ts +169 -0
  477. package/dist/tasks/literacy/studio-setup.task.ts +134 -0
  478. package/dist/tasks/literacy/visual-editing.task.ts +147 -0
  479. package/package.json +32 -24
  480. package/tasks/.expanded.agentic.yaml +280 -0
  481. package/tasks/.expanded.yaml +565 -0
  482. package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
  483. package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
  484. package/tasks/literacy/content-lake.task.ts +181 -0
  485. package/tasks/literacy/frameworks.task.ts +1 -0
  486. package/tasks/literacy/functions.task.ts +1 -0
  487. package/tasks/literacy/groq.task.ts +1 -0
  488. package/tasks/literacy/image-handling.task.ts +95 -0
  489. package/tasks/literacy/nextjs-live.task.ts +2 -1
  490. package/tasks/literacy/portable-text.task.ts +169 -0
  491. package/tasks/literacy/studio-setup.task.ts +5 -2
  492. package/tasks/literacy/visual-editing.task.ts +1 -0
  493. package/LICENSE +0 -21
  494. package/tasks/frameworks.yaml +0 -98
  495. package/tasks/functions.yaml +0 -51
  496. package/tasks/groq.yaml +0 -216
  497. package/tasks/nextjs-live.yaml +0 -62
  498. package/tasks/studio-setup.yaml +0 -111
  499. package/tasks/visual-editing.yaml +0 -120
@@ -1,42 +1,250 @@
1
1
  /**
2
- * repo-schemas.ts — Re-exports task schemas + defines config schemas.
2
+ * repo-schemas.ts — Canonical Zod schemas for task and config validation.
3
3
  *
4
- * Task schemas (RepoTaskSchema, assertions, etc.) are the single source
5
- * of truth in @sanity/ailf-tasks. This file re-exports them so existing
6
- * importers within the eval package don't need to change their paths.
4
+ * Task schemas validate .ailf/tasks/*.yaml and .task.ts files against the
5
+ * canonical GeneralizedTaskDefinition shape. Field names match the internal
6
+ * domain model: `area` (not featureArea), `assertions` (not assert),
7
+ * `context.docs` (not canonicalDocs), `prompt.text` (not vars.task).
7
8
  *
8
- * Config schemas (RepoConfigSchema, trigger config, etc.) remain here
9
- * because they are specific to the eval pipeline and not needed by
10
- * external tools that only validate task YAML.
9
+ * Previously this file re-exported from @sanity/ailf-tasks. That package
10
+ * has been eliminated — all schema logic now lives here.
11
11
  *
12
- * @see packages/tasks/src/schemas.ts task schema source of truth
13
- * @see docs/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
12
+ * Config schemas (RepoConfigSchema, trigger config) are eval-pipeline-
13
+ * specific and remain here unchanged.
14
+ *
15
+ * @see packages/core/src/types/generalized-task.ts — canonical TypeScript types
16
+ * @see docs/archive/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
14
17
  */
15
- import { RepoTaskFileSchema as _Schema } from "../../_vendor/ailf-tasks/index.js";
16
18
  import { z } from "zod";
17
19
  // ---------------------------------------------------------------------------
18
- // Re-exports from @sanity/ailf-tasks (task schemas + validation)
20
+ // Constants — curated assertion types and rubric template names
19
21
  // ---------------------------------------------------------------------------
20
- export { CURATED_ASSERTION_TYPES, RepoTaskFileSchema, RepoTaskSchema, RUBRIC_TEMPLATE_NAMES, } from "../../_vendor/ailf-tasks/index.js";
21
- export { loadTaskDir, parseTaskFile } from "../../_vendor/ailf-tasks/index.js";
22
22
  /**
23
- * Parse and validate a repo task file's content. Returns typed tasks or throws
24
- * with a user-friendly Zod error message.
23
+ * The set of assertion types allowed in task files.
25
24
  *
26
- * NOTE: This accepts pre-parsed YAML data (unknown), not a raw string.
27
- * For raw YAML strings, use `parseTaskFile()` from @sanity/ailf-tasks.
25
+ * This is a curated subset of Promptfoo assertion types — we expose only the
26
+ * types that are stable, well-documented, and useful for external authors.
27
+ */
28
+ export const CURATED_ASSERTION_TYPES = [
29
+ "llm-rubric",
30
+ "contains",
31
+ "contains-any",
32
+ "contains-all",
33
+ "not-contains",
34
+ "icontains",
35
+ "icontains-any",
36
+ "regex",
37
+ "javascript",
38
+ "similar",
39
+ "cost",
40
+ "latency",
41
+ ];
42
+ /**
43
+ * Valid rubric template names — must match keys in config/rubrics.yaml.
44
+ */
45
+ export const RUBRIC_TEMPLATE_NAMES = [
46
+ "task-completion",
47
+ "code-correctness",
48
+ "doc-coverage",
49
+ ];
50
+ // ---------------------------------------------------------------------------
51
+ // Doc ref schemas — polymorphic canonical doc references
52
+ // ---------------------------------------------------------------------------
53
+ /**
54
+ * Polymorphic canonical doc reference — discriminated by key presence.
55
+ * Exactly one resolution key (slug, path, id, or perspective) must be present.
56
+ *
57
+ * @see docs/design-docs/canonical-doc-resolution.md
58
+ */
59
+ const SlugDocRefSchema = z.object({
60
+ slug: z.string().min(1),
61
+ reason: z.string().optional().default(""),
62
+ });
63
+ const PathDocRefSchema = z.object({
64
+ path: z.string().min(1),
65
+ reason: z.string().optional().default(""),
66
+ });
67
+ const IdDocRefSchema = z.object({
68
+ id: z.string().min(1),
69
+ reason: z.string().optional().default(""),
70
+ /** Human-readable slug annotation (not used for resolution) */
71
+ slug: z.string().optional(),
72
+ /** Human-readable path annotation (not used for resolution) */
73
+ path: z.string().optional(),
74
+ });
75
+ const PerspectiveDocRefSchema = z.object({
76
+ perspective: z.string().min(1),
77
+ reason: z.string().optional().default(""),
78
+ });
79
+ // Order matters: IdDocRefSchema first because it may also carry `slug`
80
+ // and `path` as optional annotations. Zod tries schemas in order, so
81
+ // entries like `{ id: "...", slug: "..." }` must match IdDocRefSchema
82
+ // (not SlugDocRefSchema).
83
+ const CanonicalDocRefSchema = z.union([
84
+ IdDocRefSchema,
85
+ SlugDocRefSchema,
86
+ PathDocRefSchema,
87
+ PerspectiveDocRefSchema,
88
+ ]);
89
+ // ---------------------------------------------------------------------------
90
+ // Assertion schemas
91
+ // ---------------------------------------------------------------------------
92
+ /**
93
+ * A templated LLM-rubric assertion — uses one of the predefined rubric
94
+ * templates with author-supplied criteria.
28
95
  */
29
- export function parseRepoTaskFile(raw, filename) {
30
- const result = _Schema.safeParse(raw);
96
+ const TemplatedAssertionSchema = z.object({
97
+ type: z.literal("llm-rubric"),
98
+ template: z.enum(RUBRIC_TEMPLATE_NAMES),
99
+ criteria: z.array(z.string().min(1)).min(1),
100
+ weight: z.number().optional(),
101
+ });
102
+ /**
103
+ * A value-based assertion (contains, regex, cost, etc.). Uses .passthrough()
104
+ * to allow extra fields for future extension without schema breakage.
105
+ */
106
+ const ValueAssertionSchema = z
107
+ .object({
108
+ type: z.enum(CURATED_ASSERTION_TYPES),
109
+ value: z.unknown().optional(),
110
+ threshold: z.number().optional(),
111
+ weight: z.number().optional(),
112
+ })
113
+ .passthrough();
114
+ /** Union of all supported assertion shapes. */
115
+ const AssertionSchema = z.union([
116
+ TemplatedAssertionSchema,
117
+ ValueAssertionSchema,
118
+ ]);
119
+ // ---------------------------------------------------------------------------
120
+ // Nested config schemas
121
+ // ---------------------------------------------------------------------------
122
+ const BaselineConfigSchema = z
123
+ .object({
124
+ enabled: z.boolean().optional(),
125
+ rubric: z.enum(["abbreviated", "full", "none"]).optional(),
126
+ })
127
+ .optional();
128
+ // ---------------------------------------------------------------------------
129
+ // CanonicalTaskSchema — the single canonical task shape
130
+ //
131
+ // Validates the GeneralizedTaskDefinition shape. Field names match the
132
+ // internal domain model directly — no mapping layer needed.
133
+ //
134
+ // YAML tasks may omit `mode` (defaults to "literacy"). All other fields
135
+ // use the canonical names: `title`, `area`, `prompt.text`, `context.docs`,
136
+ // `assertions`.
137
+ // ---------------------------------------------------------------------------
138
+ /**
139
+ * Zod schema for a single task definition using canonical field names.
140
+ *
141
+ * Uses .passthrough() to allow mode-specific fields (serverConfig, sandbox,
142
+ * handler, etc.) without listing every possible field. Mode-specific
143
+ * validation is deferred to the pipeline's mode handlers.
144
+ */
145
+ export const CanonicalTaskSchema = z
146
+ .object({
147
+ id: z
148
+ .string()
149
+ .min(1)
150
+ .regex(/^[a-z0-9][a-z0-9-]*$/, "Task ID must be lowercase alphanumeric with hyphens"),
151
+ mode: z.string().default("literacy"),
152
+ title: z.string().min(1),
153
+ description: z.string().optional(),
154
+ area: z.string().optional(),
155
+ difficulty: z.enum(["basic", "intermediate", "advanced"]).optional(),
156
+ status: z
157
+ .enum(["active", "draft", "paused", "archived"])
158
+ .optional()
159
+ .default("active"),
160
+ tags: z.array(z.string()).optional(),
161
+ prompt: z
162
+ .object({
163
+ template: z.string().optional(),
164
+ text: z.string().optional(),
165
+ systemMessage: z.string().optional(),
166
+ vars: z.record(z.string(), z.unknown()).optional(),
167
+ })
168
+ .optional(),
169
+ context: z
170
+ .object({
171
+ docs: z.array(CanonicalDocRefSchema).optional(),
172
+ fixtures: z.array(z.string()).optional(),
173
+ })
174
+ .optional(),
175
+ assertions: z.array(AssertionSchema).optional(),
176
+ referenceSolution: z.string().optional(),
177
+ docCoverage: z.boolean().optional().default(false),
178
+ baseline: BaselineConfigSchema,
179
+ rubric: z.unknown().optional(),
180
+ providers: z.array(z.unknown()).optional(),
181
+ options: z.unknown().optional(),
182
+ metadata: z.record(z.string(), z.unknown()).optional(),
183
+ })
184
+ .passthrough();
185
+ /**
186
+ * Schema for an array of canonical tasks — what a single .ailf/tasks/*.yaml
187
+ * file contains. Each file must define at least one task.
188
+ */
189
+ export const CanonicalTaskFileSchema = z.array(CanonicalTaskSchema).min(1);
190
+ /**
191
+ * Parse and validate a task file's content against the canonical schema.
192
+ * Returns typed tasks or throws with a user-friendly Zod error message.
193
+ *
194
+ * Accepts pre-parsed YAML data (unknown), not a raw string.
195
+ */
196
+ export function parseCanonicalTaskFile(raw, filename) {
197
+ const result = CanonicalTaskFileSchema.safeParse(raw);
31
198
  if (!result.success) {
32
199
  const messages = result.error.issues
33
200
  .map((i) => ` [${i.path.join(".")}]: ${i.message}`)
34
201
  .join("\n");
35
- throw new Error(`Invalid repo task file "${filename}":\n${messages}`);
202
+ throw new Error(`Invalid task file "${filename}":\n${messages}`);
36
203
  }
37
204
  return result.data;
38
205
  }
39
206
  // ---------------------------------------------------------------------------
207
+ // Legacy field name detection
208
+ //
209
+ // When authors accidentally use the old @sanity/ailf-tasks field names
210
+ // (featureArea, canonicalDocs, assert, vars), surface a helpful error
211
+ // message telling them what the canonical names are.
212
+ // ---------------------------------------------------------------------------
213
+ /** Old field names from @sanity/ailf-tasks → canonical equivalents */
214
+ const LEGACY_FIELD_MAP = {
215
+ featureArea: "area",
216
+ canonicalDocs: "context.docs (nested under context: { docs: [...] })",
217
+ assert: "assertions",
218
+ vars: "prompt (nested under prompt: { text: ... })",
219
+ };
220
+ /**
221
+ * Detect legacy field names in raw task data and return helpful messages.
222
+ *
223
+ * Runs BEFORE Zod parsing to catch the most common migration mistake —
224
+ * using old field names from @sanity/ailf-tasks instead of the canonical
225
+ * GeneralizedTaskDefinition shape.
226
+ */
227
+ export function detectLegacyFieldNames(raw, filename) {
228
+ const warnings = [];
229
+ if (!Array.isArray(raw))
230
+ return warnings;
231
+ for (let i = 0; i < raw.length; i++) {
232
+ const entry = raw[i];
233
+ if (typeof entry !== "object" || entry === null)
234
+ continue;
235
+ const obj = entry;
236
+ const taskId = typeof obj.id === "string" ? obj.id : `task[${i}]`;
237
+ for (const [legacy, canonical] of Object.entries(LEGACY_FIELD_MAP)) {
238
+ if (legacy in obj) {
239
+ warnings.push(`[${filename}] ${taskId}: Found legacy field "${legacy}" — ` +
240
+ `use "${canonical}" instead. ` +
241
+ "See contributing-tasks.md for the canonical task format.");
242
+ }
243
+ }
244
+ }
245
+ return warnings;
246
+ }
247
+ // ---------------------------------------------------------------------------
40
248
  // Config schemas — specific to the eval pipeline
41
249
  // ---------------------------------------------------------------------------
42
250
  const TriggerModeSchema = z.enum(["validate-only", "eval"]);
@@ -1,18 +1,22 @@
1
1
  /**
2
- * Adapter: Load task definitions from .ailf/tasks/*.yaml in an external repo.
2
+ * Adapter: Load task definitions from .ailf/tasks/ in an external repo.
3
3
  *
4
- * Repo-based tasks use a camelCase YAML format (matching the Content Lake
5
- * document schema) with slug strings instead of Sanity references. The
6
- * mapping to LiteracyTaskDefinition is straightforward — field names are
7
- * already aligned with the domain type.
4
+ * Supports two task file formats:
8
5
  *
9
- * This adapter is structurally similar to YamlTaskSource but:
10
- * - Reads from an arbitrary directory (not the framework's tasks/ dir)
11
- * - Uses camelCase field names (not snake_case)
12
- * - Has an explicit featureArea field per task (not derived from filename)
6
+ * 1. **TypeScript (.task.ts)** — Tasks authored with `defineTask()` from
7
+ * `@sanity/ailf`. These use `GeneralizedTaskDefinition` field names
8
+ * and are passed through with basic runtime guards.
13
9
  *
10
+ * 2. **YAML (.yaml)** — Tasks using the canonical `GeneralizedTaskDefinition`
11
+ * field names (area, context.docs, assertions, prompt.text). Validated
12
+ * through the CanonicalTaskSchema Zod schema.
13
+ *
14
+ * All tasks use the single canonical shape — no mapping layer, no dual-shape
15
+ * detection. The `mode` field defaults to "literacy" for YAML tasks that
16
+ * omit it.
17
+ *
18
+ * @see packages/core/src/types/generalized-task.ts — canonical types
14
19
  * @see packages/core/src/ports/task-source.ts — TaskSource port
15
- * @see docs/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
16
20
  */
17
21
  import type { FilterOptions, GeneralizedTaskDefinition, TaskSource } from "../../_vendor/ailf-core/index.d.ts";
18
22
  export declare class RepoTaskSource implements TaskSource {
@@ -1,24 +1,31 @@
1
1
  /**
2
- * Adapter: Load task definitions from .ailf/tasks/*.yaml in an external repo.
2
+ * Adapter: Load task definitions from .ailf/tasks/ in an external repo.
3
3
  *
4
- * Repo-based tasks use a camelCase YAML format (matching the Content Lake
5
- * document schema) with slug strings instead of Sanity references. The
6
- * mapping to LiteracyTaskDefinition is straightforward — field names are
7
- * already aligned with the domain type.
4
+ * Supports two task file formats:
8
5
  *
9
- * This adapter is structurally similar to YamlTaskSource but:
10
- * - Reads from an arbitrary directory (not the framework's tasks/ dir)
11
- * - Uses camelCase field names (not snake_case)
12
- * - Has an explicit featureArea field per task (not derived from filename)
6
+ * 1. **TypeScript (.task.ts)** — Tasks authored with `defineTask()` from
7
+ * `@sanity/ailf`. These use `GeneralizedTaskDefinition` field names
8
+ * and are passed through with basic runtime guards.
13
9
  *
10
+ * 2. **YAML (.yaml)** — Tasks using the canonical `GeneralizedTaskDefinition`
11
+ * field names (area, context.docs, assertions, prompt.text). Validated
12
+ * through the CanonicalTaskSchema Zod schema.
13
+ *
14
+ * All tasks use the single canonical shape — no mapping layer, no dual-shape
15
+ * detection. The `mode` field defaults to "literacy" for YAML tasks that
16
+ * omit it.
17
+ *
18
+ * @see packages/core/src/types/generalized-task.ts — canonical types
14
19
  * @see packages/core/src/ports/task-source.ts — TaskSource port
15
- * @see docs/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
16
20
  */
17
21
  import { existsSync, readdirSync, readFileSync } from "fs";
18
22
  import { resolve } from "path";
19
23
  import { load } from "js-yaml";
20
- import { parseRepoTaskFile } from "./repo-schemas.js";
24
+ import { CANONICAL_EVAL_MODES } from "../../_vendor/ailf-shared/index.js";
25
+ import { detectLegacyFieldNames, parseCanonicalTaskFile, } from "./repo-schemas.js";
21
26
  import { discoverTsTaskFiles, loadTsTaskFile } from "./task-file-loader.js";
27
+ /** Set of canonical mode names for O(1) lookup */
28
+ const KNOWN_MODES = new Set(CANONICAL_EVAL_MODES);
22
29
  // ---------------------------------------------------------------------------
23
30
  // RepoTaskSource adapter
24
31
  // ---------------------------------------------------------------------------
@@ -41,6 +48,7 @@ export class RepoTaskSource {
41
48
  " Expected .ailf/tasks/*.yaml or .ailf/tasks/*.task.ts files");
42
49
  }
43
50
  const definitions = [];
51
+ // Load YAML task files
44
52
  for (const file of yamlFiles) {
45
53
  const filePath = resolve(this.tasksDir, file);
46
54
  const raw = readFileSync(filePath, "utf-8");
@@ -49,139 +57,90 @@ export class RepoTaskSource {
49
57
  throw new Error(`${file} did not parse to an array of tasks. ` +
50
58
  "Repo task files must contain a YAML array of task definitions.");
51
59
  }
52
- // Validate entire file through Zod schema
60
+ // Detect legacy field names (featureArea, canonicalDocs, assert, vars)
61
+ // and surface helpful migration messages before Zod validation fails.
62
+ const legacyWarnings = detectLegacyFieldNames(parsed, file);
63
+ if (legacyWarnings.length > 0) {
64
+ throw new Error(`${file} uses legacy field names from @sanity/ailf-tasks.\n` +
65
+ "Task files must use canonical GeneralizedTaskDefinition field names.\n\n" +
66
+ legacyWarnings.join("\n") +
67
+ "\n\nSee contributing-tasks.md for the canonical task format.");
68
+ }
69
+ // Validate through canonical Zod schema
53
70
  let validated;
54
71
  try {
55
- validated = parseRepoTaskFile(parsed, file);
72
+ validated = parseCanonicalTaskFile(parsed, file);
56
73
  }
57
74
  catch (err) {
58
75
  const msg = err instanceof Error ? err.message : String(err);
59
76
  throw new Error(`Failed to validate ${file}:\n${msg}`, { cause: err });
60
77
  }
61
- for (const entry of validated) {
62
- // Filter stages:
63
- // 1. Area filter — skip tasks outside requested feature areas
64
- // 2. Task ID filter — skip tasks not matching explicit task IDs
65
- // 3. Status filter — skip non-active tasks (unless targeting by ID)
66
- // 4. Tag filter — skip tasks not matching requested tags
67
- // Area filter
68
- if (filter?.areas &&
69
- filter.areas.length > 0 &&
70
- !filter.areas
71
- .map((a) => a.toLowerCase())
72
- .includes(entry.featureArea.toLowerCase())) {
73
- continue;
74
- }
75
- // Task ID filter
76
- if (filter?.taskIds &&
77
- filter.taskIds.length > 0 &&
78
- !filter.taskIds.includes(entry.id)) {
79
- continue;
80
- }
81
- // Status filter — unified lifecycle control
82
- // Resolve effective status: explicit status field wins,
83
- // then fall back to execution.enabled for backwards compat
84
- const effectiveStatus = entry.status ??
85
- (entry.execution?.enabled === false ? "paused" : "active");
86
- const isTargetedById = filter?.taskIds && filter.taskIds.includes(entry.id);
87
- if (effectiveStatus === "archived") {
88
- continue; // Archived is always excluded, even with --task
89
- }
90
- if (effectiveStatus === "paused" && !isTargetedById) {
91
- continue; // Paused skipped unless explicitly targeted
78
+ for (const task of validated) {
79
+ if (passesFilter(task, filter)) {
80
+ definitions.push(task);
92
81
  }
93
- if (effectiveStatus === "draft" &&
94
- !isTargetedById &&
95
- !filter?.includeDrafts) {
96
- continue; // Draft skipped unless targeted or includeDrafts
97
- }
98
- // Tag filter — skip tasks that don't match any requested tag
99
- if (filter?.tags &&
100
- filter.tags.length > 0 &&
101
- (!entry.tags || !entry.tags.some((t) => filter.tags.includes(t)))) {
102
- continue;
103
- }
104
- definitions.push(mapToLiteracyTask(entry));
105
82
  }
106
83
  }
107
84
  // Load TS task files (.task.ts / .task.js)
108
85
  for (const tsFile of tsFiles) {
109
86
  const loaded = await loadTsTaskFile(tsFile);
110
87
  const filename = tsFile.split("/").pop() ?? tsFile;
111
- // Validate through the same schema as YAML tasks
112
- let validated;
113
- try {
114
- validated = parseRepoTaskFile(loaded.tasks, filename);
115
- }
116
- catch (err) {
117
- const msg = err instanceof Error ? err.message : String(err);
118
- throw new Error(`Failed to validate ${filename}:\n${msg}`, {
119
- cause: err,
120
- });
121
- }
122
- for (const entry of validated) {
123
- // Apply the same filtering as YAML tasks
124
- if (filter?.areas &&
125
- filter.areas.length > 0 &&
126
- !filter.areas
127
- .map((a) => a.toLowerCase())
128
- .includes(entry.featureArea.toLowerCase())) {
129
- continue;
88
+ for (const raw of loaded.tasks) {
89
+ const task = raw;
90
+ if (!task.id || typeof task.id !== "string") {
91
+ throw new Error(`Task in ${filename} is missing a valid "id" field`);
130
92
  }
131
- if (filter?.taskIds &&
132
- filter.taskIds.length > 0 &&
133
- !filter.taskIds.includes(entry.id)) {
134
- continue;
93
+ if (!task.mode || !KNOWN_MODES.has(task.mode)) {
94
+ throw new Error(`Task "${task.id}" in ${filename} has missing or unknown mode "${task.mode}". ` +
95
+ `Valid modes: ${[...KNOWN_MODES].join(", ")}`);
135
96
  }
136
- const effectiveStatus = entry.status ??
137
- (entry.execution?.enabled === false ? "paused" : "active");
138
- const isTargetedById = filter?.taskIds && filter.taskIds.includes(entry.id);
139
- if (effectiveStatus === "archived")
140
- continue;
141
- if (effectiveStatus === "paused" && !isTargetedById)
142
- continue;
143
- if (effectiveStatus === "draft" &&
144
- !isTargetedById &&
145
- !filter?.includeDrafts) {
146
- continue;
97
+ if (passesFilter(task, filter)) {
98
+ definitions.push(task);
147
99
  }
148
- if (filter?.tags &&
149
- filter.tags.length > 0 &&
150
- (!entry.tags || !entry.tags.some((t) => filter.tags.includes(t)))) {
151
- continue;
152
- }
153
- definitions.push(mapToLiteracyTask(entry));
154
100
  }
155
101
  }
156
102
  return definitions;
157
103
  }
158
104
  }
159
105
  // ---------------------------------------------------------------------------
160
- // Mapping helpers
106
+ // Filter helper
161
107
  // ---------------------------------------------------------------------------
162
- function mapToLiteracyTask(raw) {
163
- // Extract task prompt from vars.task (same convention as YAML tasks)
164
- const vars = (raw.vars ?? {});
165
- const { docs: _docs, task, ...rest } = vars;
166
- // Canonical docs are already parsed into the polymorphic union by Zod.
167
- const docs = (raw.canonicalDocs ??
168
- []);
169
- const extraVars = Object.keys(rest).length > 0 ? rest : undefined;
170
- return {
171
- mode: "literacy",
172
- id: raw.id,
173
- title: raw.description,
174
- area: raw.featureArea,
175
- prompt: {
176
- text: typeof task === "string" ? task : "",
177
- ...(extraVars ? { vars: extraVars } : {}),
178
- },
179
- context: { docs },
180
- assertions: (raw.assert ?? []),
181
- referenceSolution: raw.referenceSolution ?? "",
182
- docCoverage: raw.docCoverage ?? false,
183
- ...(raw.baseline ? { baseline: raw.baseline } : {}),
184
- ...(raw.status && raw.status !== "active" ? { status: raw.status } : {}),
185
- ...(raw.tags?.length ? { tags: raw.tags } : {}),
186
- };
108
+ /**
109
+ * Apply standard task filtering. Used for both YAML and TS tasks.
110
+ */
111
+ function passesFilter(task, filter) {
112
+ // Area filter
113
+ if (filter?.areas &&
114
+ filter.areas.length > 0 &&
115
+ (!task.area ||
116
+ !filter.areas
117
+ .map((a) => a.toLowerCase())
118
+ .includes(task.area.toLowerCase()))) {
119
+ return false;
120
+ }
121
+ // Task ID filter
122
+ if (filter?.taskIds &&
123
+ filter.taskIds.length > 0 &&
124
+ !filter.taskIds.includes(task.id)) {
125
+ return false;
126
+ }
127
+ // Status filter — unified lifecycle control
128
+ const effectiveStatus = task.status ?? "active";
129
+ const isTargetedById = filter?.taskIds && filter.taskIds.includes(task.id);
130
+ if (effectiveStatus === "archived")
131
+ return false;
132
+ if (effectiveStatus === "paused" && !isTargetedById)
133
+ return false;
134
+ if (effectiveStatus === "draft" &&
135
+ !isTargetedById &&
136
+ !filter?.includeDrafts) {
137
+ return false;
138
+ }
139
+ // Tag filter
140
+ if (filter?.tags &&
141
+ filter.tags.length > 0 &&
142
+ (!task.tags || !task.tags.some((t) => filter.tags.includes(t)))) {
143
+ return false;
144
+ }
145
+ return true;
187
146
  }
@@ -6,7 +6,7 @@
6
6
  * config applies. This drives whether the pipeline runs in validate-only
7
7
  * mode or full eval mode, and whether results are blocking.
8
8
  *
9
- * @see docs/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
9
+ * @see docs/archive/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
10
10
  */
11
11
  export type TriggerContext = {
12
12
  type: "pr";
@@ -6,7 +6,7 @@
6
6
  * config applies. This drives whether the pipeline runs in validate-only
7
7
  * mode or full eval mode, and whether results are blocking.
8
8
  *
9
- * @see docs/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
9
+ * @see docs/archive/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
10
10
  */
11
11
  import { existsSync, readFileSync } from "fs";
12
12
  import { resolve } from "path";
@@ -1,8 +1,39 @@
1
1
  /**
2
- * repo-validation.ts — Re-exports semantic validation from @sanity/ailf-tasks.
2
+ * repo-validation.ts — Semantic validation for task definitions.
3
3
  *
4
- * The validation logic is the single source of truth in @sanity/ailf-tasks.
5
- * This file re-exports so existing eval-package importers don't need
6
- * to change their import paths.
4
+ * Checks that go beyond Zod schema parsing:
5
+ * - Assertion types are in the curated set
6
+ * - Rubric template names resolve to known templates
7
+ * - Doc ref slugs look reasonable (slugs, not URLs)
8
+ * - Tasks have at least one LLM rubric assertion (recommended)
9
+ * - Tasks have a prompt text (recommended)
10
+ *
11
+ * These produce warnings, not errors — the pipeline can still run
12
+ * with imperfect tasks. Only structural failures (caught by Zod) block.
13
+ *
14
+ * Previously this file re-exported from @sanity/ailf-tasks. That package
15
+ * has been eliminated — all validation logic now lives here.
16
+ */
17
+ import { type CanonicalTask } from "./repo-schemas.js";
18
+ export interface ValidationResult {
19
+ valid: boolean;
20
+ errors: ValidationMessage[];
21
+ warnings: ValidationMessage[];
22
+ }
23
+ export interface ValidationMessage {
24
+ taskId: string;
25
+ field: string;
26
+ message: string;
27
+ }
28
+ /**
29
+ * Run semantic validation on an array of parsed canonical tasks.
30
+ *
31
+ * Returns warnings for issues that don't block execution (unknown feature
32
+ * areas, unresolved slugs) and errors for issues that would cause pipeline
33
+ * failures (completely missing required fields — though Zod catches most).
34
+ */
35
+ export declare function validateCanonicalTasks(tasks: CanonicalTask[]): ValidationResult;
36
+ /**
37
+ * Format validation results for console output.
7
38
  */
8
- export { detectSnakeCaseFields, formatValidationResult, validateRepoTasks, type ValidationMessage, type ValidationResult, } from "../../_vendor/ailf-tasks/index.d.ts";
39
+ export declare function formatValidationResult(result: ValidationResult): string;