@sanity/ailf 0.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377)
  1. package/README.md +0 -1
  2. package/config/features.ts +23 -0
  3. package/config/models.ts +95 -0
  4. package/config/prompts.ts +16 -0
  5. package/config/rubrics.ts +225 -0
  6. package/config/schedules.ts +47 -0
  7. package/config/sinks.ts +37 -0
  8. package/config/sources.ts +21 -0
  9. package/config/thresholds.ts +61 -0
  10. package/dist/_vendor/ailf-core/config-helpers.d.ts +171 -0
  11. package/dist/_vendor/ailf-core/config-helpers.js +170 -0
  12. package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
  13. package/dist/_vendor/ailf-core/env-helper.js +45 -0
  14. package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
  15. package/dist/_vendor/ailf-core/examples/index.js +25 -0
  16. package/dist/_vendor/ailf-core/index.d.ts +3 -0
  17. package/dist/_vendor/ailf-core/index.js +5 -0
  18. package/dist/_vendor/ailf-core/ports/context.d.ts +17 -2
  19. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
  20. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
  21. package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
  22. package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
  23. package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
  24. package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
  25. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +8 -2
  26. package/dist/_vendor/ailf-core/schemas/eval-config.js +17 -2
  27. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +9 -3
  28. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +8 -1
  29. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -31
  30. package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -9
  31. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
  32. package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
  33. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
  34. package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
  35. package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
  36. package/dist/_vendor/ailf-core/services/index.js +2 -1
  37. package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
  38. package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
  39. package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
  40. package/dist/_vendor/ailf-core/services/scoring.js +25 -15
  41. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
  42. package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
  43. package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
  44. package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
  45. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +332 -0
  46. package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
  47. package/dist/_vendor/ailf-core/types/index.d.ts +45 -83
  48. package/dist/_vendor/ailf-core/types/index.js +8 -1
  49. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +257 -0
  50. package/dist/_vendor/ailf-core/types/plugin-registry.js +185 -0
  51. package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
  52. package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
  53. package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
  54. package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
  55. package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
  56. package/dist/_vendor/ailf-core/types/trace.js +18 -0
  57. package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
  58. package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
  59. package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
  60. package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
  61. package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
  62. package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
  63. package/dist/_vendor/ailf-shared/index.d.ts +0 -1
  64. package/dist/_vendor/ailf-shared/index.js +0 -1
  65. package/dist/adapters/api-client/build-request.js +14 -13
  66. package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
  67. package/dist/adapters/config-sources/file-config-adapter.js +39 -12
  68. package/dist/adapters/config-sources/index.d.ts +2 -0
  69. package/dist/adapters/config-sources/index.js +1 -0
  70. package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
  71. package/dist/adapters/config-sources/ts-config-loader.js +141 -0
  72. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
  73. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
  74. package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
  75. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  76. package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
  77. package/dist/adapters/task-sources/content-lake-task-source.js +35 -39
  78. package/dist/adapters/task-sources/index.d.ts +3 -2
  79. package/dist/adapters/task-sources/index.js +3 -2
  80. package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
  81. package/dist/adapters/task-sources/repo-schemas.js +227 -19
  82. package/dist/adapters/task-sources/repo-task-source.d.ts +16 -12
  83. package/dist/adapters/task-sources/repo-task-source.js +92 -80
  84. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  85. package/dist/adapters/task-sources/repo-validation.js +126 -5
  86. package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
  87. package/dist/adapters/task-sources/task-file-loader.js +83 -0
  88. package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
  89. package/dist/adapters/task-sources/yaml-task-source.js +19 -16
  90. package/dist/cli.js +0 -2
  91. package/dist/commands/baseline.js +4 -1
  92. package/dist/commands/calculate-scores.js +1 -1
  93. package/dist/commands/coverage-audit.js +9 -1
  94. package/dist/commands/explain-handler.js +25 -23
  95. package/dist/commands/fetch-docs.js +3 -2
  96. package/dist/commands/generate-configs.js +1 -1
  97. package/dist/commands/init.d.ts +6 -4
  98. package/dist/commands/init.js +302 -23
  99. package/dist/commands/interactive.js +11 -7
  100. package/dist/commands/pipeline-action.d.ts +2 -0
  101. package/dist/commands/pipeline-action.js +16 -6
  102. package/dist/commands/pipeline.d.ts +1 -0
  103. package/dist/commands/pipeline.js +4 -2
  104. package/dist/commands/pr-comment.js +1 -1
  105. package/dist/commands/publish.js +2 -2
  106. package/dist/commands/readiness-report.js +13 -6
  107. package/dist/commands/validate-tasks.d.ts +2 -2
  108. package/dist/commands/validate-tasks.js +26 -15
  109. package/dist/composition-root.d.ts +13 -1
  110. package/dist/composition-root.js +99 -4
  111. package/dist/index.d.ts +41 -0
  112. package/dist/index.js +48 -0
  113. package/dist/orchestration/build-app-context.js +1 -0
  114. package/dist/orchestration/build-step-sequence.js +28 -8
  115. package/dist/orchestration/steps/calculate-scores-step.js +24 -11
  116. package/dist/orchestration/steps/fetch-docs-step.js +8 -7
  117. package/dist/orchestration/steps/gap-analysis-step.js +8 -7
  118. package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
  119. package/dist/orchestration/steps/generate-configs-step.js +261 -51
  120. package/dist/orchestration/steps/grader-consistency-step.js +7 -4
  121. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  122. package/dist/orchestration/steps/readiness-step.js +5 -6
  123. package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
  124. package/dist/orchestration/steps/run-eval-step.js +8 -7
  125. package/dist/pipeline/cache.d.ts +1 -1
  126. package/dist/pipeline/cache.js +36 -8
  127. package/dist/pipeline/calculate-scores.d.ts +2 -4
  128. package/dist/pipeline/calculate-scores.js +43 -113
  129. package/dist/pipeline/checks.js +2 -2
  130. package/dist/pipeline/compare.js +8 -8
  131. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
  132. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
  133. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
  134. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
  135. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
  136. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
  137. package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
  138. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
  139. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
  140. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +392 -0
  141. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
  142. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
  143. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
  144. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
  145. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
  146. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +404 -0
  147. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
  148. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
  149. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
  150. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
  151. package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
  152. package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
  153. package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
  154. package/dist/pipeline/compiler/assertion-mapper.js +175 -0
  155. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
  156. package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
  157. package/dist/pipeline/compiler/config-loader.d.ts +56 -0
  158. package/dist/pipeline/compiler/config-loader.js +111 -0
  159. package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
  160. package/dist/pipeline/compiler/fixture-resolver.js +113 -0
  161. package/dist/pipeline/compiler/hash.d.ts +11 -0
  162. package/dist/pipeline/compiler/hash.js +18 -0
  163. package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
  164. package/dist/pipeline/compiler/ignore-fields.js +113 -0
  165. package/dist/pipeline/compiler/index.d.ts +29 -0
  166. package/dist/pipeline/compiler/index.js +45 -0
  167. package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
  168. package/dist/pipeline/compiler/literacy-bridge.js +172 -0
  169. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  170. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  171. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  172. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  173. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  174. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  175. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
  176. package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
  177. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  178. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  179. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
  180. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
  181. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
  182. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
  183. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
  184. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
  185. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  186. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  187. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  188. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  189. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  190. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  191. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  192. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  193. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  194. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  195. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  196. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  197. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
  198. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  199. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  200. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  201. package/dist/pipeline/compiler/mode-handlers/index.d.ts +15 -0
  202. package/dist/pipeline/compiler/mode-handlers/index.js +19 -0
  203. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  204. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  205. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  206. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  207. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  208. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  209. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  210. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  211. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  212. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  213. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  214. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  215. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  216. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  217. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  218. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  219. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  220. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  221. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  222. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  223. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  224. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  225. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  226. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  227. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
  228. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
  229. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  230. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  231. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  232. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  233. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  234. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  235. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  236. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
  237. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  238. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  239. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  240. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  241. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  242. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
  243. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  244. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
  245. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  246. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  247. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
  248. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
  249. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
  250. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  251. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  252. package/dist/pipeline/compiler/preset-loader.js +99 -0
  253. package/dist/pipeline/compiler/presets/index.d.ts +9 -0
  254. package/dist/pipeline/compiler/presets/index.js +8 -0
  255. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +42 -0
  256. package/dist/pipeline/compiler/presets/sanity-literacy.js +208 -0
  257. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
  258. package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
  259. package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
  260. package/dist/pipeline/compiler/provider-assembler.js +137 -0
  261. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
  262. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
  263. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
  264. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
  265. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
  266. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
  267. package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
  268. package/dist/pipeline/compiler/sandbox/index.js +11 -0
  269. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
  270. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
  271. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
  272. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
  273. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
  274. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
  275. package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
  276. package/dist/pipeline/compiler/scoring-bridge.js +114 -0
  277. package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
  278. package/dist/pipeline/compiler/task-graph-builder.js +291 -0
  279. package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
  280. package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
  281. package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
  282. package/dist/pipeline/compiler/telemetry/index.js +19 -0
  283. package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
  284. package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
  285. package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
  286. package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
  287. package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
  288. package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
  289. package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
  290. package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
  291. package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
  292. package/dist/pipeline/compiler/variable-resolver.js +115 -0
  293. package/dist/pipeline/coverage-audit.d.ts +15 -5
  294. package/dist/pipeline/coverage-audit.js +41 -22
  295. package/dist/pipeline/eval-constants.d.ts +16 -6
  296. package/dist/pipeline/eval-constants.js +25 -4
  297. package/dist/pipeline/eval-fingerprint.d.ts +2 -2
  298. package/dist/pipeline/eval-fingerprint.js +8 -9
  299. package/dist/pipeline/expand-tasks.d.ts +19 -10
  300. package/dist/pipeline/expand-tasks.js +34 -28
  301. package/dist/pipeline/gap-analysis.d.ts +1 -1
  302. package/dist/pipeline/gap-analysis.js +2 -2
  303. package/dist/pipeline/generate-configs.d.ts +22 -4
  304. package/dist/pipeline/generate-configs.js +53 -24
  305. package/dist/pipeline/grader-api.d.ts +3 -3
  306. package/dist/pipeline/grader-api.js +5 -12
  307. package/dist/pipeline/grader-compare-runner.js +20 -27
  308. package/dist/pipeline/grader-comparison.d.ts +4 -8
  309. package/dist/pipeline/grader-comparison.js +11 -17
  310. package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
  311. package/dist/pipeline/grader-consistency-runner.js +16 -20
  312. package/dist/pipeline/grader-consistency.d.ts +6 -10
  313. package/dist/pipeline/grader-consistency.js +13 -32
  314. package/dist/pipeline/grader-sensitivity-runner.js +7 -5
  315. package/dist/pipeline/grader-sensitivity.d.ts +2 -6
  316. package/dist/pipeline/grader-sensitivity.js +10 -10
  317. package/dist/pipeline/grader-validate-runner.js +7 -5
  318. package/dist/pipeline/grader-validation.d.ts +2 -6
  319. package/dist/pipeline/grader-validation.js +14 -22
  320. package/dist/pipeline/map-request-to-config.js +7 -1
  321. package/dist/pipeline/mirror-repo-tasks.d.ts +13 -13
  322. package/dist/pipeline/mirror-repo-tasks.js +22 -21
  323. package/dist/pipeline/normalize-mode.d.ts +49 -0
  324. package/dist/pipeline/normalize-mode.js +64 -0
  325. package/dist/pipeline/plan.d.ts +5 -2
  326. package/dist/pipeline/plan.js +134 -78
  327. package/dist/pipeline/pr-comment.js +2 -0
  328. package/dist/pipeline/profile-resolution.d.ts +22 -14
  329. package/dist/pipeline/profile-resolution.js +41 -19
  330. package/dist/pipeline/provenance.d.ts +2 -2
  331. package/dist/pipeline/provenance.js +12 -17
  332. package/dist/pipeline/release-report.js +4 -4
  333. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  334. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  335. package/dist/pipeline/rubric-loader.d.ts +20 -0
  336. package/dist/pipeline/rubric-loader.js +37 -0
  337. package/dist/pipeline/validate.d.ts +4 -4
  338. package/dist/pipeline/validate.js +64 -53
  339. package/dist/schedules/loader.js +18 -8
  340. package/dist/scripts/migrate-task-mode.d.ts +24 -0
  341. package/dist/scripts/migrate-task-mode.js +85 -0
  342. package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
  343. package/dist/scripts/validate-task-sources.d.ts +1 -1
  344. package/dist/scripts/validate-task-sources.js +15 -15
  345. package/dist/sinks/loader.js +5 -7
  346. package/dist/sources.d.ts +7 -7
  347. package/dist/sources.js +22 -24
  348. package/dist/webhook/dispatch.js +2 -1
  349. package/package.json +15 -4
  350. package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
  351. package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
  352. package/tasks/literacy/frameworks.task.ts +128 -0
  353. package/tasks/literacy/functions.task.ts +69 -0
  354. package/tasks/literacy/groq.task.ts +258 -0
  355. package/tasks/literacy/nextjs-live.task.ts +75 -0
  356. package/tasks/literacy/studio-setup.task.ts +131 -0
  357. package/tasks/literacy/visual-editing.task.ts +146 -0
  358. package/config/features.yaml +0 -116
  359. package/config/models.yaml +0 -116
  360. package/config/prompts.yaml +0 -75
  361. package/config/rubrics.yaml +0 -81
  362. package/config/schedules.yaml +0 -43
  363. package/config/sinks.yaml +0 -54
  364. package/config/sources.yaml +0 -51
  365. package/config/thresholds.yaml +0 -49
  366. package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
  367. package/dist/_vendor/ailf-tasks/cli.js +0 -61
  368. package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
  369. package/dist/_vendor/ailf-tasks/index.js +0 -16
  370. package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
  371. package/dist/_vendor/ailf-tasks/parser.js +0 -73
  372. package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
  373. package/dist/_vendor/ailf-tasks/schemas.js +0 -180
  374. package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
  375. package/dist/_vendor/ailf-tasks/validation.js +0 -162
  376. package/dist/agent-observer/test-imports.d.ts +0 -7
  377. package/dist/agent-observer/test-imports.js +0 -185
@@ -1,42 +1,250 @@
1
1
  /**
2
- * repo-schemas.ts — Re-exports task schemas + defines config schemas.
2
+ * repo-schemas.ts — Canonical Zod schemas for task and config validation.
3
3
  *
4
- * Task schemas (RepoTaskSchema, assertions, etc.) are the single source
5
- * of truth in @sanity/ailf-tasks. This file re-exports them so existing
6
- * importers within the eval package don't need to change their paths.
4
+ * Task schemas validate .ailf/tasks/*.yaml and .task.ts files against the
5
+ * canonical GeneralizedTaskDefinition shape. Field names match the internal
6
+ * domain model: `area` (not featureArea), `assertions` (not assert),
7
+ * `context.docs` (not canonicalDocs), `prompt.text` (not vars.task).
7
8
  *
8
- * Config schemas (RepoConfigSchema, trigger config, etc.) remain here
9
- * because they are specific to the eval pipeline and not needed by
10
- * external tools that only validate task YAML.
9
+ * Previously this file re-exported from @sanity/ailf-tasks. That package
10
+ * has been eliminated — all schema logic now lives here.
11
11
  *
12
- * @see packages/tasks/src/schemas.ts — task schema source of truth
12
+ * Config schemas (RepoConfigSchema, trigger config) are eval-pipeline-
13
+ * specific and remain here unchanged.
14
+ *
15
+ * @see packages/core/src/types/generalized-task.ts — canonical TypeScript types
13
16
  * @see docs/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
14
17
  */
15
- import { RepoTaskFileSchema as _Schema } from "../../_vendor/ailf-tasks/index.js";
16
18
  import { z } from "zod";
17
19
  // ---------------------------------------------------------------------------
18
- // Re-exports from @sanity/ailf-tasks (task schemas + validation)
20
+ // Constants — curated assertion types and rubric template names
19
21
  // ---------------------------------------------------------------------------
20
- export { CURATED_ASSERTION_TYPES, RepoTaskFileSchema, RepoTaskSchema, RUBRIC_TEMPLATE_NAMES, } from "../../_vendor/ailf-tasks/index.js";
21
- export { loadTaskDir, parseTaskFile } from "../../_vendor/ailf-tasks/index.js";
22
22
  /**
23
- * Parse and validate a repo task file's content. Returns typed tasks or throws
24
- * with a user-friendly Zod error message.
23
+ * The set of assertion types allowed in task files.
25
24
  *
26
- * NOTE: This accepts pre-parsed YAML data (unknown), not a raw string.
27
- * For raw YAML strings, use `parseTaskFile()` from @sanity/ailf-tasks.
25
+ * This is a curated subset of Promptfoo assertion types — we expose only the
26
+ * types that are stable, well-documented, and useful for external authors.
27
+ */
28
+ export const CURATED_ASSERTION_TYPES = [
29
+ "llm-rubric",
30
+ "contains",
31
+ "contains-any",
32
+ "contains-all",
33
+ "not-contains",
34
+ "icontains",
35
+ "icontains-any",
36
+ "regex",
37
+ "javascript",
38
+ "similar",
39
+ "cost",
40
+ "latency",
41
+ ];
42
+ /**
43
+ * Valid rubric template names — must match keys in config/rubrics.ts.
44
+ */
45
+ export const RUBRIC_TEMPLATE_NAMES = [
46
+ "task-completion",
47
+ "code-correctness",
48
+ "doc-coverage",
49
+ ];
50
+ // ---------------------------------------------------------------------------
51
+ // Doc ref schemas — polymorphic canonical doc references
52
+ // ---------------------------------------------------------------------------
53
+ /**
54
+ * Polymorphic canonical doc reference — discriminated by key presence.
55
+ * Exactly one resolution key (slug, path, id, or perspective) must be present.
56
+ *
57
+ * @see docs/design-docs/canonical-doc-resolution.md
58
+ */
59
+ const SlugDocRefSchema = z.object({
60
+ slug: z.string().min(1),
61
+ reason: z.string().optional().default(""),
62
+ });
63
+ const PathDocRefSchema = z.object({
64
+ path: z.string().min(1),
65
+ reason: z.string().optional().default(""),
66
+ });
67
+ const IdDocRefSchema = z.object({
68
+ id: z.string().min(1),
69
+ reason: z.string().optional().default(""),
70
+ /** Human-readable slug annotation (not used for resolution) */
71
+ slug: z.string().optional(),
72
+ /** Human-readable path annotation (not used for resolution) */
73
+ path: z.string().optional(),
74
+ });
75
+ const PerspectiveDocRefSchema = z.object({
76
+ perspective: z.string().min(1),
77
+ reason: z.string().optional().default(""),
78
+ });
79
+ // Order matters: IdDocRefSchema first because it may also carry `slug`
80
+ // and `path` as optional annotations. Zod tries schemas in order, so
81
+ // entries like `{ id: "...", slug: "..." }` must match IdDocRefSchema
82
+ // (not SlugDocRefSchema).
83
+ const CanonicalDocRefSchema = z.union([
84
+ IdDocRefSchema,
85
+ SlugDocRefSchema,
86
+ PathDocRefSchema,
87
+ PerspectiveDocRefSchema,
88
+ ]);
89
+ // ---------------------------------------------------------------------------
90
+ // Assertion schemas
91
+ // ---------------------------------------------------------------------------
92
+ /**
93
+ * A templated LLM-rubric assertion — uses one of the predefined rubric
94
+ * templates with author-supplied criteria.
28
95
  */
29
- export function parseRepoTaskFile(raw, filename) {
30
- const result = _Schema.safeParse(raw);
96
+ const TemplatedAssertionSchema = z.object({
97
+ type: z.literal("llm-rubric"),
98
+ template: z.enum(RUBRIC_TEMPLATE_NAMES),
99
+ criteria: z.array(z.string().min(1)).min(1),
100
+ weight: z.number().optional(),
101
+ });
102
+ /**
103
+ * A value-based assertion (contains, regex, cost, etc.). Uses .passthrough()
104
+ * to allow extra fields for future extension without schema breakage.
105
+ */
106
+ const ValueAssertionSchema = z
107
+ .object({
108
+ type: z.enum(CURATED_ASSERTION_TYPES),
109
+ value: z.unknown().optional(),
110
+ threshold: z.number().optional(),
111
+ weight: z.number().optional(),
112
+ })
113
+ .passthrough();
114
+ /** Union of all supported assertion shapes. */
115
+ const AssertionSchema = z.union([
116
+ TemplatedAssertionSchema,
117
+ ValueAssertionSchema,
118
+ ]);
119
+ // ---------------------------------------------------------------------------
120
+ // Nested config schemas
121
+ // ---------------------------------------------------------------------------
122
+ const BaselineConfigSchema = z
123
+ .object({
124
+ enabled: z.boolean().optional(),
125
+ rubric: z.enum(["abbreviated", "full", "none"]).optional(),
126
+ })
127
+ .optional();
128
+ // ---------------------------------------------------------------------------
129
+ // CanonicalTaskSchema — the single canonical task shape
130
+ //
131
+ // Validates the GeneralizedTaskDefinition shape. Field names match the
132
+ // internal domain model directly — no mapping layer needed.
133
+ //
134
+ // YAML tasks may omit `mode` (defaults to "literacy"). All other fields
135
+ // use the canonical names: `title`, `area`, `prompt.text`, `context.docs`,
136
+ // `assertions`.
137
+ // ---------------------------------------------------------------------------
138
+ /**
139
+ * Zod schema for a single task definition using canonical field names.
140
+ *
141
+ * Uses .passthrough() to allow mode-specific fields (serverConfig, sandbox,
142
+ * handler, etc.) without listing every possible field. Mode-specific
143
+ * validation is deferred to the pipeline's mode handlers.
144
+ */
145
+ export const CanonicalTaskSchema = z
146
+ .object({
147
+ id: z
148
+ .string()
149
+ .min(1)
150
+ .regex(/^[a-z0-9][a-z0-9-]*$/, "Task ID must be lowercase alphanumeric with hyphens"),
151
+ mode: z.string().default("literacy"),
152
+ title: z.string().min(1),
153
+ description: z.string().optional(),
154
+ area: z.string().optional(),
155
+ difficulty: z.enum(["basic", "intermediate", "advanced"]).optional(),
156
+ status: z
157
+ .enum(["active", "draft", "paused", "archived"])
158
+ .optional()
159
+ .default("active"),
160
+ tags: z.array(z.string()).optional(),
161
+ prompt: z
162
+ .object({
163
+ template: z.string().optional(),
164
+ text: z.string().optional(),
165
+ systemMessage: z.string().optional(),
166
+ vars: z.record(z.string(), z.unknown()).optional(),
167
+ })
168
+ .optional(),
169
+ context: z
170
+ .object({
171
+ docs: z.array(CanonicalDocRefSchema).optional(),
172
+ fixtures: z.array(z.string()).optional(),
173
+ })
174
+ .optional(),
175
+ assertions: z.array(AssertionSchema).optional(),
176
+ referenceSolution: z.string().optional(),
177
+ docCoverage: z.boolean().optional().default(false),
178
+ baseline: BaselineConfigSchema,
179
+ rubric: z.unknown().optional(),
180
+ providers: z.array(z.unknown()).optional(),
181
+ options: z.unknown().optional(),
182
+ metadata: z.record(z.string(), z.unknown()).optional(),
183
+ })
184
+ .passthrough();
185
+ /**
186
+ * Schema for an array of canonical tasks — what a single .ailf/tasks/*.yaml
187
+ * file contains. Each file must define at least one task.
188
+ */
189
+ export const CanonicalTaskFileSchema = z.array(CanonicalTaskSchema).min(1);
190
+ /**
191
+ * Parse and validate a task file's content against the canonical schema.
192
+ * Returns typed tasks or throws with a user-friendly Zod error message.
193
+ *
194
+ * Accepts pre-parsed YAML data (unknown), not a raw string.
195
+ */
196
+ export function parseCanonicalTaskFile(raw, filename) {
197
+ const result = CanonicalTaskFileSchema.safeParse(raw);
31
198
  if (!result.success) {
32
199
  const messages = result.error.issues
33
200
  .map((i) => ` [${i.path.join(".")}]: ${i.message}`)
34
201
  .join("\n");
35
- throw new Error(`Invalid repo task file "${filename}":\n${messages}`);
202
+ throw new Error(`Invalid task file "${filename}":\n${messages}`);
36
203
  }
37
204
  return result.data;
38
205
  }
39
206
  // ---------------------------------------------------------------------------
207
+ // Legacy field name detection
208
+ //
209
+ // When authors accidentally use the old @sanity/ailf-tasks field names
210
+ // (featureArea, canonicalDocs, assert, vars), surface a helpful error
211
+ // message telling them what the canonical names are.
212
+ // ---------------------------------------------------------------------------
213
+ /** Old field names from @sanity/ailf-tasks → canonical equivalents */
214
+ const LEGACY_FIELD_MAP = {
215
+ featureArea: "area",
216
+ canonicalDocs: "context.docs (nested under context: { docs: [...] })",
217
+ assert: "assertions",
218
+ vars: "prompt (nested under prompt: { text: ... })",
219
+ };
220
+ /**
221
+ * Detect legacy field names in raw task data and return helpful messages.
222
+ *
223
+ * Runs BEFORE Zod parsing to catch the most common migration mistake —
224
+ * using old field names from @sanity/ailf-tasks instead of the canonical
225
+ * GeneralizedTaskDefinition shape.
226
+ */
227
+ export function detectLegacyFieldNames(raw, filename) {
228
+ const warnings = [];
229
+ if (!Array.isArray(raw))
230
+ return warnings;
231
+ for (let i = 0; i < raw.length; i++) {
232
+ const entry = raw[i];
233
+ if (typeof entry !== "object" || entry === null)
234
+ continue;
235
+ const obj = entry;
236
+ const taskId = typeof obj.id === "string" ? obj.id : `task[${i}]`;
237
+ for (const [legacy, canonical] of Object.entries(LEGACY_FIELD_MAP)) {
238
+ if (legacy in obj) {
239
+ warnings.push(`[${filename}] ${taskId}: Found legacy field "${legacy}" — ` +
240
+ `use "${canonical}" instead. ` +
241
+ "See CONTRIBUTING_TASKS.md for the canonical task format.");
242
+ }
243
+ }
244
+ }
245
+ return warnings;
246
+ }
247
+ // ---------------------------------------------------------------------------
40
248
  // Config schemas — specific to the eval pipeline
41
249
  // ---------------------------------------------------------------------------
42
250
  const TriggerModeSchema = z.enum(["validate-only", "eval"]);
@@ -1,22 +1,26 @@
1
1
  /**
2
- * Adapter: Load task definitions from .ailf/tasks/*.yaml in an external repo.
2
+ * Adapter: Load task definitions from .ailf/tasks/ in an external repo.
3
3
  *
4
- * Repo-based tasks use a camelCase YAML format (matching the Content Lake
5
- * document schema) with slug strings instead of Sanity references. The
6
- * mapping to TaskDefinition is straightforward — field names are already
7
- * aligned with the domain type.
4
+ * Supports two task file formats:
8
5
  *
9
- * This adapter is structurally similar to YamlTaskSource but:
10
- * - Reads from an arbitrary directory (not the framework's tasks/ dir)
11
- * - Uses camelCase field names (not snake_case)
12
- * - Has an explicit featureArea field per task (not derived from filename)
6
+ * 1. **TypeScript (.task.ts)** — Tasks authored with `defineTask()` from
7
+ * `@sanity/ailf`. These use `GeneralizedTaskDefinition` field names
8
+ * and are passed through with basic runtime guards.
13
9
  *
10
+ * 2. **YAML (.yaml)** — Tasks using the canonical `GeneralizedTaskDefinition`
11
+ * field names (area, context.docs, assertions, prompt.text). Validated
12
+ * through the CanonicalTaskSchema Zod schema.
13
+ *
14
+ * All tasks use the single canonical shape — no mapping layer, no dual-shape
15
+ * detection. The `mode` field defaults to "literacy" for YAML tasks that
16
+ * omit it.
17
+ *
18
+ * @see packages/core/src/types/generalized-task.ts — canonical types
14
19
  * @see packages/core/src/ports/task-source.ts — TaskSource port
15
- * @see docs/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
16
20
  */
17
- import type { FilterOptions, TaskDefinition, TaskSource } from "../../_vendor/ailf-core/index.d.ts";
21
+ import type { FilterOptions, GeneralizedTaskDefinition, TaskSource } from "../../_vendor/ailf-core/index.d.ts";
18
22
  export declare class RepoTaskSource implements TaskSource {
19
23
  private readonly tasksDir;
20
24
  constructor(tasksDir: string);
21
- loadTasks(filter?: FilterOptions): Promise<TaskDefinition[]>;
25
+ loadTasks(filter?: FilterOptions): Promise<GeneralizedTaskDefinition[]>;
22
26
  }
@@ -1,23 +1,31 @@
1
1
  /**
2
- * Adapter: Load task definitions from .ailf/tasks/*.yaml in an external repo.
2
+ * Adapter: Load task definitions from .ailf/tasks/ in an external repo.
3
3
  *
4
- * Repo-based tasks use a camelCase YAML format (matching the Content Lake
5
- * document schema) with slug strings instead of Sanity references. The
6
- * mapping to TaskDefinition is straightforward — field names are already
7
- * aligned with the domain type.
4
+ * Supports two task file formats:
8
5
  *
9
- * This adapter is structurally similar to YamlTaskSource but:
10
- * - Reads from an arbitrary directory (not the framework's tasks/ dir)
11
- * - Uses camelCase field names (not snake_case)
12
- * - Has an explicit featureArea field per task (not derived from filename)
6
+ * 1. **TypeScript (.task.ts)** — Tasks authored with `defineTask()` from
7
+ * `@sanity/ailf`. These use `GeneralizedTaskDefinition` field names
8
+ * and are passed through with basic runtime guards.
13
9
  *
10
+ * 2. **YAML (.yaml)** — Tasks using the canonical `GeneralizedTaskDefinition`
11
+ * field names (area, context.docs, assertions, prompt.text). Validated
12
+ * through the CanonicalTaskSchema Zod schema.
13
+ *
14
+ * All tasks use the single canonical shape — no mapping layer, no dual-shape
15
+ * detection. The `mode` field defaults to "literacy" for YAML tasks that
16
+ * omit it.
17
+ *
18
+ * @see packages/core/src/types/generalized-task.ts — canonical types
14
19
  * @see packages/core/src/ports/task-source.ts — TaskSource port
15
- * @see docs/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
16
20
  */
17
21
  import { existsSync, readdirSync, readFileSync } from "fs";
18
22
  import { resolve } from "path";
19
23
  import { load } from "js-yaml";
20
- import { parseRepoTaskFile } from "./repo-schemas.js";
24
+ import { CANONICAL_EVAL_MODES } from "../../_vendor/ailf-shared/index.js";
25
+ import { detectLegacyFieldNames, parseCanonicalTaskFile, } from "./repo-schemas.js";
26
+ import { discoverTsTaskFiles, loadTsTaskFile } from "./task-file-loader.js";
27
+ /** Set of canonical mode names for O(1) lookup */
28
+ const KNOWN_MODES = new Set(CANONICAL_EVAL_MODES);
21
29
  // ---------------------------------------------------------------------------
22
30
  // RepoTaskSource adapter
23
31
  // ---------------------------------------------------------------------------
@@ -34,11 +42,13 @@ export class RepoTaskSource {
34
42
  const yamlFiles = readdirSync(this.tasksDir)
35
43
  .filter((f) => (f.endsWith(".yaml") || f.endsWith(".yml")) && !f.startsWith("."))
36
44
  .sort();
37
- if (yamlFiles.length === 0) {
38
- throw new Error(`No YAML files found in ${this.tasksDir}\n` +
39
- " Expected .ailf/tasks/*.yaml files with task definitions");
45
+ const tsFiles = discoverTsTaskFiles(this.tasksDir);
46
+ if (yamlFiles.length === 0 && tsFiles.length === 0) {
47
+ throw new Error(`No task files found in ${this.tasksDir}\n` +
48
+ " Expected .ailf/tasks/*.yaml or .ailf/tasks/*.task.ts files");
40
49
  }
41
50
  const definitions = [];
51
+ // Load YAML task files
42
52
  for (const file of yamlFiles) {
43
53
  const filePath = resolve(this.tasksDir, file);
44
54
  const raw = readFileSync(filePath, "utf-8");
@@ -47,88 +57,90 @@ export class RepoTaskSource {
47
57
  throw new Error(`${file} did not parse to an array of tasks. ` +
48
58
  "Repo task files must contain a YAML array of task definitions.");
49
59
  }
50
- // Validate entire file through Zod schema
60
+ // Detect legacy field names (featureArea, canonicalDocs, assert, vars)
61
+ // and surface helpful migration messages before Zod validation fails.
62
+ const legacyWarnings = detectLegacyFieldNames(parsed, file);
63
+ if (legacyWarnings.length > 0) {
64
+ throw new Error(`${file} uses legacy field names from @sanity/ailf-tasks.\n` +
65
+ "Task files must use canonical GeneralizedTaskDefinition field names.\n\n" +
66
+ legacyWarnings.join("\n") +
67
+ "\n\nSee CONTRIBUTING_TASKS.md for the canonical task format.");
68
+ }
69
+ // Validate through canonical Zod schema
51
70
  let validated;
52
71
  try {
53
- validated = parseRepoTaskFile(parsed, file);
72
+ validated = parseCanonicalTaskFile(parsed, file);
54
73
  }
55
74
  catch (err) {
56
75
  const msg = err instanceof Error ? err.message : String(err);
57
76
  throw new Error(`Failed to validate ${file}:\n${msg}`, { cause: err });
58
77
  }
59
- for (const entry of validated) {
60
- // Filter stages:
61
- // 1. Area filter — skip tasks outside requested feature areas
62
- // 2. Task ID filter — skip tasks not matching explicit task IDs
63
- // 3. Status filter — skip non-active tasks (unless targeting by ID)
64
- // 4. Tag filter — skip tasks not matching requested tags
65
- // Area filter
66
- if (filter?.areas &&
67
- filter.areas.length > 0 &&
68
- !filter.areas
69
- .map((a) => a.toLowerCase())
70
- .includes(entry.featureArea.toLowerCase())) {
71
- continue;
72
- }
73
- // Task ID filter
74
- if (filter?.taskIds &&
75
- filter.taskIds.length > 0 &&
76
- !filter.taskIds.includes(entry.id)) {
77
- continue;
78
+ for (const task of validated) {
79
+ if (passesFilter(task, filter)) {
80
+ definitions.push(task);
78
81
  }
79
- // Status filter — unified lifecycle control
80
- // Resolve effective status: explicit status field wins,
81
- // then fall back to execution.enabled for backwards compat
82
- const effectiveStatus = entry.status ??
83
- (entry.execution?.enabled === false ? "paused" : "active");
84
- const isTargetedById = filter?.taskIds && filter.taskIds.includes(entry.id);
85
- if (effectiveStatus === "archived") {
86
- continue; // Archived is always excluded, even with --task
87
- }
88
- if (effectiveStatus === "paused" && !isTargetedById) {
89
- continue; // Paused skipped unless explicitly targeted
82
+ }
83
+ }
84
+ // Load TS task files (.task.ts / .task.js)
85
+ for (const tsFile of tsFiles) {
86
+ const loaded = await loadTsTaskFile(tsFile);
87
+ const filename = tsFile.split("/").pop() ?? tsFile;
88
+ for (const raw of loaded.tasks) {
89
+ const task = raw;
90
+ if (!task.id || typeof task.id !== "string") {
91
+ throw new Error(`Task in ${filename} is missing a valid "id" field`);
90
92
  }
91
- if (effectiveStatus === "draft" &&
92
- !isTargetedById &&
93
- !filter?.includeDrafts) {
94
- continue; // Draft skipped unless targeted or includeDrafts
93
+ if (!task.mode || !KNOWN_MODES.has(task.mode)) {
94
+ throw new Error(`Task "${task.id}" in ${filename} has missing or unknown mode "${task.mode}". ` +
95
+ `Valid modes: ${[...KNOWN_MODES].join(", ")}`);
95
96
  }
96
- // Tag filter — skip tasks that don't match any requested tag
97
- if (filter?.tags &&
98
- filter.tags.length > 0 &&
99
- (!entry.tags || !entry.tags.some((t) => filter.tags.includes(t)))) {
100
- continue;
97
+ if (passesFilter(task, filter)) {
98
+ definitions.push(task);
101
99
  }
102
- definitions.push(mapToTaskDefinition(entry));
103
100
  }
104
101
  }
105
102
  return definitions;
106
103
  }
107
104
  }
108
105
  // ---------------------------------------------------------------------------
109
- // Mapping helpers
106
+ // Filter helper
110
107
  // ---------------------------------------------------------------------------
111
- function mapToTaskDefinition(raw) {
112
- // Extract task prompt from vars.task (same convention as YAML tasks)
113
- const vars = (raw.vars ?? {});
114
- const { docs: _docs, task, ...rest } = vars;
115
- // Canonical docs are already parsed into the polymorphic union by Zod.
116
- // Each entry has exactly one resolution key (slug, path, id, or perspective).
117
- const canonicalDocs = (raw.canonicalDocs ??
118
- []);
119
- const extraVars = Object.keys(rest).length > 0 ? rest : undefined;
120
- return {
121
- assertions: (raw.assert ?? []),
122
- canonicalDocs,
123
- description: raw.description,
124
- docCoverage: raw.docCoverage ?? false,
125
- featureArea: raw.featureArea,
126
- id: raw.id,
127
- referenceSolution: raw.referenceSolution ?? "",
128
- taskPrompt: typeof task === "string" ? task : "",
129
- ...(raw.baseline ? { baseline: raw.baseline } : {}),
130
- ...(extraVars ? { extraVars } : {}),
131
- ...(raw.status && raw.status !== "active" ? { status: raw.status } : {}),
132
- ...(raw.tags?.length ? { tags: raw.tags } : {}),
133
- };
108
+ /**
109
+ * Apply standard task filtering. Used for both YAML and TS tasks.
110
+ */
111
+ function passesFilter(task, filter) {
112
+ // Area filter
113
+ if (filter?.areas &&
114
+ filter.areas.length > 0 &&
115
+ (!task.area ||
116
+ !filter.areas
117
+ .map((a) => a.toLowerCase())
118
+ .includes(task.area.toLowerCase()))) {
119
+ return false;
120
+ }
121
+ // Task ID filter
122
+ if (filter?.taskIds &&
123
+ filter.taskIds.length > 0 &&
124
+ !filter.taskIds.includes(task.id)) {
125
+ return false;
126
+ }
127
+ // Status filter — unified lifecycle control
128
+ const effectiveStatus = task.status ?? "active";
129
+ const isTargetedById = filter?.taskIds && filter.taskIds.includes(task.id);
130
+ if (effectiveStatus === "archived")
131
+ return false;
132
+ if (effectiveStatus === "paused" && !isTargetedById)
133
+ return false;
134
+ if (effectiveStatus === "draft" &&
135
+ !isTargetedById &&
136
+ !filter?.includeDrafts) {
137
+ return false;
138
+ }
139
+ // Tag filter
140
+ if (filter?.tags &&
141
+ filter.tags.length > 0 &&
142
+ (!task.tags || !task.tags.some((t) => filter.tags.includes(t)))) {
143
+ return false;
144
+ }
145
+ return true;
134
146
  }
@@ -1,8 +1,39 @@
1
1
  /**
2
- * repo-validation.ts — Re-exports semantic validation from @sanity/ailf-tasks.
2
+ * repo-validation.ts — Semantic validation for task definitions.
3
3
  *
4
- * The validation logic is the single source of truth in @sanity/ailf-tasks.
5
- * This file re-exports so existing eval-package importers don't need
6
- * to change their import paths.
4
+ * Checks that go beyond Zod schema parsing:
5
+ * - Assertion types are in the curated set
6
+ * - Rubric template names resolve to known templates
7
+ * - Doc ref slugs look reasonable (slugs, not URLs)
8
+ * - Tasks have at least one LLM rubric assertion (recommended)
9
+ * - Tasks have a prompt text (recommended)
10
+ *
11
+ * These produce warnings, not errors — the pipeline can still run
12
+ * with imperfect tasks. Only structural failures (caught by Zod) block.
13
+ *
14
+ * Previously this file re-exported from @sanity/ailf-tasks. That package
15
+ * has been eliminated — all validation logic now lives here.
16
+ */
17
+ import { type CanonicalTask } from "./repo-schemas.js";
18
+ export interface ValidationResult {
19
+ valid: boolean;
20
+ errors: ValidationMessage[];
21
+ warnings: ValidationMessage[];
22
+ }
23
+ export interface ValidationMessage {
24
+ taskId: string;
25
+ field: string;
26
+ message: string;
27
+ }
28
+ /**
29
+ * Run semantic validation on an array of parsed canonical tasks.
30
+ *
31
+ * Returns warnings for issues that don't block execution (unknown feature
32
+ * areas, unresolved slugs) and errors for issues that would cause pipeline
33
+ * failures (completely missing required fields — though Zod catches most).
34
+ */
35
+ export declare function validateCanonicalTasks(tasks: CanonicalTask[]): ValidationResult;
36
+ /**
37
+ * Format validation results for console output.
7
38
  */
8
- export { detectSnakeCaseFields, formatValidationResult, validateRepoTasks, type ValidationMessage, type ValidationResult, } from "../../_vendor/ailf-tasks/index.d.ts";
39
+ export declare function formatValidationResult(result: ValidationResult): string;