@sanity/ailf 0.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377)
  1. package/README.md +0 -1
  2. package/config/features.ts +23 -0
  3. package/config/models.ts +95 -0
  4. package/config/prompts.ts +16 -0
  5. package/config/rubrics.ts +225 -0
  6. package/config/schedules.ts +47 -0
  7. package/config/sinks.ts +37 -0
  8. package/config/sources.ts +21 -0
  9. package/config/thresholds.ts +61 -0
  10. package/dist/_vendor/ailf-core/config-helpers.d.ts +171 -0
  11. package/dist/_vendor/ailf-core/config-helpers.js +170 -0
  12. package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
  13. package/dist/_vendor/ailf-core/env-helper.js +45 -0
  14. package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
  15. package/dist/_vendor/ailf-core/examples/index.js +25 -0
  16. package/dist/_vendor/ailf-core/index.d.ts +3 -0
  17. package/dist/_vendor/ailf-core/index.js +5 -0
  18. package/dist/_vendor/ailf-core/ports/context.d.ts +17 -2
  19. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
  20. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
  21. package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
  22. package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
  23. package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
  24. package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
  25. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +8 -2
  26. package/dist/_vendor/ailf-core/schemas/eval-config.js +17 -2
  27. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +9 -3
  28. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +8 -1
  29. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -31
  30. package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -9
  31. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
  32. package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
  33. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
  34. package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
  35. package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
  36. package/dist/_vendor/ailf-core/services/index.js +2 -1
  37. package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
  38. package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
  39. package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
  40. package/dist/_vendor/ailf-core/services/scoring.js +25 -15
  41. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
  42. package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
  43. package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
  44. package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
  45. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +332 -0
  46. package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
  47. package/dist/_vendor/ailf-core/types/index.d.ts +45 -83
  48. package/dist/_vendor/ailf-core/types/index.js +8 -1
  49. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +257 -0
  50. package/dist/_vendor/ailf-core/types/plugin-registry.js +185 -0
  51. package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
  52. package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
  53. package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
  54. package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
  55. package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
  56. package/dist/_vendor/ailf-core/types/trace.js +18 -0
  57. package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
  58. package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
  59. package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
  60. package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
  61. package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
  62. package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
  63. package/dist/_vendor/ailf-shared/index.d.ts +0 -1
  64. package/dist/_vendor/ailf-shared/index.js +0 -1
  65. package/dist/adapters/api-client/build-request.js +14 -13
  66. package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
  67. package/dist/adapters/config-sources/file-config-adapter.js +39 -12
  68. package/dist/adapters/config-sources/index.d.ts +2 -0
  69. package/dist/adapters/config-sources/index.js +1 -0
  70. package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
  71. package/dist/adapters/config-sources/ts-config-loader.js +141 -0
  72. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
  73. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
  74. package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
  75. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  76. package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
  77. package/dist/adapters/task-sources/content-lake-task-source.js +35 -39
  78. package/dist/adapters/task-sources/index.d.ts +3 -2
  79. package/dist/adapters/task-sources/index.js +3 -2
  80. package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
  81. package/dist/adapters/task-sources/repo-schemas.js +227 -19
  82. package/dist/adapters/task-sources/repo-task-source.d.ts +16 -12
  83. package/dist/adapters/task-sources/repo-task-source.js +92 -80
  84. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  85. package/dist/adapters/task-sources/repo-validation.js +126 -5
  86. package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
  87. package/dist/adapters/task-sources/task-file-loader.js +83 -0
  88. package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
  89. package/dist/adapters/task-sources/yaml-task-source.js +19 -16
  90. package/dist/cli.js +0 -2
  91. package/dist/commands/baseline.js +4 -1
  92. package/dist/commands/calculate-scores.js +1 -1
  93. package/dist/commands/coverage-audit.js +9 -1
  94. package/dist/commands/explain-handler.js +25 -23
  95. package/dist/commands/fetch-docs.js +3 -2
  96. package/dist/commands/generate-configs.js +1 -1
  97. package/dist/commands/init.d.ts +6 -4
  98. package/dist/commands/init.js +302 -23
  99. package/dist/commands/interactive.js +11 -7
  100. package/dist/commands/pipeline-action.d.ts +2 -0
  101. package/dist/commands/pipeline-action.js +16 -6
  102. package/dist/commands/pipeline.d.ts +1 -0
  103. package/dist/commands/pipeline.js +4 -2
  104. package/dist/commands/pr-comment.js +1 -1
  105. package/dist/commands/publish.js +2 -2
  106. package/dist/commands/readiness-report.js +13 -6
  107. package/dist/commands/validate-tasks.d.ts +2 -2
  108. package/dist/commands/validate-tasks.js +26 -15
  109. package/dist/composition-root.d.ts +13 -1
  110. package/dist/composition-root.js +99 -4
  111. package/dist/index.d.ts +41 -0
  112. package/dist/index.js +48 -0
  113. package/dist/orchestration/build-app-context.js +1 -0
  114. package/dist/orchestration/build-step-sequence.js +28 -8
  115. package/dist/orchestration/steps/calculate-scores-step.js +24 -11
  116. package/dist/orchestration/steps/fetch-docs-step.js +8 -7
  117. package/dist/orchestration/steps/gap-analysis-step.js +8 -7
  118. package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
  119. package/dist/orchestration/steps/generate-configs-step.js +261 -51
  120. package/dist/orchestration/steps/grader-consistency-step.js +7 -4
  121. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  122. package/dist/orchestration/steps/readiness-step.js +5 -6
  123. package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
  124. package/dist/orchestration/steps/run-eval-step.js +8 -7
  125. package/dist/pipeline/cache.d.ts +1 -1
  126. package/dist/pipeline/cache.js +36 -8
  127. package/dist/pipeline/calculate-scores.d.ts +2 -4
  128. package/dist/pipeline/calculate-scores.js +43 -113
  129. package/dist/pipeline/checks.js +2 -2
  130. package/dist/pipeline/compare.js +8 -8
  131. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
  132. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
  133. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
  134. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
  135. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
  136. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
  137. package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
  138. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
  139. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
  140. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +392 -0
  141. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
  142. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
  143. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
  144. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
  145. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
  146. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +404 -0
  147. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
  148. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
  149. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
  150. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
  151. package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
  152. package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
  153. package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
  154. package/dist/pipeline/compiler/assertion-mapper.js +175 -0
  155. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
  156. package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
  157. package/dist/pipeline/compiler/config-loader.d.ts +56 -0
  158. package/dist/pipeline/compiler/config-loader.js +111 -0
  159. package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
  160. package/dist/pipeline/compiler/fixture-resolver.js +113 -0
  161. package/dist/pipeline/compiler/hash.d.ts +11 -0
  162. package/dist/pipeline/compiler/hash.js +18 -0
  163. package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
  164. package/dist/pipeline/compiler/ignore-fields.js +113 -0
  165. package/dist/pipeline/compiler/index.d.ts +29 -0
  166. package/dist/pipeline/compiler/index.js +45 -0
  167. package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
  168. package/dist/pipeline/compiler/literacy-bridge.js +172 -0
  169. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  170. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  171. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  172. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  173. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  174. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  175. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
  176. package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
  177. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  178. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  179. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
  180. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
  181. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
  182. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
  183. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
  184. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
  185. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  186. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  187. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  188. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  189. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  190. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  191. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  192. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  193. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  194. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  195. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  196. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  197. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
  198. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  199. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  200. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  201. package/dist/pipeline/compiler/mode-handlers/index.d.ts +15 -0
  202. package/dist/pipeline/compiler/mode-handlers/index.js +19 -0
  203. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  204. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  205. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  206. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  207. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  208. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  209. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  210. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  211. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  212. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  213. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  214. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  215. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  216. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  217. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  218. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  219. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  220. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  221. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  222. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  223. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  224. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  225. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  226. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  227. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
  228. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
  229. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  230. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  231. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  232. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  233. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  234. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  235. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  236. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
  237. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  238. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  239. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  240. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  241. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  242. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
  243. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  244. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
  245. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  246. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  247. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
  248. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
  249. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
  250. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  251. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  252. package/dist/pipeline/compiler/preset-loader.js +99 -0
  253. package/dist/pipeline/compiler/presets/index.d.ts +9 -0
  254. package/dist/pipeline/compiler/presets/index.js +8 -0
  255. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +42 -0
  256. package/dist/pipeline/compiler/presets/sanity-literacy.js +208 -0
  257. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
  258. package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
  259. package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
  260. package/dist/pipeline/compiler/provider-assembler.js +137 -0
  261. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
  262. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
  263. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
  264. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
  265. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
  266. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
  267. package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
  268. package/dist/pipeline/compiler/sandbox/index.js +11 -0
  269. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
  270. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
  271. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
  272. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
  273. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
  274. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
  275. package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
  276. package/dist/pipeline/compiler/scoring-bridge.js +114 -0
  277. package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
  278. package/dist/pipeline/compiler/task-graph-builder.js +291 -0
  279. package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
  280. package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
  281. package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
  282. package/dist/pipeline/compiler/telemetry/index.js +19 -0
  283. package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
  284. package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
  285. package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
  286. package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
  287. package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
  288. package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
  289. package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
  290. package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
  291. package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
  292. package/dist/pipeline/compiler/variable-resolver.js +115 -0
  293. package/dist/pipeline/coverage-audit.d.ts +15 -5
  294. package/dist/pipeline/coverage-audit.js +41 -22
  295. package/dist/pipeline/eval-constants.d.ts +16 -6
  296. package/dist/pipeline/eval-constants.js +25 -4
  297. package/dist/pipeline/eval-fingerprint.d.ts +2 -2
  298. package/dist/pipeline/eval-fingerprint.js +8 -9
  299. package/dist/pipeline/expand-tasks.d.ts +19 -10
  300. package/dist/pipeline/expand-tasks.js +34 -28
  301. package/dist/pipeline/gap-analysis.d.ts +1 -1
  302. package/dist/pipeline/gap-analysis.js +2 -2
  303. package/dist/pipeline/generate-configs.d.ts +22 -4
  304. package/dist/pipeline/generate-configs.js +53 -24
  305. package/dist/pipeline/grader-api.d.ts +3 -3
  306. package/dist/pipeline/grader-api.js +5 -12
  307. package/dist/pipeline/grader-compare-runner.js +20 -27
  308. package/dist/pipeline/grader-comparison.d.ts +4 -8
  309. package/dist/pipeline/grader-comparison.js +11 -17
  310. package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
  311. package/dist/pipeline/grader-consistency-runner.js +16 -20
  312. package/dist/pipeline/grader-consistency.d.ts +6 -10
  313. package/dist/pipeline/grader-consistency.js +13 -32
  314. package/dist/pipeline/grader-sensitivity-runner.js +7 -5
  315. package/dist/pipeline/grader-sensitivity.d.ts +2 -6
  316. package/dist/pipeline/grader-sensitivity.js +10 -10
  317. package/dist/pipeline/grader-validate-runner.js +7 -5
  318. package/dist/pipeline/grader-validation.d.ts +2 -6
  319. package/dist/pipeline/grader-validation.js +14 -22
  320. package/dist/pipeline/map-request-to-config.js +7 -1
  321. package/dist/pipeline/mirror-repo-tasks.d.ts +13 -13
  322. package/dist/pipeline/mirror-repo-tasks.js +22 -21
  323. package/dist/pipeline/normalize-mode.d.ts +49 -0
  324. package/dist/pipeline/normalize-mode.js +64 -0
  325. package/dist/pipeline/plan.d.ts +5 -2
  326. package/dist/pipeline/plan.js +134 -78
  327. package/dist/pipeline/pr-comment.js +2 -0
  328. package/dist/pipeline/profile-resolution.d.ts +22 -14
  329. package/dist/pipeline/profile-resolution.js +41 -19
  330. package/dist/pipeline/provenance.d.ts +2 -2
  331. package/dist/pipeline/provenance.js +12 -17
  332. package/dist/pipeline/release-report.js +4 -4
  333. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  334. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  335. package/dist/pipeline/rubric-loader.d.ts +20 -0
  336. package/dist/pipeline/rubric-loader.js +37 -0
  337. package/dist/pipeline/validate.d.ts +4 -4
  338. package/dist/pipeline/validate.js +64 -53
  339. package/dist/schedules/loader.js +18 -8
  340. package/dist/scripts/migrate-task-mode.d.ts +24 -0
  341. package/dist/scripts/migrate-task-mode.js +85 -0
  342. package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
  343. package/dist/scripts/validate-task-sources.d.ts +1 -1
  344. package/dist/scripts/validate-task-sources.js +15 -15
  345. package/dist/sinks/loader.js +5 -7
  346. package/dist/sources.d.ts +7 -7
  347. package/dist/sources.js +22 -24
  348. package/dist/webhook/dispatch.js +2 -1
  349. package/package.json +15 -4
  350. package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
  351. package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
  352. package/tasks/literacy/frameworks.task.ts +128 -0
  353. package/tasks/literacy/functions.task.ts +69 -0
  354. package/tasks/literacy/groq.task.ts +258 -0
  355. package/tasks/literacy/nextjs-live.task.ts +75 -0
  356. package/tasks/literacy/studio-setup.task.ts +131 -0
  357. package/tasks/literacy/visual-editing.task.ts +146 -0
  358. package/config/features.yaml +0 -116
  359. package/config/models.yaml +0 -116
  360. package/config/prompts.yaml +0 -75
  361. package/config/rubrics.yaml +0 -81
  362. package/config/schedules.yaml +0 -43
  363. package/config/sinks.yaml +0 -54
  364. package/config/sources.yaml +0 -51
  365. package/config/thresholds.yaml +0 -49
  366. package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
  367. package/dist/_vendor/ailf-tasks/cli.js +0 -61
  368. package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
  369. package/dist/_vendor/ailf-tasks/index.js +0 -16
  370. package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
  371. package/dist/_vendor/ailf-tasks/parser.js +0 -73
  372. package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
  373. package/dist/_vendor/ailf-tasks/schemas.js +0 -180
  374. package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
  375. package/dist/_vendor/ailf-tasks/validation.js +0 -162
  376. package/dist/agent-observer/test-imports.d.ts +0 -7
  377. package/dist/agent-observer/test-imports.js +0 -185
@@ -1,8 +1,129 @@
1
1
  /**
2
- * repo-validation.ts — Re-exports semantic validation from @sanity/ailf-tasks.
2
+ * repo-validation.ts — Semantic validation for task definitions.
3
3
  *
4
- * The validation logic is the single source of truth in @sanity/ailf-tasks.
5
- * This file re-exports so existing eval-package importers don't need
6
- * to change their import paths.
4
+ * Checks that go beyond Zod schema parsing:
5
+ * - Assertion types are in the curated set
6
+ * - Rubric template names resolve to known templates
7
+ * - Doc ref slugs look reasonable (slugs, not URLs)
8
+ * - Tasks have at least one LLM rubric assertion (recommended)
9
+ * - Tasks have a prompt text (recommended)
10
+ *
11
+ * Apart from duplicate task IDs (reported as errors), these produce warnings — the pipeline can still run
12
+ * with imperfect tasks. Only structural failures (caught by Zod) block.
13
+ *
14
+ * Previously this file re-exported from @sanity/ailf-tasks. That package
15
+ * has been eliminated — all validation logic now lives here.
16
+ */
17
+ import { CURATED_ASSERTION_TYPES, RUBRIC_TEMPLATE_NAMES, } from "./repo-schemas.js";
18
+ // ---------------------------------------------------------------------------
19
+ // Public API
20
+ // ---------------------------------------------------------------------------
21
+ /**
22
+ * Run semantic validation on an array of parsed canonical tasks.
23
+ *
24
+ * Returns warnings for issues that don't block execution (unknown assertion
25
+ * types or rubric templates, URL-like slugs, no llm-rubric assertion, no
26
+ * prompt text) and errors for duplicate task IDs, which mark the result invalid.
27
+ */
28
+ export function validateCanonicalTasks(tasks) {
29
+ const errors = [];
30
+ const warnings = [];
31
+ // Check for duplicate IDs
32
+ const seenIds = new Set();
33
+ for (const task of tasks) {
34
+ if (seenIds.has(task.id)) {
35
+ errors.push({
36
+ taskId: task.id,
37
+ field: "id",
38
+ message: `Duplicate task ID "${task.id}"`,
39
+ });
40
+ }
41
+ seenIds.add(task.id);
42
+ }
43
+ for (const task of tasks) {
44
+ const assertions = task.assertions ?? [];
45
+ // Check assertion types
46
+ for (let i = 0; i < assertions.length; i++) {
47
+ const assertion = assertions[i];
48
+ if (!CURATED_ASSERTION_TYPES.includes(assertion.type)) {
49
+ warnings.push({
50
+ taskId: task.id,
51
+ field: `assertions[${i}].type`,
52
+ message: `Unknown assertion type "${assertion.type}". ` +
53
+ `Valid types: ${CURATED_ASSERTION_TYPES.join(", ")}`,
54
+ });
55
+ }
56
+ // Check rubric template for llm-rubric assertions
57
+ if (assertion.type === "llm-rubric" && "template" in assertion) {
58
+ const template = assertion.template;
59
+ if (!RUBRIC_TEMPLATE_NAMES.includes(template)) {
60
+ warnings.push({
61
+ taskId: task.id,
62
+ field: `assertions[${i}].template`,
63
+ message: `Unknown rubric template "${template}". ` +
64
+ `Valid templates: ${RUBRIC_TEMPLATE_NAMES.join(", ")}`,
65
+ });
66
+ }
67
+ }
68
+ }
69
+ // Check canonical doc refs look reasonable
70
+ const docs = task.context?.docs ?? [];
71
+ for (let i = 0; i < docs.length; i++) {
72
+ const doc = docs[i];
73
+ // Slug refs: warn if they look like URLs or paths
74
+ if ("slug" in doc && !("id" in doc) && typeof doc.slug === "string") {
75
+ if (doc.slug.includes("/") || doc.slug.includes("http")) {
76
+ warnings.push({
77
+ taskId: task.id,
78
+ field: `context.docs[${i}].slug`,
79
+ message: `Slug "${doc.slug}" looks like a URL or path — use 'path' type for paths or 'slug' for document slugs (e.g., "groq-introduction")`,
80
+ });
81
+ }
82
+ }
83
+ }
84
+ // Check task has at least one llm-rubric assertion (recommended but not required)
85
+ const hasLlmRubric = assertions.some((a) => a.type === "llm-rubric");
86
+ if (!hasLlmRubric) {
87
+ warnings.push({
88
+ taskId: task.id,
89
+ field: "assertions",
90
+ message: "No llm-rubric assertion found. Tasks should have at least one scored rubric for meaningful evaluation.",
91
+ });
92
+ }
93
+ // Check prompt text exists
94
+ if (!task.prompt?.text) {
95
+ warnings.push({
96
+ taskId: task.id,
97
+ field: "prompt.text",
98
+ message: "No task prompt found in prompt.text. The LLM will receive an empty implementation request.",
99
+ });
100
+ }
101
+ }
102
+ return {
103
+ valid: errors.length === 0,
104
+ errors,
105
+ warnings,
106
+ };
107
+ }
108
+ /**
109
+ * Format validation results for console output.
7
110
  */
8
- export { detectSnakeCaseFields, formatValidationResult, validateRepoTasks, } from "../../_vendor/ailf-tasks/index.js";
111
+ export function formatValidationResult(result) {
112
+ const lines = [];
113
+ if (result.errors.length > 0) {
114
+ lines.push("Errors:");
115
+ for (const e of result.errors) {
116
+ lines.push(` [${e.taskId}] ${e.field}: ${e.message}`);
117
+ }
118
+ }
119
+ if (result.warnings.length > 0) {
120
+ lines.push("Warnings:");
121
+ for (const w of result.warnings) {
122
+ lines.push(` [${w.taskId}] ${w.field}: ${w.message}`);
123
+ }
124
+ }
125
+ if (result.valid && result.warnings.length === 0) {
126
+ lines.push("All tasks pass validation");
127
+ }
128
+ return lines.join("\n");
129
+ }
@@ -0,0 +1,64 @@
1
+ /**
2
+ * TaskFileLoader — loads task definitions from TypeScript files.
3
+ *
4
+ * Supplements the existing YAML-based task loading by supporting
5
+ * `*.task.ts` and `*.task.js` files in task directories. Files are
6
+ * loaded via jiti and expected to export GeneralizedTaskDefinition
7
+ * objects authored with `defineTask()`.
8
+ *
9
+ * TS task files export a single task or an array of tasks:
10
+ *
11
+ * ```typescript
12
+ * // single task
13
+ * import { defineTask } from "@sanity/ailf"
14
+ * export default defineTask({ id: "my-task", mode: "literacy", ... })
15
+ *
16
+ * // multiple tasks
17
+ * export default [
18
+ * defineTask({ id: "task-1", mode: "literacy", ... }),
19
+ * defineTask({ id: "task-2", mode: "literacy", ... }),
20
+ * ]
21
+ * ```
22
+ *
23
+ * The loader integrates into the existing RepoTaskSource adapter — TS
24
+ * task files are discovered alongside YAML files in the same directory.
25
+ *
26
+ * @see docs/design-docs/architecture-overhaul/typescript-configuration.md
27
+ */
28
+ /** A raw task object loaded from a TS file (pre-validation) */
29
+ export interface RawTsTask {
30
+ /** Source file path (for error messages) */
31
+ filePath: string;
32
+ /** The loaded task data, always an array (a single exported task object is wrapped in one) */
33
+ tasks: unknown[];
34
+ }
35
+ /**
36
+ * Discover TS/JS task files in a directory.
37
+ *
38
+ * Looks for files matching `*.task.ts` or `*.task.js`.
39
+ *
40
+ * @param tasksDir - Absolute path to the tasks directory
41
+ * @returns Array of absolute file paths
42
+ */
43
+ export declare function discoverTsTaskFiles(tasksDir: string): string[];
44
+ /**
45
+ * Load task definitions from a single TS/JS task file.
46
+ *
47
+ * The file's default export can be:
48
+ * - A single task object → wrapped in an array
49
+ * - An array of task objects → used as-is
50
+ *
51
+ * Returns the raw task data without validation — the caller is
52
+ * responsible for running the result through Zod schemas.
53
+ *
54
+ * @param filePath - Absolute path to the .task.ts or .task.js file
55
+ * @returns The loaded task(s), or throws on load failure
56
+ */
57
+ export declare function loadTsTaskFile(filePath: string): Promise<RawTsTask>;
58
+ /**
59
+ * Load all TS task files from a directory.
60
+ *
61
+ * @param tasksDir - Absolute path to the tasks directory
62
+ * @returns Array of raw task data from all files
63
+ */
64
+ export declare function loadAllTsTaskFiles(tasksDir: string): Promise<RawTsTask[]>;
@@ -0,0 +1,83 @@
1
+ /**
2
+ * TaskFileLoader — loads task definitions from TypeScript files.
3
+ *
4
+ * Supplements the existing YAML-based task loading by supporting
5
+ * `*.task.ts` and `*.task.js` files in task directories. Files are
6
+ * loaded via jiti and expected to export GeneralizedTaskDefinition
7
+ * objects authored with `defineTask()`.
8
+ *
9
+ * TS task files export a single task or an array of tasks:
10
+ *
11
+ * ```typescript
12
+ * // single task
13
+ * import { defineTask } from "@sanity/ailf"
14
+ * export default defineTask({ id: "my-task", mode: "literacy", ... })
15
+ *
16
+ * // multiple tasks
17
+ * export default [
18
+ * defineTask({ id: "task-1", mode: "literacy", ... }),
19
+ * defineTask({ id: "task-2", mode: "literacy", ... }),
20
+ * ]
21
+ * ```
22
+ *
23
+ * The loader integrates into the existing RepoTaskSource adapter — TS
24
+ * task files are discovered alongside YAML files in the same directory.
25
+ *
26
+ * @see docs/design-docs/architecture-overhaul/typescript-configuration.md
27
+ */
28
+ import { existsSync, readdirSync } from "fs";
29
+ import { resolve } from "path";
30
+ import { loadTsConfig } from "../config-sources/ts-config-loader.js";
31
+ /**
32
+ * Discover TS/JS task files in a directory.
33
+ *
34
+ * Looks for files directly inside the directory whose names end in `.task.ts` or `.task.js`; names starting with `.` are skipped.
35
+ *
36
+ * @param tasksDir - Absolute path to the tasks directory
37
+ * @returns Array of absolute file paths
38
+ */
39
+ export function discoverTsTaskFiles(tasksDir) {
40
+ if (!existsSync(tasksDir))
41
+ return [];
42
+ return readdirSync(tasksDir)
43
+ .filter((f) => (f.endsWith(".task.ts") || f.endsWith(".task.js")) && !f.startsWith("."))
44
+ .sort()
45
+ .map((f) => resolve(tasksDir, f));
46
+ }
47
+ /**
48
+ * Load task definitions from a single TS/JS task file.
49
+ *
50
+ * The file's default export can be:
51
+ * - A single task object → wrapped in an array
52
+ * - An array of task objects → used as-is
53
+ *
54
+ * Returns the raw task data without validation — the caller is
55
+ * responsible for running the result through Zod schemas.
56
+ *
57
+ * @param filePath - Absolute path to the .task.ts or .task.js file
58
+ * @returns The loaded task(s), or throws on load failure
59
+ */
60
+ export async function loadTsTaskFile(filePath) {
61
+ const result = await loadTsConfig(filePath);
62
+ if (!result.ok) {
63
+ throw new Error(result.error);
64
+ }
65
+ const value = result.value;
66
+ // Normalize: single object → array of one
67
+ const tasks = Array.isArray(value) ? value : [value];
68
+ return { filePath, tasks };
69
+ }
70
+ /**
71
+ * Load all TS task files from a directory.
72
+ *
73
+ * @param tasksDir - Absolute path to the tasks directory
74
+ * @returns Array of raw task data from all files
75
+ */
76
+ export async function loadAllTsTaskFiles(tasksDir) {
77
+ const files = discoverTsTaskFiles(tasksDir);
78
+ const results = [];
79
+ for (const file of files) {
80
+ results.push(await loadTsTaskFile(file));
81
+ }
82
+ return results;
83
+ }
@@ -2,17 +2,17 @@
2
2
  * Adapter: Load task definitions from tasks/*.yaml files.
3
3
  *
4
4
  * This adapter reads the raw YAML task definitions (before Promptfoo
5
- * expansion) and maps them to the canonical TaskDefinition type from
6
- * @sanity/ailf-core. It handles area filtering (filename stem) and
7
- * task ID filtering.
5
+ * expansion) and maps them to GeneralizedTaskDefinition
6
+ * (LiteracyTaskDefinition variant) from @sanity/ailf-core. It handles
7
+ * area filtering (filename stem) and task ID filtering.
8
8
  *
9
9
  * Unlike loadAndExpandTasks() — which produces Promptfoo-specific
10
10
  * ExpandedTestEntry objects — this adapter produces domain-level
11
- * TaskDefinition objects suitable for the pipeline orchestrator.
11
+ * GeneralizedTaskDefinition objects suitable for the pipeline orchestrator.
12
12
  */
13
- import type { FilterOptions, TaskDefinition, TaskSource } from "../../_vendor/ailf-core/index.d.ts";
13
+ import type { FilterOptions, GeneralizedTaskDefinition, TaskSource } from "../../_vendor/ailf-core/index.d.ts";
14
14
  export declare class YamlTaskSource implements TaskSource {
15
15
  private readonly rootDir;
16
16
  constructor(rootDir: string);
17
- loadTasks(filter?: FilterOptions): Promise<TaskDefinition[]>;
17
+ loadTasks(filter?: FilterOptions): Promise<GeneralizedTaskDefinition[]>;
18
18
  }
@@ -2,13 +2,13 @@
2
2
  * Adapter: Load task definitions from tasks/*.yaml files.
3
3
  *
4
4
  * This adapter reads the raw YAML task definitions (before Promptfoo
5
- * expansion) and maps them to the canonical TaskDefinition type from
6
- * @sanity/ailf-core. It handles area filtering (filename stem) and
7
- * task ID filtering.
5
+ * expansion) and maps them to GeneralizedTaskDefinition
6
+ * (LiteracyTaskDefinition variant) from @sanity/ailf-core. It handles
7
+ * area filtering (filename stem) and task ID filtering.
8
8
  *
9
9
  * Unlike loadAndExpandTasks() — which produces Promptfoo-specific
10
10
  * ExpandedTestEntry objects — this adapter produces domain-level
11
- * TaskDefinition objects suitable for the pipeline orchestrator.
11
+ * GeneralizedTaskDefinition objects suitable for the pipeline orchestrator.
12
12
  */
13
13
  import { existsSync, readdirSync, readFileSync } from "fs";
14
14
  import { resolve } from "path";
@@ -55,7 +55,7 @@ export class YamlTaskSource {
55
55
  !filter.taskIds.includes(entry.id)) {
56
56
  continue;
57
57
  }
58
- definitions.push(mapToTaskDefinition(entry, featureArea));
58
+ definitions.push(mapToLiteracyTask(entry, featureArea));
59
59
  }
60
60
  }
61
61
  return definitions;
@@ -65,29 +65,32 @@ export class YamlTaskSource {
65
65
  // Mapping helpers
66
66
  // ---------------------------------------------------------------------------
67
67
  /**
68
- * Map a raw YAML entry to a canonical TaskDefinition.
68
+ * Map a raw YAML entry directly to a LiteracyTaskDefinition.
69
69
  *
70
- * Renames snake_case YAML keys to camelCase domain types and extracts
71
- * the task prompt from `vars.task`. Additional vars beyond `task` and
72
- * `docs` are collected into `extraVars`.
70
+ * Renames snake_case YAML keys to the generalized type's field names and
71
+ * extracts the task prompt from `vars.task`. Additional vars beyond `task`
72
+ * and `docs` are collected into `prompt.vars`.
73
73
  */
74
- function mapToTaskDefinition(raw, featureArea) {
74
+ function mapToLiteracyTask(raw, featureArea) {
75
75
  const { task, docs: _docs, ...rest } = (raw.vars ?? {});
76
- const canonicalDocs = (raw.canonical_docs ?? [])
76
+ const docs = (raw.canonical_docs ?? [])
77
77
  .map(mapCanonicalDoc)
78
78
  .filter((d) => d !== null);
79
79
  const extraVars = Object.keys(rest).length > 0 ? rest : undefined;
80
80
  return {
81
+ mode: "literacy",
81
82
  id: raw.id,
82
- description: raw.description,
83
- featureArea,
84
- taskPrompt: typeof task === "string" ? task : "",
85
- canonicalDocs,
83
+ title: raw.description,
84
+ area: featureArea,
85
+ prompt: {
86
+ text: typeof task === "string" ? task : "",
87
+ ...(extraVars ? { vars: extraVars } : {}),
88
+ },
89
+ context: { docs },
86
90
  referenceSolution: raw.reference_solution ?? "",
87
91
  docCoverage: raw.doc_coverage ?? false,
88
92
  assertions: (raw.assert ?? []),
89
93
  ...(raw.baseline ? { baseline: raw.baseline } : {}),
90
- ...(extraVars ? { extraVars } : {}),
91
94
  };
92
95
  }
93
96
  // ---------------------------------------------------------------------------
package/dist/cli.js CHANGED
@@ -157,8 +157,6 @@ import { createValidateTasksCommand } from "./commands/validate-tasks.js";
157
157
  program.addCommand(createValidateTasksCommand().helpGroup(CommandGroup.SetupConfig));
158
158
  import { createFetchDocsCommand } from "./commands/fetch-docs.js";
159
159
  program.addCommand(createFetchDocsCommand().helpGroup(CommandGroup.SetupConfig));
160
- import { createGenerateConfigsCommand } from "./commands/generate-configs.js";
161
- program.addCommand(createGenerateConfigsCommand().helpGroup(CommandGroup.SetupConfig));
162
160
  import { createCacheCommand } from "./commands/cache.js";
163
161
  program.addCommand(createCacheCommand().helpGroup(CommandGroup.SetupConfig));
164
162
  // ── Pipeline Internals ────────────────────────────────────────────────
@@ -11,8 +11,11 @@ import { Command } from "commander";
11
11
  import { compareBaseline, listBaselines, saveBaseline, } from "../pipeline/baseline.js";
12
12
  const __dirname = dirname(fileURLToPath(import.meta.url));
13
13
  const ROOT = resolve(__dirname, "../..");
14
+ // CLI command name — kept as a constant to centralize the string literal.
15
+ // "baseline" here refers to score baseline snapshots, not the legacy eval mode.
16
+ const CMD_NAME = "baseline";
14
17
  export function createBaselineCommand() {
15
- const cmd = new Command("baseline").description("Manage historical baseline snapshots of evaluation scores");
18
+ const cmd = new Command(CMD_NAME).description("Manage historical baseline snapshots of evaluation scores");
16
19
  // -----------------------------------------------------------------------
17
20
  // baseline save
18
21
  // -----------------------------------------------------------------------
@@ -20,7 +20,7 @@ export function createCalculateScoresCommand() {
20
20
  try {
21
21
  const ctx = createAppContext({
22
22
  rootDir: ROOT,
23
- mode: "baseline",
23
+ mode: "literacy",
24
24
  noAutoScope: false,
25
25
  skipFetch: true,
26
26
  skipEval: true,
@@ -3,10 +3,13 @@
3
3
  * against task files to produce a documentation coverage audit.
4
4
  *
5
5
  */
6
+ import { InMemoryPluginRegistry } from "../_vendor/ailf-core/index.js";
6
7
  import { Command } from "commander";
7
8
  import { dirname, resolve } from "path";
8
9
  import { fileURLToPath } from "url";
9
10
  import { countReferencedDocs, formatCoverageConsole, formatCoverageMarkdown, runCoverageAudit, } from "../pipeline/coverage-audit.js";
11
+ import { createLiteracyModeBase } from "../pipeline/compiler/mode-bases/index.js";
12
+ import { createSanityLiteracyPreset } from "../pipeline/compiler/presets/index.js";
10
13
  const __dirname = dirname(fileURLToPath(import.meta.url));
11
14
  const ROOT = resolve(__dirname, "..", "..");
12
15
  export function createCoverageAuditCommand() {
@@ -15,7 +18,12 @@ export function createCoverageAuditCommand() {
15
18
  .option("--format <fmt>", "Output format: table, md, markdown")
16
19
  .option("--json", "Output raw JSON", false)
17
20
  .action(async (opts) => {
18
- const report = runCoverageAudit(ROOT);
21
+ // Build a registry with mode base + preset so coverage audit works
22
+ // even when config/features.ts is empty (preset is source of truth).
23
+ const registry = new InMemoryPluginRegistry();
24
+ registry.registerModeBase(createLiteracyModeBase());
25
+ registry.registerPreset(createSanityLiteracyPreset({ rootDir: ROOT }));
26
+ const report = runCoverageAudit(ROOT, { registry });
19
27
  if (!report) {
20
28
  console.error("❌ Coverage audit failed. Ensure config/features.yaml exists and is valid.");
21
29
  process.exit(1);
@@ -23,6 +23,7 @@ import { TASK_FILE_NAMES } from "../_vendor/ailf-core/index.js";
23
23
  import { buildPipelinePlan, buildSimpleCommandPlan, } from "../pipeline/plan.js";
24
24
  import { formatPlanConsole, formatPlanJson } from "../pipeline/plan-format.js";
25
25
  import { computeResolvedOptions } from "./pipeline-action.js";
26
+ import { LiteracyVariant } from "../pipeline/normalize-mode.js";
26
27
  // ---------------------------------------------------------------------------
27
28
  // Registry
28
29
  // ---------------------------------------------------------------------------
@@ -84,8 +85,8 @@ const EXPLAIN_REGISTRY = {
84
85
  filesCreated: ["results/latest/score-summary.json"],
85
86
  filesRead: [
86
87
  "results/latest/eval-results.json",
87
- "config/rubrics.yaml",
88
- "config/models.yaml",
88
+ "config/rubrics.ts",
89
+ "config/models.ts",
89
90
  ],
90
91
  steps: [
91
92
  {
@@ -138,12 +139,12 @@ const EXPLAIN_REGISTRY = {
138
139
  },
139
140
  "coverage-audit": {
140
141
  description: "Cross-reference feature registry against evaluation tasks for coverage gaps",
141
- filesRead: ["config/features.yaml", "tasks/*.yaml"],
142
+ filesRead: ["config/features.ts", "tasks/*.{yaml,task.ts,task.js}"],
142
143
  steps: [
143
144
  {
144
145
  cacheStatus: "miss",
145
146
  name: "Load feature registry",
146
- reason: "Parse config/features.yaml for product feature list",
147
+ reason: "Parse config/features.ts for product feature list",
147
148
  willRun: true,
148
149
  },
149
150
  {
@@ -201,7 +202,7 @@ const EXPLAIN_REGISTRY = {
201
202
  "fetch-docs": {
202
203
  description: "Fetch documentation from Sanity CMS and generate canonical context files",
203
204
  filesCreated: ["contexts/canonical/*.md"],
204
- filesRead: ["config/sources.yaml", "config/models.yaml"],
205
+ filesRead: ["config/sources.ts", "config/models.ts"],
205
206
  steps: [
206
207
  {
207
208
  cacheStatus: "miss",
@@ -224,7 +225,7 @@ const EXPLAIN_REGISTRY = {
224
225
  ],
225
226
  },
226
227
  "generate-configs": {
227
- description: "Generate Promptfoo config files from models.yaml and task definitions",
228
+ description: "Generate Promptfoo config files from models.ts and task definitions",
228
229
  filesCreated: [
229
230
  "promptfooconfig.yaml",
230
231
  "promptfooconfig.observed.yaml",
@@ -232,16 +233,16 @@ const EXPLAIN_REGISTRY = {
232
233
  "tasks/.expanded.yaml",
233
234
  ],
234
235
  filesRead: [
235
- "config/models.yaml",
236
- "config/prompts.yaml",
237
- "config/rubrics.yaml",
238
- "config/sources.yaml",
236
+ "config/models.ts",
237
+ "config/prompts.ts",
238
+ "config/rubrics.ts",
239
+ "config/sources.ts",
239
240
  ],
240
241
  steps: [
241
242
  {
242
243
  cacheStatus: "miss",
243
244
  name: "Load models",
244
- reason: "Parse config/models.yaml for active model list",
245
+ reason: "Parse config/models.ts for active model list",
245
246
  willRun: true,
246
247
  },
247
248
  {
@@ -262,7 +263,7 @@ const EXPLAIN_REGISTRY = {
262
263
  description: "Grader reliability tools (consistency, compare, sensitivity, validate)",
263
264
  filesRead: [
264
265
  "results/latest/eval-results.json",
265
- "config/rubrics.yaml",
266
+ "config/rubrics.ts",
266
267
  "canonical/reference-solutions/",
267
268
  ],
268
269
  steps: [
@@ -369,7 +370,7 @@ const EXPLAIN_REGISTRY = {
369
370
  filesRead: [
370
371
  "results/latest/score-summary.json",
371
372
  "results/latest/gap-analysis.json",
372
- "config/thresholds.yaml",
373
+ "config/thresholds.ts",
373
374
  "results/baselines/",
374
375
  ],
375
376
  filesCreated: ["results/latest/readiness-report.md"],
@@ -377,7 +378,7 @@ const EXPLAIN_REGISTRY = {
377
378
  {
378
379
  cacheStatus: "miss",
379
380
  name: "Load scores + thresholds",
380
- reason: "Read score-summary.json and thresholds.yaml for gate evaluation",
381
+ reason: "Read score-summary.json and thresholds.ts for gate evaluation",
381
382
  willRun: true,
382
383
  },
383
384
  {
@@ -395,18 +396,18 @@ const EXPLAIN_REGISTRY = {
395
396
  ],
396
397
  },
397
398
  validate: {
398
- description: "Validate all YAML config files, task definitions, reference solutions, and environment",
399
+ description: "Validate all config files, task definitions, reference solutions, and environment",
399
400
  filesRead: [
400
- "config/models.yaml",
401
- "config/rubrics.yaml",
402
- "config/features.yaml",
403
- "config/thresholds.yaml",
401
+ "config/models.ts",
402
+ "config/rubrics.ts",
403
+ "config/features.ts",
404
+ "config/thresholds.ts",
404
405
  ],
405
406
  steps: [
406
407
  {
407
408
  cacheStatus: "miss",
408
409
  name: "Validate configuration",
409
- reason: "Parse all YAML configs through Zod schemas, cross-reference mappings",
410
+ reason: "Parse all config files through Zod schemas, cross-reference mappings",
410
411
  willRun: true,
411
412
  },
412
413
  {
@@ -454,12 +455,12 @@ const EXPLAIN_REGISTRY = {
454
455
  },
455
456
  "weekly-digest": {
456
457
  description: "Generate and deliver a weekly evaluation trend digest via Slack",
457
- filesRead: ["config/schedules.yaml", "config/sinks.yaml"],
458
+ filesRead: ["config/schedules.ts", "config/sinks.ts"],
458
459
  steps: [
459
460
  {
460
461
  cacheStatus: "miss",
461
462
  name: "Load digest config",
462
- reason: "Read schedules.yaml for lookback window and delivery targets",
463
+ reason: "Read schedules.ts for lookback window and delivery targets",
463
464
  willRun: true,
464
465
  },
465
466
  {
@@ -670,7 +671,7 @@ async function buildPipelineExplainPlan(actionCommand, rootDir) {
670
671
  graderReplications: raw.graderReplications,
671
672
  header: raw.header ?? [],
672
673
  headers: raw.headers ?? [],
673
- mode: raw.mode ?? "full",
674
+ mode: raw.mode ?? LiteracyVariant.FULL,
674
675
  output: raw.output,
675
676
  promptfooUrl: raw.promptfooUrl,
676
677
  publish: raw.publish,
@@ -714,6 +715,7 @@ async function buildPipelineExplainPlan(actionCommand, rootDir) {
714
715
  gapAnalysisEnabled: resolved.gapAnalysisEnabled,
715
716
  graderReplications: resolved.graderReplications,
716
717
  mode: resolved.mode,
718
+ variant: resolved.variant,
717
719
  noCache: resolved.noCache,
718
720
  publishEnabled: resolved.publishEnabled,
719
721
  readinessEnabled: resolved.readinessEnabled,
@@ -41,7 +41,7 @@ async function executeFetchDocs(opts) {
41
41
  // Build a minimal ResolvedConfig for the composition root
42
42
  const ctx = createAppContext({
43
43
  rootDir: ROOT,
44
- mode: "baseline",
44
+ mode: "literacy",
45
45
  noAutoScope: false,
46
46
  skipFetch: false,
47
47
  skipEval: true,
@@ -83,7 +83,8 @@ async function executeFetchDocs(opts) {
83
83
  }
84
84
  // Canonical contexts — same code path as the pipeline
85
85
  const tasks = await ctx.taskSource.loadTasks();
86
- const tasksWithDocs = tasks.filter((t) => t.canonicalDocs.length > 0);
86
+ // Bridge: narrow to literacy tasks with docs (only literacy tasks have context.docs)
87
+ const tasksWithDocs = tasks.filter((t) => t.mode === "literacy" && (t.context?.docs?.length ?? 0) > 0);
87
88
  if (tasksWithDocs.length > 0) {
88
89
  console.log("\nGenerating canonical (gold-retrieval) contexts...\n");
89
90
  const result = await fetcher.fetch(tasksWithDocs, resolvedSource);
@@ -19,7 +19,7 @@ export function createGenerateConfigsCommand() {
19
19
  try {
20
20
  const ctx = createAppContext({
21
21
  rootDir: ROOT,
22
- mode: "baseline",
22
+ mode: "literacy",
23
23
  noAutoScope: false,
24
24
  skipFetch: true,
25
25
  skipEval: true,
@@ -5,12 +5,14 @@
5
5
  * task files. The generated files are ready-to-edit starting points —
6
6
  * not live evaluation tasks.
7
7
  *
8
- * YAML output (default) preserves the inline comments from the source
9
- * YAML files in packages/core/examples/. JSON output is a plain
10
- * serialization of the parsed data — no comments.
8
+ * TypeScript output (default) uses define* helpers from @sanity/ailf-core
9
+ * for full IDE autocomplete and type checking. YAML output preserves
10
+ * inline comments from the source files. JSON output is a plain
11
+ * serialization of the parsed data.
11
12
  *
12
13
  * Usage:
13
- * ailf init # YAML output (default)
14
+ * ailf init # TypeScript output (default)
15
+ * ailf init --output-format yaml # YAML output
14
16
  * ailf init --output-format json # JSON output
15
17
  * ailf init --force # overwrite existing files
16
18
  * ailf init --path ./my-dir # target a specific directory