@sanity/ailf 1.0.0 → 2.0.1

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (499)
  1. package/README.md +0 -1
  2. package/canonical/grader-references/README.md +2 -2
  3. package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
  4. package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
  5. package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
  6. package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
  7. package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
  8. package/config/features.ts +1 -1
  9. package/config/models.ts +29 -12
  10. package/config/sources.ts +1 -1
  11. package/config/thresholds.ts +1 -1
  12. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
  13. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
  14. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
  15. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
  16. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
  17. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
  18. package/dist/_vendor/ailf-core/config-helpers.d.ts +20 -17
  19. package/dist/_vendor/ailf-core/config-helpers.js +51 -2
  20. package/dist/_vendor/ailf-core/examples/index.d.ts +166 -80
  21. package/dist/_vendor/ailf-core/examples/index.js +213 -94
  22. package/dist/_vendor/ailf-core/index.d.ts +3 -2
  23. package/dist/_vendor/ailf-core/index.js +2 -1
  24. package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
  25. package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
  26. package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
  27. package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
  28. package/dist/_vendor/ailf-core/ports/context.d.ts +22 -1
  29. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
  30. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
  31. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
  32. package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
  33. package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
  34. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
  35. package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
  36. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +7 -1
  37. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +16 -2
  38. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
  39. package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
  40. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
  41. package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
  42. package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
  43. package/dist/_vendor/ailf-core/services/index.js +1 -1
  44. package/dist/_vendor/ailf-core/services/scoring.js +9 -0
  45. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +25 -1
  46. package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
  47. package/dist/_vendor/ailf-core/types/index.d.ts +48 -7
  48. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +105 -23
  49. package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
  50. package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
  51. package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
  52. package/dist/adapters/api-client/remediation.js +2 -2
  53. package/dist/adapters/config-sources/file-config-adapter.js +7 -1
  54. package/dist/adapters/config-sources/ts-config-loader.js +21 -13
  55. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
  56. package/dist/adapters/index.d.ts +0 -1
  57. package/dist/adapters/index.js +0 -1
  58. package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
  59. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  60. package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
  61. package/dist/adapters/task-sources/content-lake-task-source.js +21 -26
  62. package/dist/adapters/task-sources/index.d.ts +3 -4
  63. package/dist/adapters/task-sources/index.js +3 -4
  64. package/dist/adapters/task-sources/repo-schemas.d.ts +219 -17
  65. package/dist/adapters/task-sources/repo-schemas.js +228 -20
  66. package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
  67. package/dist/adapters/task-sources/repo-task-source.js +81 -122
  68. package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
  69. package/dist/adapters/task-sources/repo-trigger.js +1 -1
  70. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  71. package/dist/adapters/task-sources/repo-validation.js +126 -5
  72. package/dist/adapters/task-sources/task-file-loader.d.ts +10 -7
  73. package/dist/adapters/task-sources/task-file-loader.js +21 -7
  74. package/dist/agent-observer/test-imports.d.ts +7 -0
  75. package/dist/agent-observer/test-imports.js +185 -0
  76. package/dist/artifact-capture/comparator.d.ts +22 -0
  77. package/dist/artifact-capture/comparator.js +493 -0
  78. package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
  79. package/dist/artifact-capture/filesystem-collector.js +237 -0
  80. package/dist/artifact-capture/redact-artifact.d.ts +20 -0
  81. package/dist/artifact-capture/redact-artifact.js +115 -0
  82. package/dist/assertions/source-isolation.d.ts +1 -1
  83. package/dist/assertions/source-isolation.js +1 -1
  84. package/dist/cli.js +4 -0
  85. package/dist/commands/calculate-scores.js +1 -0
  86. package/dist/commands/capture-compare.d.ts +15 -0
  87. package/dist/commands/capture-compare.js +253 -0
  88. package/dist/commands/capture-list.d.ts +12 -0
  89. package/dist/commands/capture-list.js +147 -0
  90. package/dist/commands/capture.d.ts +9 -0
  91. package/dist/commands/capture.js +16 -0
  92. package/dist/commands/chronic-failures.d.ts +8 -0
  93. package/dist/commands/chronic-failures.js +33 -0
  94. package/dist/commands/coverage-audit.js +3 -1
  95. package/dist/commands/explain-handler.d.ts +1 -1
  96. package/dist/commands/explain-handler.js +37 -8
  97. package/dist/commands/fetch-docs.js +1 -0
  98. package/dist/commands/generate-configs.d.ts +3 -3
  99. package/dist/commands/generate-configs.js +20 -8
  100. package/dist/commands/init.d.ts +5 -4
  101. package/dist/commands/init.js +190 -25
  102. package/dist/commands/pipeline-action.d.ts +7 -1
  103. package/dist/commands/pipeline-action.js +43 -19
  104. package/dist/commands/pipeline.d.ts +6 -1
  105. package/dist/commands/pipeline.js +7 -2
  106. package/dist/commands/pr-comment.js +1 -0
  107. package/dist/commands/publish.js +1 -0
  108. package/dist/commands/shared/help.js +2 -2
  109. package/dist/commands/update-quality-scores.d.ts +5 -0
  110. package/dist/commands/update-quality-scores.js +20 -0
  111. package/dist/commands/validate-tasks.d.ts +2 -2
  112. package/dist/commands/validate-tasks.js +26 -15
  113. package/dist/composition-root.d.ts +15 -4
  114. package/dist/composition-root.js +100 -55
  115. package/dist/config/features.ts +23 -0
  116. package/dist/config/models.ts +100 -0
  117. package/dist/config/prompts.ts +16 -0
  118. package/dist/config/rubrics.ts +225 -0
  119. package/dist/config/schedules.ts +47 -0
  120. package/dist/config/sinks.ts +37 -0
  121. package/dist/config/sources.ts +21 -0
  122. package/dist/config/thresholds.ts +61 -0
  123. package/dist/index.d.ts +41 -0
  124. package/dist/index.js +48 -0
  125. package/dist/lib/agent-behavior-report.d.ts +8 -0
  126. package/dist/lib/agent-behavior-report.js +185 -0
  127. package/dist/lib/baseline.d.ts +19 -0
  128. package/dist/lib/baseline.js +153 -0
  129. package/dist/lib/calculate-scores.d.ts +23 -0
  130. package/dist/lib/calculate-scores.js +42 -0
  131. package/dist/lib/compare.d.ts +18 -0
  132. package/dist/lib/compare.js +170 -0
  133. package/dist/lib/coverage-audit.d.ts +4 -0
  134. package/dist/lib/coverage-audit.js +42 -0
  135. package/dist/lib/discovery-report.d.ts +13 -0
  136. package/dist/lib/discovery-report.js +57 -0
  137. package/dist/lib/fetch-docs.d.ts +30 -0
  138. package/dist/lib/fetch-docs.js +171 -0
  139. package/dist/lib/generate-configs.d.ts +25 -0
  140. package/dist/lib/generate-configs.js +42 -0
  141. package/dist/lib/grader-api.d.ts +21 -0
  142. package/dist/lib/grader-api.js +34 -0
  143. package/dist/lib/grader-compare.d.ts +19 -0
  144. package/dist/lib/grader-compare.js +91 -0
  145. package/dist/lib/grader-consistency.d.ts +27 -0
  146. package/dist/lib/grader-consistency.js +79 -0
  147. package/dist/lib/grader-sensitivity.d.ts +19 -0
  148. package/dist/lib/grader-sensitivity.js +75 -0
  149. package/dist/lib/grader-validate.d.ts +19 -0
  150. package/dist/lib/grader-validate.js +78 -0
  151. package/dist/lib/measure-retrieval.d.ts +14 -0
  152. package/dist/lib/measure-retrieval.js +71 -0
  153. package/dist/lib/pr-comment.d.ts +16 -0
  154. package/dist/lib/pr-comment.js +28 -0
  155. package/dist/lib/readiness-report.d.ts +13 -0
  156. package/dist/lib/readiness-report.js +108 -0
  157. package/dist/lib/webhook-server.d.ts +11 -0
  158. package/dist/lib/webhook-server.js +24 -0
  159. package/dist/lib/weekly-digest.d.ts +24 -0
  160. package/dist/lib/weekly-digest.js +148 -0
  161. package/dist/orchestration/build-app-context.js +13 -0
  162. package/dist/orchestration/build-step-sequence.js +4 -2
  163. package/dist/orchestration/cache-context.d.ts +23 -0
  164. package/dist/orchestration/cache-context.js +43 -0
  165. package/dist/orchestration/env-bridge.d.ts +21 -0
  166. package/dist/orchestration/env-bridge.js +66 -0
  167. package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
  168. package/dist/orchestration/load-pipeline-tasks.js +52 -0
  169. package/dist/orchestration/pipeline-orchestrator.js +75 -5
  170. package/dist/orchestration/step-runner.js +5 -1
  171. package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
  172. package/dist/orchestration/steps/calculate-scores-step.js +13 -0
  173. package/dist/orchestration/steps/callback-step.js +10 -1
  174. package/dist/orchestration/steps/compare-step.js +6 -3
  175. package/dist/orchestration/steps/discovery-report-step.js +6 -2
  176. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  177. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  178. package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
  179. package/dist/orchestration/steps/fetch-docs-step.js +32 -19
  180. package/dist/orchestration/steps/gap-analysis-step.js +13 -2
  181. package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
  182. package/dist/orchestration/steps/generate-configs-step.js +77 -26
  183. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
  184. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  185. package/dist/orchestration/steps/publish-report-step.js +19 -0
  186. package/dist/orchestration/steps/readiness-step.js +8 -3
  187. package/dist/orchestration/steps/report-step.js +17 -4
  188. package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
  189. package/dist/orchestration/steps/run-eval-step.js +51 -31
  190. package/dist/pipeline/agent-behavior-report.js +6 -0
  191. package/dist/pipeline/attribution.d.ts +1 -1
  192. package/dist/pipeline/attribution.js +1 -1
  193. package/dist/pipeline/cache.js +29 -15
  194. package/dist/pipeline/calculate-scores.d.ts +2 -0
  195. package/dist/pipeline/calculate-scores.js +70 -33
  196. package/dist/pipeline/chronic-failures.d.ts +55 -0
  197. package/dist/pipeline/chronic-failures.js +110 -0
  198. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
  199. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
  200. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
  201. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +132 -62
  202. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
  203. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
  204. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
  205. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
  206. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
  207. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
  208. package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
  209. package/dist/pipeline/compiler/assertion-mapper.js +1 -1
  210. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
  211. package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
  212. package/dist/pipeline/compiler/config-loader.d.ts +14 -0
  213. package/dist/pipeline/compiler/config-loader.js +42 -2
  214. package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
  215. package/dist/pipeline/compiler/fixture-resolver.js +1 -1
  216. package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
  217. package/dist/pipeline/compiler/ignore-fields.js +1 -1
  218. package/dist/pipeline/compiler/index.d.ts +2 -5
  219. package/dist/pipeline/compiler/index.js +2 -5
  220. package/dist/pipeline/compiler/literacy-bridge.d.ts +2 -2
  221. package/dist/pipeline/compiler/literacy-bridge.js +2 -2
  222. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  223. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  224. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  225. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  226. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  227. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  228. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +23 -0
  229. package/dist/pipeline/compiler/mode-bases/literacy.js +132 -0
  230. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  231. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  232. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  233. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  234. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  235. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  236. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  237. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  238. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  239. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  240. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  241. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  242. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  243. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  244. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
  245. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  246. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  247. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  248. package/dist/pipeline/compiler/mode-handlers/index.d.ts +6 -7
  249. package/dist/pipeline/compiler/mode-handlers/index.js +6 -8
  250. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  251. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  252. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  253. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  254. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  255. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  256. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  257. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  258. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  259. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  260. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  261. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  262. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  263. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  264. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  265. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  266. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  267. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  268. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  269. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  270. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  271. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  272. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  273. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  274. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +63 -6
  275. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
  276. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
  277. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  278. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  279. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  280. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  281. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  282. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  283. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  284. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +108 -0
  285. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  286. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  287. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  288. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  289. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +3 -1
  290. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +65 -67
  291. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  292. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +191 -0
  293. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  294. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +101 -0
  295. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  296. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  297. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +19 -0
  298. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +323 -0
  299. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +103 -0
  300. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  301. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
  302. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
  303. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  304. package/dist/pipeline/compiler/preset-loader.js +99 -0
  305. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +7 -10
  306. package/dist/pipeline/compiler/presets/sanity-literacy.js +11 -157
  307. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
  308. package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
  309. package/dist/pipeline/compiler/provider-assembler.js +13 -7
  310. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
  311. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
  312. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
  313. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
  314. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
  315. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
  316. package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
  317. package/dist/pipeline/compiler/sandbox/index.js +1 -1
  318. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
  319. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
  320. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
  321. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
  322. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
  323. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
  324. package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
  325. package/dist/pipeline/compiler/scoring-bridge.js +1 -1
  326. package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
  327. package/dist/pipeline/compiler/task-bridge.js +92 -0
  328. package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
  329. package/dist/pipeline/compiler/task-graph-builder.js +1 -4
  330. package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
  331. package/dist/pipeline/compiler/telemetry/index.js +1 -1
  332. package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
  333. package/dist/pipeline/compiler/variable-resolver.js +1 -1
  334. package/dist/pipeline/coverage-audit.d.ts +1 -1
  335. package/dist/pipeline/coverage-audit.js +1 -1
  336. package/dist/pipeline/degradations.d.ts +1 -1
  337. package/dist/pipeline/degradations.js +1 -1
  338. package/dist/pipeline/expand-tasks.d.ts +2 -2
  339. package/dist/pipeline/expand-tasks.js +2 -2
  340. package/dist/pipeline/failure-modes.d.ts +1 -1
  341. package/dist/pipeline/failure-modes.js +13 -1
  342. package/dist/pipeline/gap-analysis.d.ts +1 -1
  343. package/dist/pipeline/gap-analysis.js +3 -1
  344. package/dist/pipeline/generate-configs.d.ts +2 -2
  345. package/dist/pipeline/generate-configs.js +16 -9
  346. package/dist/pipeline/grader-compare-runner.d.ts +1 -1
  347. package/dist/pipeline/grader-compare-runner.js +7 -1
  348. package/dist/pipeline/grader-comparison.d.ts +1 -1
  349. package/dist/pipeline/grader-comparison.js +1 -1
  350. package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
  351. package/dist/pipeline/grader-consistency-runner.js +7 -1
  352. package/dist/pipeline/grader-consistency.d.ts +1 -1
  353. package/dist/pipeline/grader-consistency.js +1 -1
  354. package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
  355. package/dist/pipeline/grader-sensitivity-runner.js +1 -1
  356. package/dist/pipeline/grader-sensitivity.d.ts +1 -1
  357. package/dist/pipeline/grader-sensitivity.js +1 -1
  358. package/dist/pipeline/grader-validate-runner.d.ts +1 -1
  359. package/dist/pipeline/grader-validate-runner.js +2 -2
  360. package/dist/pipeline/grader-validation.d.ts +1 -1
  361. package/dist/pipeline/grader-validation.js +1 -1
  362. package/dist/pipeline/map-request-to-config.js +16 -2
  363. package/dist/pipeline/mirror-repo-tasks.d.ts +8 -8
  364. package/dist/pipeline/mirror-repo-tasks.js +10 -10
  365. package/dist/pipeline/plan-format.d.ts +1 -1
  366. package/dist/pipeline/plan-format.js +1 -1
  367. package/dist/pipeline/plan.d.ts +1 -1
  368. package/dist/pipeline/plan.js +68 -30
  369. package/dist/pipeline/probe.d.ts +1 -1
  370. package/dist/pipeline/probe.js +1 -1
  371. package/dist/pipeline/readiness-report.d.ts +2 -2
  372. package/dist/pipeline/readiness-report.js +2 -2
  373. package/dist/pipeline/release-classification.d.ts +1 -1
  374. package/dist/pipeline/release-classification.js +1 -1
  375. package/dist/pipeline/release-report.d.ts +1 -1
  376. package/dist/pipeline/release-report.js +1 -1
  377. package/dist/pipeline/repo-eval-comment.d.ts +1 -1
  378. package/dist/pipeline/repo-eval-comment.js +1 -1
  379. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  380. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  381. package/dist/pipeline/resolve-mappings.d.ts +6 -6
  382. package/dist/pipeline/resolve-mappings.js +44 -44
  383. package/dist/pipeline/retrieval-metrics.d.ts +3 -3
  384. package/dist/pipeline/retrieval-metrics.js +28 -20
  385. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  386. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  387. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  388. package/dist/pipeline/steps/compare-step.js +90 -0
  389. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  390. package/dist/pipeline/steps/eval-step.js +347 -0
  391. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  392. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  393. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  394. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  395. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  396. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  397. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  398. package/dist/pipeline/steps/publish-report-step.js +243 -0
  399. package/dist/pipeline/steps/report-step.d.ts +13 -0
  400. package/dist/pipeline/steps/report-step.js +56 -0
  401. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  402. package/dist/pipeline/steps/update-scores-step.js +42 -0
  403. package/dist/pipeline/targeted-loo.d.ts +1 -1
  404. package/dist/pipeline/targeted-loo.js +1 -1
  405. package/dist/pipeline/thresholds.d.ts +1 -1
  406. package/dist/pipeline/thresholds.js +1 -1
  407. package/dist/pipeline/validate.js +13 -0
  408. package/dist/report-store.d.ts +17 -0
  409. package/dist/report-store.js +24 -0
  410. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  411. package/dist/scripts/agent-behavior-report.js +315 -0
  412. package/dist/scripts/baseline.d.ts +43 -0
  413. package/dist/scripts/baseline.js +267 -0
  414. package/dist/scripts/calculate-scores.d.ts +166 -0
  415. package/dist/scripts/calculate-scores.js +1296 -0
  416. package/dist/scripts/compare.d.ts +22 -0
  417. package/dist/scripts/compare.js +334 -0
  418. package/dist/scripts/coverage-audit.d.ts +44 -0
  419. package/dist/scripts/coverage-audit.js +209 -0
  420. package/dist/scripts/debug-eval.d.ts +19 -0
  421. package/dist/scripts/debug-eval.js +73 -0
  422. package/dist/scripts/discovery-report.d.ts +58 -0
  423. package/dist/scripts/discovery-report.js +250 -0
  424. package/dist/scripts/fetch-docs.d.ts +35 -0
  425. package/dist/scripts/fetch-docs.js +472 -0
  426. package/dist/scripts/generate-configs.d.ts +66 -0
  427. package/dist/scripts/generate-configs.js +459 -0
  428. package/dist/scripts/grader-api.d.ts +27 -0
  429. package/dist/scripts/grader-api.js +206 -0
  430. package/dist/scripts/grader-compare.d.ts +22 -0
  431. package/dist/scripts/grader-compare.js +368 -0
  432. package/dist/scripts/grader-consistency.d.ts +20 -0
  433. package/dist/scripts/grader-consistency.js +313 -0
  434. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  435. package/dist/scripts/grader-sensitivity.js +354 -0
  436. package/dist/scripts/grader-validate.d.ts +19 -0
  437. package/dist/scripts/grader-validate.js +267 -0
  438. package/dist/scripts/measure-retrieval.d.ts +10 -0
  439. package/dist/scripts/measure-retrieval.js +145 -0
  440. package/dist/scripts/migrate-task-mode.d.ts +1 -1
  441. package/dist/scripts/migrate-task-mode.js +1 -1
  442. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
  443. package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
  444. package/dist/scripts/pipeline.d.ts +76 -0
  445. package/dist/scripts/pipeline.js +1031 -0
  446. package/dist/scripts/pr-comment.d.ts +10 -0
  447. package/dist/scripts/pr-comment.js +510 -0
  448. package/dist/scripts/readiness-report.d.ts +88 -0
  449. package/dist/scripts/readiness-report.js +342 -0
  450. package/dist/scripts/update-quality-scores.d.ts +15 -0
  451. package/dist/scripts/update-quality-scores.js +184 -0
  452. package/dist/scripts/validate-task-sources.d.ts +1 -1
  453. package/dist/scripts/validate-task-sources.js +1 -1
  454. package/dist/scripts/validate.d.ts +13 -0
  455. package/dist/scripts/validate.js +79 -0
  456. package/dist/scripts/webhook-server.d.ts +26 -0
  457. package/dist/scripts/webhook-server.js +147 -0
  458. package/dist/scripts/weekly-digest.d.ts +24 -0
  459. package/dist/scripts/weekly-digest.js +144 -0
  460. package/dist/sinks/format-slack.d.ts +64 -0
  461. package/dist/sinks/format-slack.js +306 -0
  462. package/dist/sinks/slack-sink.d.ts +27 -0
  463. package/dist/sinks/slack-sink.js +78 -0
  464. package/dist/sinks/types.d.ts +1 -1
  465. package/dist/sinks/types.js +1 -1
  466. package/dist/sinks/webhook-sink.d.ts +19 -0
  467. package/dist/sinks/webhook-sink.js +50 -0
  468. package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
  469. package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
  470. package/dist/tasks/literacy/content-lake.task.ts +181 -0
  471. package/dist/tasks/literacy/frameworks.task.ts +129 -0
  472. package/dist/tasks/literacy/functions.task.ts +70 -0
  473. package/dist/tasks/literacy/groq.task.ts +259 -0
  474. package/dist/tasks/literacy/image-handling.task.ts +95 -0
  475. package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
  476. package/dist/tasks/literacy/portable-text.task.ts +169 -0
  477. package/dist/tasks/literacy/studio-setup.task.ts +134 -0
  478. package/dist/tasks/literacy/visual-editing.task.ts +147 -0
  479. package/package.json +32 -24
  480. package/tasks/.expanded.agentic.yaml +280 -0
  481. package/tasks/.expanded.yaml +565 -0
  482. package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
  483. package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
  484. package/tasks/literacy/content-lake.task.ts +181 -0
  485. package/tasks/literacy/frameworks.task.ts +1 -0
  486. package/tasks/literacy/functions.task.ts +1 -0
  487. package/tasks/literacy/groq.task.ts +1 -0
  488. package/tasks/literacy/image-handling.task.ts +95 -0
  489. package/tasks/literacy/nextjs-live.task.ts +2 -1
  490. package/tasks/literacy/portable-text.task.ts +169 -0
  491. package/tasks/literacy/studio-setup.task.ts +5 -2
  492. package/tasks/literacy/visual-editing.task.ts +1 -0
  493. package/LICENSE +0 -21
  494. package/tasks/frameworks.yaml +0 -98
  495. package/tasks/functions.yaml +0 -51
  496. package/tasks/groq.yaml +0 -216
  497. package/tasks/nextjs-live.yaml +0 -62
  498. package/tasks/studio-setup.yaml +0 -111
  499. package/tasks/visual-editing.yaml +0 -120
@@ -1,14 +1,14 @@
1
1
  /**
2
- * generate-configs command — generate promptfoo config files from models.yaml.
2
+ * generate-configs command — generate promptfoo config files via the compiler pipeline.
3
3
  *
4
- * Uses the composition root to wire adapters, then calls generateConfigs()
5
- * directly — the same code path as the pipeline.
4
+ * Uses the composition root to wire adapters, then runs GenerateConfigsStep
5
+ * directly — the same code path as the full pipeline.
6
6
  */
7
7
  import { dirname, resolve } from "path";
8
8
  import { fileURLToPath } from "url";
9
9
  import { Command } from "commander";
10
10
  import { createAppContext } from "../composition-root.js";
11
- import { generateConfigs } from "../pipeline/generate-configs.js";
11
+ import { GenerateConfigsStep } from "../orchestration/steps/generate-configs-step.js";
12
12
  const __dirname = dirname(fileURLToPath(import.meta.url));
13
13
  const ROOT = resolve(__dirname, "..", "..");
14
14
  export function createGenerateConfigsCommand() {
@@ -19,6 +19,7 @@ export function createGenerateConfigsCommand() {
19
19
  try {
20
20
  const ctx = createAppContext({
21
21
  rootDir: ROOT,
22
+ outputDir: resolve(ROOT, "results", "latest"),
22
23
  mode: "literacy",
23
24
  noAutoScope: false,
24
25
  skipFetch: true,
@@ -35,10 +36,21 @@ export function createGenerateConfigsCommand() {
35
36
  remote: false,
36
37
  apiUrl: "https://ailf-api.sanity.build",
37
38
  });
38
- generateConfigs({
39
- rootDir: ctx.config.rootDir,
40
- source: opts.source,
41
- });
39
+ const step = new GenerateConfigsStep();
40
+ // Run validation checks first
41
+ const issues = step.check(ctx);
42
+ if (issues.length > 0) {
43
+ for (const issue of issues) {
44
+ console.error(` ❌ ${issue.message}`);
45
+ }
46
+ process.exitCode = 1;
47
+ return;
48
+ }
49
+ const result = await step.execute(ctx, {});
50
+ if (result.status === "failed") {
51
+ console.error(`❌ ${result.error}`);
52
+ process.exitCode = 1;
53
+ }
42
54
  }
43
55
  catch (err) {
44
56
  process.exitCode = 1;
@@ -5,12 +5,13 @@
5
5
  * task files. The generated files are ready-to-edit starting points —
6
6
  * not live evaluation tasks.
7
7
  *
8
- * YAML output (default) preserves the inline comments from the source
9
- * YAML files in packages/core/examples/. JSON output is a plain
10
- * serialization of the parsed data — no comments.
8
+ * TypeScript output (default) uses define* helpers from @sanity/ailf-core
9
+ * for full IDE autocomplete and type checking. YAML output writes the bundled
10
+ * YAML sources as-is (comments preserved). JSON output is a plain serialization of the parsed data.
11
11
  *
12
12
  * Usage:
13
- * ailf init # YAML output (default)
13
+ * ailf init # TypeScript output (default)
14
+ * ailf init --output-format yaml # YAML output (deprecated)
14
15
  * ailf init --output-format json # JSON output
15
16
  * ailf init --force # overwrite existing files
16
17
  * ailf init --path ./my-dir # target a specific directory
@@ -5,12 +5,13 @@
5
5
  * task files. The generated files are ready-to-edit starting points —
6
6
  * not live evaluation tasks.
7
7
  *
8
- * YAML output (default) preserves the inline comments from the source
9
- * YAML files in packages/core/examples/. JSON output is a plain
10
- * serialization of the parsed data — no comments.
8
+ * TypeScript output (default) uses define* helpers from @sanity/ailf-core
9
+ * for full IDE autocomplete and type checking. YAML output writes the bundled
10
+ * YAML sources as-is (comments preserved). JSON output is a plain serialization of the parsed data.
11
11
  *
12
12
  * Usage:
13
- * ailf init # YAML output (default)
13
+ * ailf init # TypeScript output (default)
14
+ * ailf init --output-format yaml # YAML output (deprecated)
14
15
  * ailf init --output-format json # JSON output
15
16
  * ailf init --force # overwrite existing files
16
17
  * ailf init --path ./my-dir # target a specific directory
@@ -18,16 +19,17 @@
18
19
  import { Command } from "commander";
19
20
  import { existsSync, mkdirSync, writeFileSync } from "fs";
20
21
  import { resolve, relative } from "path";
21
- import { ailfConfigData, ailfConfigYaml, taskYamlFiles, TASK_FILE_NAMES, allTaskData, workflowYaml, } from "../_vendor/ailf-core/index.js";
22
+ import { ailfConfigData, ailfConfigYaml, ailfConfigTs, taskYamlFiles, taskTsFiles, TASK_FILE_NAMES, TASK_EXAMPLES, allTaskData, workflowYaml, } from "../_vendor/ailf-core/index.js";
22
23
  // ---------------------------------------------------------------------------
23
24
  // Command factory
24
25
  // ---------------------------------------------------------------------------
25
26
  export function createInitCommand() {
26
27
  return new Command("init")
27
28
  .description("Initialize a directory for AI Literacy Framework evaluation")
28
- .option("--output-format <fmt>", 'Output format for generated files: "yaml" (default) or "json"', "yaml")
29
+ .option("--output-format <fmt>", 'Output format for generated files: "ts" (default), "yaml", or "json"', "ts")
29
30
  .option("--force", "Overwrite existing files", false)
30
31
  .option("--path <dir>", "Target directory (default: current directory)", ".")
32
+ .option("--mode <mode>", "Scaffold for a specific mode: literacy, mcp-server, custom (default: all modes)")
31
33
  .action(async (opts) => {
32
34
  await runInit(opts);
33
35
  });
@@ -51,13 +53,27 @@ function rel(from, to) {
51
53
  const r = relative(from, to);
52
54
  return r.startsWith(".") ? r : `./${r}`;
53
55
  }
56
+ /** Filter task stems by mode using TASK_EXAMPLES metadata */
57
+ function taskStemsForMode(mode) {
58
+ return TASK_EXAMPLES.filter((t) => t.mode === mode).map((t) => t.stem);
59
+ }
54
60
  // ---------------------------------------------------------------------------
55
61
  // Init logic
56
62
  // ---------------------------------------------------------------------------
57
63
  async function runInit(opts) {
58
- const format = opts.outputFormat === "json" ? "json" : "yaml";
59
- const ext = format === "json" ? ".json" : ".yaml";
64
+ const validFormats = new Set(["ts", "yaml", "json"]);
65
+ if (!validFormats.has(opts.outputFormat)) {
66
+ console.error(` ✗ Invalid output format "${opts.outputFormat}". Valid options: ts, yaml, json`);
67
+ process.exitCode = 1;
68
+ return;
69
+ }
70
+ const format = opts.outputFormat;
60
71
  const force = opts.force;
72
+ if (format === "yaml") {
73
+ console.warn(" ⚠ --output-format yaml is deprecated. TypeScript (default) is the\n" +
74
+ " recommended format — it provides full IDE autocomplete via defineTask().\n" +
75
+ " YAML output will be removed in a future release.\n");
76
+ }
61
77
  // Resolve target from the caller's actual working directory
62
78
  const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
63
79
  const targetDir = resolve(callerCwd, opts.path);
@@ -72,27 +88,87 @@ async function runInit(opts) {
72
88
  console.log(` ✓ Created ${rel(targetDir, tasksDir)}/`);
73
89
  const written = [];
74
90
  const skipped = [];
75
- // 2. Write .ailf/config.yaml (or .json)
76
- // YAML: raw string passthrough (preserves comments)
77
- // JSON: serialize the parsed data
78
- const configPath = resolve(ailfDir, `config${ext}`);
79
- const configContent = format === "yaml"
80
- ? ailfConfigYaml
81
- : JSON.stringify(ailfConfigData, null, 2) + "\n";
82
- if (writeIfNew(configPath, configContent, force)) {
83
- written.push(rel(targetDir, configPath));
91
+ // 2. Write project config
92
+ if (format === "ts") {
93
+ // TypeScript: ailf.config.ts with defineConfig helper
94
+ const configPath = resolve(ailfDir, "ailf.config.ts");
95
+ if (writeIfNew(configPath, ailfConfigTs, force)) {
96
+ written.push(rel(targetDir, configPath));
97
+ }
98
+ else {
99
+ skipped.push(rel(targetDir, configPath));
100
+ }
101
+ }
102
+ else if (format === "yaml") {
103
+ // YAML: raw string passthrough (preserves comments)
104
+ const configPath = resolve(ailfDir, "config.yaml");
105
+ if (writeIfNew(configPath, ailfConfigYaml, force)) {
106
+ written.push(rel(targetDir, configPath));
107
+ }
108
+ else {
109
+ skipped.push(rel(targetDir, configPath));
110
+ }
84
111
  }
85
112
  else {
86
- skipped.push(rel(targetDir, configPath));
113
+ // JSON: serialize the parsed data
114
+ const configPath = resolve(ailfDir, "config.json");
115
+ const content = JSON.stringify(ailfConfigData, null, 2) + "\n";
116
+ if (writeIfNew(configPath, content, force)) {
117
+ written.push(rel(targetDir, configPath));
118
+ }
119
+ else {
120
+ skipped.push(rel(targetDir, configPath));
121
+ }
87
122
  }
88
123
  // 3. Write example tasks to .ailf/tasks/
89
- // YAML: raw string passthrough (preserves comments)
90
- // JSON: serialize individual task data
91
- if (format === "yaml") {
92
- // Each task is its own commented YAML file — write as-is
93
- for (const stem of TASK_FILE_NAMES) {
94
- const taskPath = resolve(tasksDir, `${stem}.yaml`);
124
+ const modeFilter = opts.mode;
125
+ const isCustomMode = modeFilter === "custom";
126
+ // Determine which task stems to write based on mode filter
127
+ let stemsToWrite;
128
+ if (isCustomMode) {
129
+ // Custom mode: write one literacy example as a starting point
130
+ stemsToWrite = taskStemsForMode("literacy").slice(0, 1);
131
+ }
132
+ else if (modeFilter === "literacy") {
133
+ stemsToWrite = taskStemsForMode("literacy");
134
+ }
135
+ else if (modeFilter === "mcp-server") {
136
+ stemsToWrite = taskStemsForMode("mcp-server");
137
+ }
138
+ else if (modeFilter === "knowledge-probe") {
139
+ stemsToWrite = taskStemsForMode("knowledge-probe");
140
+ }
141
+ else {
142
+ // Default (no --mode): write all tasks
143
+ stemsToWrite = [...TASK_FILE_NAMES];
144
+ }
145
+ if (format === "ts") {
146
+ for (const stem of stemsToWrite) {
147
+ let content = taskTsFiles[stem];
148
+ if (!content)
149
+ continue;
150
+ // For MCP-only init, activate the draft task
151
+ if (modeFilter === "mcp-server") {
152
+ content = content.replace('status: "draft",', '// status: "active", // Activated — this task runs in evaluations');
153
+ }
154
+ const fileName = isCustomMode && stem === stemsToWrite[0]
155
+ ? "example-custom.task.ts"
156
+ : `${stem}.task.ts`;
157
+ const taskPath = resolve(tasksDir, fileName);
158
+ if (writeIfNew(taskPath, content, force)) {
159
+ written.push(rel(targetDir, taskPath));
160
+ }
161
+ else {
162
+ skipped.push(rel(targetDir, taskPath));
163
+ }
164
+ }
165
+ }
166
+ else if (format === "yaml") {
167
+ for (const stem of stemsToWrite) {
95
168
  const content = taskYamlFiles[stem];
169
+ if (!content)
170
+ continue;
171
+ const taskPath = resolve(tasksDir, `${stem}.yaml`);
96
172
  if (writeIfNew(taskPath, content, force)) {
97
173
  written.push(rel(targetDir, taskPath));
98
174
  }
@@ -106,8 +182,12 @@ async function runInit(opts) {
106
182
  const tasks = Array.isArray(allTaskData)
107
183
  ? allTaskData
108
184
  : [allTaskData];
185
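+ // Collect the selected stems that appear in TASK_EXAMPLES. NOTE(review): these are stems, not ids — the filter below assumes task.id equals the stem; confirm.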
186
+ const selectedIds = new Set(stemsToWrite.flatMap((s) => TASK_EXAMPLES.filter((t) => t.stem === s).map((t) => t.stem)));
109
187
  for (const task of tasks) {
110
188
  const taskId = task.id;
189
+ if (!selectedIds.has(taskId))
190
+ continue;
111
191
  const taskPath = resolve(tasksDir, `${taskId}.json`);
112
192
  const content = JSON.stringify([task], null, 2) + "\n";
113
193
  if (writeIfNew(taskPath, content, force)) {
@@ -118,6 +198,16 @@ async function runInit(opts) {
118
198
  }
119
199
  }
120
200
  }
201
+ // 3b. Write custom preset scaffold (--mode custom only)
202
+ if (isCustomMode && format === "ts") {
203
+ const presetPath = resolve(ailfDir, "preset.ts");
204
+ if (writeIfNew(presetPath, CUSTOM_PRESET_TS, force)) {
205
+ written.push(rel(targetDir, presetPath));
206
+ }
207
+ else {
208
+ skipped.push(rel(targetDir, presetPath));
209
+ }
210
+ }
121
211
  // 4. Write .gitignore in .ailf/ (keep results out of version control)
122
212
  const gitignorePath = resolve(ailfDir, ".gitignore");
123
213
  const gitignoreContent = `# AILF generated files\nresults/\ncontexts/\n`;
@@ -150,18 +240,24 @@ async function runInit(opts) {
150
240
  console.log(` ⊘ Skipped ${f} (already exists, use --force to overwrite)`);
151
241
  }
152
242
  }
243
+ const taskExt = format === "ts" ? ".task.ts" : format === "yaml" ? ".yaml" : ".json";
153
244
  console.log();
154
245
  console.log(" Next steps:");
155
246
  console.log();
156
247
  console.log(` 1. Edit the example tasks in ${rel(targetDir, tasksDir)}/ — update`);
157
248
  console.log(" slugs and prompts for your documentation");
158
- console.log(" 2. Validate locally: npx @sanity/ailf@latest validate-tasks .ailf/tasks/");
249
+ console.log(` 2. Validate locally: npx @sanity/ailf@latest validate-tasks .ailf/tasks/`);
159
250
  console.log(" 3. Add two GitHub Actions secrets");
160
251
  console.log(" (Settings → Secrets and variables → Actions):");
161
252
  console.log(" • AILF_API_KEY — your API key (starts with ailf_live_sk_)");
162
253
  console.log(" • NPM_TOKEN — npm token with read access to @sanity scope");
163
254
  console.log(" 4. Push — the workflow at .github/workflows/ailf-eval.yml runs");
164
255
  console.log(" automatically on PRs");
256
+ if (format === "ts") {
257
+ console.log();
258
+ console.log(` 💡 TypeScript tasks (${taskExt}) give you full IDE autocomplete`);
259
+ console.log(" via defineTask() from @sanity/ailf-core.");
260
+ }
165
261
  console.log();
166
262
  console.log(" 🔑 Retrieve secrets from 1Password (Sanity employees):");
167
263
  console.log();
@@ -177,3 +273,72 @@ async function runInit(opts) {
177
273
  console.log(" AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --debug");
178
274
  console.log();
179
275
  }
276
+ // ---------------------------------------------------------------------------
277
+ // Custom preset scaffold template
278
+ // ---------------------------------------------------------------------------
279
+ const CUSTOM_PRESET_TS = `/**
280
+ * Custom preset — your domain-specific evaluation configuration.
281
+ *
282
+ * This preset targets the "literacy" mode base and inherits its evaluation
283
+ * methodology (rubrics, scoring profiles, prompt templates). You only need
284
+ * to provide domain-specific configuration: where your docs live, what
285
+ * features to track, and how to fetch documentation.
286
+ *
287
+ * To use a different mode (e.g., "mcp-server"), change the mode field.
288
+ * Available built-in modes: literacy, mcp-server, knowledge-probe, agent-harness.
289
+ *
290
+ * @see https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/presets.md
291
+ */
292
+
293
+ import { definePreset } from "../_vendor/ailf-core/index.js"
294
+
295
+ export default definePreset({
296
+ name: "my-docs-evaluation",
297
+ manifest: {
298
+ name: "my-docs-evaluation",
299
+ version: "1.0.0",
300
+ description: "Documentation literacy evaluation for my project.",
301
+ pluginApiVersion: 1,
302
+ },
303
+
304
+ // Target the literacy mode base — inherits rubrics, scoring, prompts.
305
+ // Change to "mcp-server" to evaluate MCP tool usage instead.
306
+ mode: "literacy",
307
+
308
+ // Source definitions — where your documentation lives.
309
+ sourceDefs: [
310
+ {
311
+ name: "production",
312
+ baseUrl: "https://docs.example.com",
313
+ // projectId: "your-sanity-project-id",
314
+ // dataset: "production",
315
+ },
316
+ ],
317
+
318
+ // Feature registry — what product features you're tracking coverage for.
319
+ featureDefs: {
320
+ features: [
321
+ {
322
+ id: "getting-started",
323
+ name: "Getting Started Guide",
324
+ sections: ["guides"],
325
+ status: "covered",
326
+ area: "guides",
327
+ priority: "critical",
328
+ },
329
+ {
330
+ id: "api-reference",
331
+ name: "API Reference",
332
+ sections: ["reference"],
333
+ status: "uncovered",
334
+ priority: "high",
335
+ },
336
+ ],
337
+ },
338
+
339
+ // Optional: override mode base rubrics, scoring, or prompts here.
340
+ // rubricTemplates: [{ ... }],
341
+ // scoringProfiles: { ... },
342
+ // promptTemplates: { ... },
343
+ })
344
+ `;
@@ -36,6 +36,8 @@ export interface ResolvedOptions {
36
36
  noAutoScope: boolean;
37
37
  noCache: boolean;
38
38
  noRemoteCache: boolean;
39
+ /** Base directory for user-facing pipeline output artifacts (always resolved). */
40
+ outputDir: string;
39
41
  outputPath?: string;
40
42
  perspectiveOverride?: string;
41
43
  projectIdOverride?: string;
@@ -57,10 +59,14 @@ export interface ResolvedOptions {
57
59
  repoTasksPath?: string;
58
60
  taskOption?: string;
59
61
  tagOption?: string[];
60
- taskSourceType?: "content-lake" | "repo" | "yaml";
62
+ taskSourceType?: "content-lake" | "repo";
61
63
  urlArgs: string[];
62
64
  apiUrl: string;
63
65
  apiKey?: string;
66
+ captureEnabled: boolean;
67
+ captureDir?: string;
68
+ captureCompress: boolean;
69
+ captureExtras: boolean;
64
70
  }
65
71
  /**
66
72
  * Pure option resolution — computes ResolvedOptions from CLI flags without
@@ -10,7 +10,7 @@
10
10
  *
11
11
  * @see packages/eval/src/orchestration/ for the step-based pipeline
12
12
  */
13
- import { existsSync, readFileSync, writeFileSync } from "fs";
13
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
14
14
  import { dirname, resolve } from "path";
15
15
  import { fileURLToPath } from "url";
16
16
  import { classifyUrls } from "../pipeline/classify-url.js";
@@ -209,6 +209,23 @@ export function computeResolvedOptions(opts) {
209
209
  const remote = opts.remote || process.env.AILF_REMOTE === "1";
210
210
  const apiUrl = opts.apiUrl ?? process.env.AILF_API_URL ?? "https://ailf-api.sanity.build";
211
211
  const apiKey = process.env.AILF_API_KEY ?? undefined;
212
+ // Output directory: explicit flag → repo-task heuristic → default
213
+ const resolvedRepoTasksPath = opts.repoTasksPath
214
+ ? resolve(callerCwd, opts.repoTasksPath)
215
+ : undefined;
216
+ const resolvedTaskSourceType = resolveTaskSourceType(opts.taskSource);
217
+ let outputDir;
218
+ if (opts.outputDir) {
219
+ outputDir = resolve(callerCwd, opts.outputDir);
220
+ }
221
+ else if (resolvedTaskSourceType === "repo" || resolvedRepoTasksPath) {
222
+ outputDir = resolvedRepoTasksPath
223
+ ? resolve(resolvedRepoTasksPath, "..", "results", "latest")
224
+ : resolve(callerCwd, ".ailf", "results", "latest");
225
+ }
226
+ else {
227
+ outputDir = resolve(ROOT, "results", "latest");
228
+ }
212
229
  return {
213
230
  allowedOriginArgs,
214
231
  apiKey,
@@ -233,6 +250,7 @@ export function computeResolvedOptions(opts) {
233
250
  noAutoScope: opts.autoScope === false,
234
251
  noCache: !opts.cache,
235
252
  noRemoteCache: opts.remoteCache === false,
253
+ outputDir,
236
254
  outputPath: opts.output,
237
255
  perspectiveOverride,
238
256
  projectIdOverride,
@@ -250,24 +268,25 @@ export function computeResolvedOptions(opts) {
250
268
  skipFetch: opts.skipFetch,
251
269
  source: opts.source,
252
270
  studioOriginOverride,
253
- repoTasksPath: opts.repoTasksPath
254
- ? resolve(callerCwd, opts.repoTasksPath)
255
- : undefined,
271
+ repoTasksPath: resolvedRepoTasksPath,
256
272
  taskOption,
257
273
  tagOption,
258
- taskSourceType: resolveTaskSourceType(opts.taskSource),
274
+ taskSourceType: resolvedTaskSourceType,
259
275
  urlArgs,
276
+ captureEnabled: opts.capture || process.env.AILF_CAPTURE === "1",
277
+ captureDir: opts.captureDir ?? process.env.AILF_CAPTURE_DIR,
278
+ captureCompress: opts.captureCompress !== false &&
279
+ process.env.AILF_CAPTURE_COMPRESS !== "0",
280
+ captureExtras: opts.captureExtras !== false && process.env.AILF_CAPTURE_EXTRAS !== "0",
260
281
  };
261
282
  }
262
283
  /** Resolve and validate the --task-source flag value. */
263
284
  function resolveTaskSourceType(raw) {
264
285
  if (!raw || raw === "content-lake")
265
286
  return undefined; // default — Content Lake
266
- if (raw === "yaml")
267
- return "yaml";
268
287
  if (raw === "repo")
269
288
  return "repo";
270
- console.error(`❌ Invalid --task-source "${raw}". Must be "yaml", "repo", or "content-lake".`);
289
+ console.error(`❌ Invalid --task-source "${raw}". Must be "repo" or "content-lake".`);
271
290
  process.exit(1);
272
291
  }
273
292
  // ---------------------------------------------------------------------------
@@ -304,17 +323,26 @@ export async function executePipeline(cliOpts) {
304
323
  if (cliOpts.output) {
305
324
  config.outputPath = resolve(callerCwd, cliOpts.output);
306
325
  }
326
+ // Output dir: explicit CLI flag → repo-task heuristic → file-config default
327
+ if (cliOpts.outputDir) {
328
+ config.outputDir = resolve(callerCwd, cliOpts.outputDir);
329
+ }
330
+ else if (config.repoTasksPath) {
331
+ config.outputDir = resolve(config.repoTasksPath, "..", "results", "latest");
332
+ }
307
333
  // Create AppContext directly from the merged config so adapters
308
334
  // (especially taskSource) are wired from the file config's
309
335
  // taskSourceType — not from CLI defaults.
336
+ console.log(` 📂 Output directory: ${config.outputDir}`);
310
337
  const ctx = createAppContext(config);
311
338
  const pipelineStart = Date.now();
312
339
  const steps = buildStepSequence(ctx, pipelineStart);
313
340
  const result = await orchestratePipeline(ctx, steps);
314
- writePipelineResult(result);
341
+ writePipelineResult(result, config.outputDir);
315
342
  process.exit(result.success ? 0 : 1);
316
343
  }
317
344
  const o = resolveOptions(cliOpts);
345
+ console.log(` 📂 Output directory: ${o.outputDir}`);
318
346
  // Remote mode — submit to AILF API instead of running locally.
319
347
  // Use the caller's working directory (not the package root) because
320
348
  // remote mode reads .ailf/tasks/ from the user's repo, not from
@@ -350,7 +378,7 @@ export async function executePipeline(cliOpts) {
350
378
  const pipelineStart = Date.now();
351
379
  const steps = buildStepSequence(ctx, pipelineStart);
352
380
  const result = await orchestratePipeline(ctx, steps);
353
- writePipelineResult(result);
381
+ writePipelineResult(result, o.outputDir);
354
382
  process.exit(result.success ? 0 : 1);
355
383
  }
356
384
  // ---------------------------------------------------------------------------
@@ -362,15 +390,11 @@ export async function executePipeline(cliOpts) {
362
390
  function resolveOptions(opts) {
363
391
  return computeResolvedOptions(opts);
364
392
  }
365
- function writePipelineResult(result) {
366
- const resultFile = resolve(ROOT, "results", "latest", "pipeline-result.json");
367
- try {
368
- writeFileSync(resultFile, JSON.stringify(result, null, 2));
369
- console.log(` 📄 Pipeline result: ${resultFile}\n`);
370
- }
371
- catch {
372
- // results/latest/ may not exist yet — not critical
373
- }
393
+ function writePipelineResult(result, outputDir) {
394
+ mkdirSync(outputDir, { recursive: true });
395
+ const resultFile = resolve(outputDir, "pipeline-result.json");
396
+ writeFileSync(resultFile, JSON.stringify(result, null, 2));
397
+ console.log(` 📄 Pipeline result: ${resultFile}\n`);
374
398
  }
375
399
  /**
376
400
  * Load .ailf/config.yaml if --repo-tasks-path is set and the config file
@@ -5,7 +5,7 @@
5
5
  * options object, bridges to process.env for downstream modules, and
6
6
  * delegates to runPipeline().
7
7
  *
8
- * @see docs/CLI.md for the full flag reference.
8
+ * @see docs/cli.md for the full flag reference.
9
9
  */
10
10
  import { Command } from "commander";
11
11
  /**
@@ -37,6 +37,7 @@ export interface PipelineCliOptions {
37
37
  mode: string;
38
38
  variant?: string;
39
39
  output?: string;
40
+ outputDir?: string;
40
41
  promptfooUrl?: string;
41
42
  publish?: boolean;
42
43
  publishTag?: string;
@@ -63,5 +64,9 @@ export interface PipelineCliOptions {
63
64
  url: string[];
64
65
  urls: string[];
65
66
  apiUrl?: string;
67
+ capture: boolean;
68
+ captureDir?: string;
69
+ captureCompress: boolean;
70
+ captureExtras: boolean;
66
71
  }
67
72
  export declare function createPipelineCommand(): Command;
@@ -5,7 +5,7 @@
5
5
  * options object, bridges to process.env for downstream modules, and
6
6
  * delegates to executePipeline() in pipeline-action.js.
7
7
  *
8
- * @see docs/CLI.md for the full flag reference.
8
+ * @see docs/cli.md for the full flag reference.
9
9
  */
10
10
  import { Command } from "commander";
11
11
  import { LiteracyVariant } from "../pipeline/normalize-mode.js";
@@ -48,11 +48,16 @@ export function createPipelineCommand() {
48
48
  .option("--report-project <id>", "Sanity project ID for report store")
49
49
  .option("--config <path>", "Load pipeline config from a TS/JS/YAML/JSON file (overrides most CLI flags)")
50
50
  .option("-o, --output <path>", "Write PR comment markdown to file")
51
+ .option("--output-dir <path>", "Base directory for pipeline output artifacts (default: inferred from execution context)")
51
52
  .option("--promptfoo-url <url>", "Promptfoo share URL for report")
52
- .option("--task-source <type>", "Task definition source: content-lake (default — Sanity Content Lake), repo (repo tasks only, no Content Lake merge), yaml (tasks/*.yaml files, legacy)", "content-lake")
53
+ .option("--task-source <type>", "Task definition source: content-lake (default — Sanity Content Lake), repo (repo tasks only, no Content Lake merge)", "content-lake")
53
54
  .option("--repo-tasks-path <path>", "Path to repo-based task definitions (.ailf/tasks/ directory)")
54
55
  .option("--remote", "Submit evaluation to the AILF API instead of running locally", false)
55
56
  .option("--api-url <url>", "AILF API base URL (default: https://ailf-api.sanity.build)")
57
+ .option("--capture", "Enable artifact capture for this run", false)
58
+ .option("--capture-dir <path>", "Base directory for capture output (default: results/captures/)")
59
+ .option("--no-capture-compress", "Disable tar.gz compression of captures")
60
+ .option("--no-capture-extras", "Exclude mode-specific artifacts from captures")
56
61
  .action(async (opts) => {
57
62
  const { executePipeline } = await import("./pipeline-action.js");
58
63
  await executePipeline(opts);
@@ -20,6 +20,7 @@ export function createPrCommentCommand() {
20
20
  try {
21
21
  const ctx = createAppContext({
22
22
  rootDir: ROOT,
23
+ outputDir: resolve(ROOT, "results", "latest"),
23
24
  mode: "literacy",
24
25
  noAutoScope: false,
25
26
  skipFetch: true,
@@ -87,6 +87,7 @@ async function runPublishCommand(summaryPath, opts) {
87
87
  noAutoScope: false,
88
88
  noCache: true,
89
89
  noRemoteCache: true,
90
+ outputDir: resolve(ROOT, "results", "latest"),
90
91
  publishEnabled: true,
91
92
  publishTag: opts.tag,
92
93
  readinessEnabled: false,
@@ -74,8 +74,8 @@ Quick Start:
74
74
 
75
75
  Documentation:
76
76
  Repository https://github.com/sanity-io/ai-literacy-framework
77
- CLI Guide https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/CLI.md
78
- Getting Started https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/GETTING_STARTED.md
77
+ CLI Guide https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/cli.md
78
+ Getting Started https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/getting-started.md
79
79
 
80
80
  Run ailf <command> --help for detailed usage of any command.`;
81
81
  // ---------------------------------------------------------------------------
@@ -0,0 +1,5 @@
1
+ /**
2
+ * update-quality-scores command — update QUALITY_SCORE.md from scores.
3
+ */
4
+ import { Command } from "commander";
5
+ export declare function createUpdateQualityScoresCommand(): Command;
@@ -0,0 +1,20 @@
1
+ /**
2
+ * update-quality-scores command — update QUALITY_SCORE.md from scores.
3
+ */
4
+ import { Command } from "commander";
5
+ export function createUpdateQualityScoresCommand() {
6
+ return new Command("update-quality-scores")
7
+ .description("Update docs/QUALITY_SCORE.md from score-summary.json")
8
+ .action(async () => {
9
+ const { updateQualityScores } = await import("../scripts/update-quality-scores.js");
10
+ console.log("=== Updating QUALITY_SCORE.md from score-summary.json ===\n");
11
+ const result = updateQualityScores();
12
+ if (result.success) {
13
+ console.log(` ✅ ${result.message}`);
14
+ }
15
+ else {
16
+ console.error(` ❌ ${result.message}`);
17
+ process.exit(1);
18
+ }
19
+ });
20
+ }
@@ -1,7 +1,7 @@
1
1
  /**
2
- * validate-tasks command — standalone validation of repo-based task YAML files.
2
+ * validate-tasks command — standalone validation of task files.
3
3
  *
4
- * Validates .ailf/tasks/*.yaml files against the RepoTaskSchema without
4
+ * Validates .ailf/tasks/*.yaml files against the CanonicalTaskSchema without
5
5
  * running the full pipeline. Useful for pre-commit hooks and CI checks
6
6
  * in external repos.
7
7
  *