@sanity/ailf 2.0.0 → 2.0.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (442)
  1. package/canonical/grader-references/README.md +2 -2
  2. package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
  3. package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
  4. package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
  5. package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
  6. package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
  7. package/config/features.ts +1 -1
  8. package/config/models.ts +28 -23
  9. package/config/sources.ts +1 -1
  10. package/config/thresholds.ts +1 -1
  11. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
  12. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
  13. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
  14. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
  15. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
  16. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
  17. package/dist/_vendor/ailf-core/config-helpers.d.ts +6 -0
  18. package/dist/_vendor/ailf-core/config-helpers.js +29 -0
  19. package/dist/_vendor/ailf-core/examples/index.d.ts +164 -94
  20. package/dist/_vendor/ailf-core/examples/index.js +208 -114
  21. package/dist/_vendor/ailf-core/index.d.ts +1 -0
  22. package/dist/_vendor/ailf-core/index.js +1 -0
  23. package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
  24. package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
  25. package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
  26. package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
  27. package/dist/_vendor/ailf-core/ports/context.d.ts +20 -1
  28. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
  29. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
  30. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
  31. package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
  32. package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
  33. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +6 -1
  34. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +14 -2
  35. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
  36. package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
  37. package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
  38. package/dist/_vendor/ailf-core/services/index.js +1 -1
  39. package/dist/_vendor/ailf-core/services/scoring.js +9 -0
  40. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +12 -1
  41. package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
  42. package/dist/_vendor/ailf-core/types/index.d.ts +47 -4
  43. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +27 -0
  44. package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
  45. package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
  46. package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
  47. package/dist/_vendor/ailf-tasks/cli.js +61 -0
  48. package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
  49. package/dist/_vendor/ailf-tasks/index.js +16 -0
  50. package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
  51. package/dist/_vendor/ailf-tasks/parser.js +73 -0
  52. package/dist/_vendor/ailf-tasks/schemas.d.ts +198 -0
  53. package/dist/_vendor/ailf-tasks/schemas.js +180 -0
  54. package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
  55. package/dist/_vendor/ailf-tasks/validation.js +162 -0
  56. package/dist/adapters/api-client/remediation.js +2 -2
  57. package/dist/adapters/config-sources/file-config-adapter.js +6 -1
  58. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
  59. package/dist/adapters/index.d.ts +0 -1
  60. package/dist/adapters/index.js +0 -1
  61. package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
  62. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  63. package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
  64. package/dist/adapters/task-sources/content-lake-task-source.js +4 -6
  65. package/dist/adapters/task-sources/index.d.ts +1 -2
  66. package/dist/adapters/task-sources/index.js +1 -2
  67. package/dist/adapters/task-sources/repo-schemas.d.ts +1 -1
  68. package/dist/adapters/task-sources/repo-schemas.js +2 -2
  69. package/dist/adapters/task-sources/repo-task-source.js +1 -1
  70. package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
  71. package/dist/adapters/task-sources/repo-trigger.js +1 -1
  72. package/dist/adapters/task-sources/task-file-loader.d.ts +9 -6
  73. package/dist/adapters/task-sources/task-file-loader.js +20 -6
  74. package/dist/agent-observer/test-imports.d.ts +7 -0
  75. package/dist/agent-observer/test-imports.js +185 -0
  76. package/dist/artifact-capture/comparator.d.ts +22 -0
  77. package/dist/artifact-capture/comparator.js +493 -0
  78. package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
  79. package/dist/artifact-capture/filesystem-collector.js +237 -0
  80. package/dist/artifact-capture/redact-artifact.d.ts +20 -0
  81. package/dist/artifact-capture/redact-artifact.js +115 -0
  82. package/dist/assertions/source-isolation.d.ts +1 -1
  83. package/dist/assertions/source-isolation.js +1 -1
  84. package/dist/cli.js +4 -0
  85. package/dist/commands/calculate-scores.js +1 -0
  86. package/dist/commands/capture-compare.d.ts +15 -0
  87. package/dist/commands/capture-compare.js +253 -0
  88. package/dist/commands/capture-list.d.ts +12 -0
  89. package/dist/commands/capture-list.js +147 -0
  90. package/dist/commands/capture.d.ts +9 -0
  91. package/dist/commands/capture.js +16 -0
  92. package/dist/commands/chronic-failures.d.ts +8 -0
  93. package/dist/commands/chronic-failures.js +33 -0
  94. package/dist/commands/explain-handler.d.ts +1 -1
  95. package/dist/commands/explain-handler.js +37 -8
  96. package/dist/commands/fetch-docs.js +1 -0
  97. package/dist/commands/generate-configs.d.ts +3 -3
  98. package/dist/commands/generate-configs.js +20 -8
  99. package/dist/commands/init.d.ts +2 -3
  100. package/dist/commands/init.js +56 -170
  101. package/dist/commands/pipeline-action.d.ts +7 -1
  102. package/dist/commands/pipeline-action.js +43 -19
  103. package/dist/commands/pipeline.d.ts +6 -1
  104. package/dist/commands/pipeline.js +7 -2
  105. package/dist/commands/pr-comment.js +1 -0
  106. package/dist/commands/publish.js +1 -0
  107. package/dist/commands/shared/help.js +2 -2
  108. package/dist/commands/update-quality-scores.d.ts +5 -0
  109. package/dist/commands/update-quality-scores.js +20 -0
  110. package/dist/composition-root.d.ts +2 -3
  111. package/dist/composition-root.js +27 -14
  112. package/dist/config/features.ts +23 -0
  113. package/dist/config/models.ts +100 -0
  114. package/dist/config/prompts.ts +16 -0
  115. package/dist/config/rubrics.ts +225 -0
  116. package/dist/config/schedules.ts +47 -0
  117. package/dist/config/sinks.ts +37 -0
  118. package/dist/config/sources.ts +21 -0
  119. package/dist/config/thresholds.ts +61 -0
  120. package/dist/lib/agent-behavior-report.d.ts +8 -0
  121. package/dist/lib/agent-behavior-report.js +185 -0
  122. package/dist/lib/baseline.d.ts +19 -0
  123. package/dist/lib/baseline.js +153 -0
  124. package/dist/lib/calculate-scores.d.ts +23 -0
  125. package/dist/lib/calculate-scores.js +42 -0
  126. package/dist/lib/compare.d.ts +18 -0
  127. package/dist/lib/compare.js +170 -0
  128. package/dist/lib/coverage-audit.d.ts +4 -0
  129. package/dist/lib/coverage-audit.js +42 -0
  130. package/dist/lib/discovery-report.d.ts +13 -0
  131. package/dist/lib/discovery-report.js +57 -0
  132. package/dist/lib/fetch-docs.d.ts +30 -0
  133. package/dist/lib/fetch-docs.js +171 -0
  134. package/dist/lib/generate-configs.d.ts +25 -0
  135. package/dist/lib/generate-configs.js +42 -0
  136. package/dist/lib/grader-api.d.ts +21 -0
  137. package/dist/lib/grader-api.js +34 -0
  138. package/dist/lib/grader-compare.d.ts +19 -0
  139. package/dist/lib/grader-compare.js +91 -0
  140. package/dist/lib/grader-consistency.d.ts +27 -0
  141. package/dist/lib/grader-consistency.js +79 -0
  142. package/dist/lib/grader-sensitivity.d.ts +19 -0
  143. package/dist/lib/grader-sensitivity.js +75 -0
  144. package/dist/lib/grader-validate.d.ts +19 -0
  145. package/dist/lib/grader-validate.js +78 -0
  146. package/dist/lib/measure-retrieval.d.ts +14 -0
  147. package/dist/lib/measure-retrieval.js +71 -0
  148. package/dist/lib/pr-comment.d.ts +16 -0
  149. package/dist/lib/pr-comment.js +28 -0
  150. package/dist/lib/readiness-report.d.ts +13 -0
  151. package/dist/lib/readiness-report.js +108 -0
  152. package/dist/lib/webhook-server.d.ts +11 -0
  153. package/dist/lib/webhook-server.js +24 -0
  154. package/dist/lib/weekly-digest.d.ts +24 -0
  155. package/dist/lib/weekly-digest.js +148 -0
  156. package/dist/orchestration/build-app-context.js +13 -0
  157. package/dist/orchestration/cache-context.d.ts +23 -0
  158. package/dist/orchestration/cache-context.js +43 -0
  159. package/dist/orchestration/env-bridge.d.ts +21 -0
  160. package/dist/orchestration/env-bridge.js +66 -0
  161. package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
  162. package/dist/orchestration/load-pipeline-tasks.js +52 -0
  163. package/dist/orchestration/pipeline-orchestrator.js +75 -5
  164. package/dist/orchestration/step-runner.js +5 -1
  165. package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
  166. package/dist/orchestration/steps/calculate-scores-step.js +13 -0
  167. package/dist/orchestration/steps/callback-step.js +10 -1
  168. package/dist/orchestration/steps/compare-step.js +6 -3
  169. package/dist/orchestration/steps/discovery-report-step.js +6 -2
  170. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  171. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  172. package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
  173. package/dist/orchestration/steps/fetch-docs-step.js +30 -16
  174. package/dist/orchestration/steps/gap-analysis-step.js +13 -2
  175. package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
  176. package/dist/orchestration/steps/generate-configs-step.js +50 -15
  177. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
  178. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  179. package/dist/orchestration/steps/publish-report-step.js +19 -0
  180. package/dist/orchestration/steps/readiness-step.js +8 -3
  181. package/dist/orchestration/steps/report-step.js +17 -4
  182. package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
  183. package/dist/orchestration/steps/run-eval-step.js +51 -31
  184. package/dist/pipeline/agent-behavior-report.js +6 -0
  185. package/dist/pipeline/attribution.d.ts +1 -1
  186. package/dist/pipeline/attribution.js +1 -1
  187. package/dist/pipeline/cache.js +29 -15
  188. package/dist/pipeline/calculate-scores.d.ts +2 -0
  189. package/dist/pipeline/calculate-scores.js +70 -33
  190. package/dist/pipeline/chronic-failures.d.ts +55 -0
  191. package/dist/pipeline/chronic-failures.js +110 -0
  192. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +33 -0
  193. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
  194. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
  195. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
  196. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
  197. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
  198. package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
  199. package/dist/pipeline/compiler/assertion-mapper.js +1 -1
  200. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
  201. package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
  202. package/dist/pipeline/compiler/config-loader.d.ts +14 -0
  203. package/dist/pipeline/compiler/config-loader.js +42 -2
  204. package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
  205. package/dist/pipeline/compiler/fixture-resolver.js +1 -1
  206. package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
  207. package/dist/pipeline/compiler/ignore-fields.js +1 -1
  208. package/dist/pipeline/compiler/index.d.ts +2 -5
  209. package/dist/pipeline/compiler/index.js +2 -5
  210. package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
  211. package/dist/pipeline/compiler/literacy-bridge.js +1 -1
  212. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +1 -1
  213. package/dist/pipeline/compiler/mode-bases/agent-harness.js +1 -1
  214. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +1 -1
  215. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +1 -1
  216. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +13 -2
  217. package/dist/pipeline/compiler/mode-bases/literacy.js +55 -1
  218. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +1 -1
  219. package/dist/pipeline/compiler/mode-bases/mcp-server.js +1 -1
  220. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +1 -1
  221. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -1
  222. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
  223. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
  224. package/dist/pipeline/compiler/mode-handlers/index.d.ts +2 -2
  225. package/dist/pipeline/compiler/mode-handlers/index.js +2 -2
  226. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +1 -1
  227. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +1 -1
  228. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
  229. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
  230. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
  231. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +1 -1
  232. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
  233. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
  234. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
  235. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +334 -0
  236. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +1 -1
  237. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +1 -1
  238. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +1 -1
  239. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +1 -1
  240. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +4 -0
  241. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +69 -0
  242. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +307 -0
  243. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +22 -5
  244. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +6 -0
  245. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +10 -5
  246. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +314 -7
  247. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +10 -0
  248. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
  249. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
  250. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +1 -1
  251. package/dist/pipeline/compiler/presets/sanity-literacy.js +1 -1
  252. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
  253. package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
  254. package/dist/pipeline/compiler/provider-assembler.js +13 -7
  255. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
  256. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
  257. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
  258. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
  259. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
  260. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
  261. package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
  262. package/dist/pipeline/compiler/sandbox/index.js +1 -1
  263. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
  264. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
  265. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
  266. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
  267. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
  268. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
  269. package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
  270. package/dist/pipeline/compiler/scoring-bridge.js +1 -1
  271. package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
  272. package/dist/pipeline/compiler/task-bridge.js +92 -0
  273. package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
  274. package/dist/pipeline/compiler/task-graph-builder.js +1 -4
  275. package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
  276. package/dist/pipeline/compiler/telemetry/index.js +1 -1
  277. package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
  278. package/dist/pipeline/compiler/variable-resolver.js +1 -1
  279. package/dist/pipeline/coverage-audit.d.ts +1 -1
  280. package/dist/pipeline/coverage-audit.js +1 -1
  281. package/dist/pipeline/degradations.d.ts +1 -1
  282. package/dist/pipeline/degradations.js +1 -1
  283. package/dist/pipeline/failure-modes.d.ts +1 -1
  284. package/dist/pipeline/failure-modes.js +13 -1
  285. package/dist/pipeline/gap-analysis.d.ts +1 -1
  286. package/dist/pipeline/gap-analysis.js +3 -1
  287. package/dist/pipeline/generate-configs.d.ts +2 -2
  288. package/dist/pipeline/generate-configs.js +15 -8
  289. package/dist/pipeline/grader-compare-runner.d.ts +1 -1
  290. package/dist/pipeline/grader-compare-runner.js +7 -1
  291. package/dist/pipeline/grader-comparison.d.ts +1 -1
  292. package/dist/pipeline/grader-comparison.js +1 -1
  293. package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
  294. package/dist/pipeline/grader-consistency-runner.js +7 -1
  295. package/dist/pipeline/grader-consistency.d.ts +1 -1
  296. package/dist/pipeline/grader-consistency.js +1 -1
  297. package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
  298. package/dist/pipeline/grader-sensitivity-runner.js +1 -1
  299. package/dist/pipeline/grader-sensitivity.d.ts +1 -1
  300. package/dist/pipeline/grader-sensitivity.js +1 -1
  301. package/dist/pipeline/grader-validate-runner.d.ts +1 -1
  302. package/dist/pipeline/grader-validate-runner.js +2 -2
  303. package/dist/pipeline/grader-validation.d.ts +1 -1
  304. package/dist/pipeline/grader-validation.js +1 -1
  305. package/dist/pipeline/map-request-to-config.js +15 -2
  306. package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
  307. package/dist/pipeline/mirror-repo-tasks.js +1 -1
  308. package/dist/pipeline/plan-format.d.ts +1 -1
  309. package/dist/pipeline/plan-format.js +1 -1
  310. package/dist/pipeline/plan.d.ts +1 -1
  311. package/dist/pipeline/plan.js +67 -29
  312. package/dist/pipeline/probe.d.ts +1 -1
  313. package/dist/pipeline/probe.js +1 -1
  314. package/dist/pipeline/readiness-report.d.ts +2 -2
  315. package/dist/pipeline/readiness-report.js +2 -2
  316. package/dist/pipeline/release-classification.d.ts +1 -1
  317. package/dist/pipeline/release-classification.js +1 -1
  318. package/dist/pipeline/release-report.d.ts +1 -1
  319. package/dist/pipeline/release-report.js +1 -1
  320. package/dist/pipeline/repo-eval-comment.d.ts +1 -1
  321. package/dist/pipeline/repo-eval-comment.js +1 -1
  322. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  323. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  324. package/dist/pipeline/resolve-mappings.d.ts +6 -6
  325. package/dist/pipeline/resolve-mappings.js +44 -44
  326. package/dist/pipeline/retrieval-metrics.d.ts +3 -3
  327. package/dist/pipeline/retrieval-metrics.js +28 -20
  328. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  329. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  330. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  331. package/dist/pipeline/steps/compare-step.js +90 -0
  332. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  333. package/dist/pipeline/steps/eval-step.js +347 -0
  334. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  335. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  336. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  337. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  338. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  339. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  340. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  341. package/dist/pipeline/steps/publish-report-step.js +243 -0
  342. package/dist/pipeline/steps/report-step.d.ts +13 -0
  343. package/dist/pipeline/steps/report-step.js +56 -0
  344. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  345. package/dist/pipeline/steps/update-scores-step.js +42 -0
  346. package/dist/pipeline/targeted-loo.d.ts +1 -1
  347. package/dist/pipeline/targeted-loo.js +1 -1
  348. package/dist/pipeline/thresholds.d.ts +1 -1
  349. package/dist/pipeline/thresholds.js +1 -1
  350. package/dist/pipeline/validate.js +13 -0
  351. package/dist/report-store.d.ts +17 -0
  352. package/dist/report-store.js +24 -0
  353. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  354. package/dist/scripts/agent-behavior-report.js +315 -0
  355. package/dist/scripts/baseline.d.ts +43 -0
  356. package/dist/scripts/baseline.js +267 -0
  357. package/dist/scripts/calculate-scores.d.ts +166 -0
  358. package/dist/scripts/calculate-scores.js +1296 -0
  359. package/dist/scripts/compare.d.ts +22 -0
  360. package/dist/scripts/compare.js +334 -0
  361. package/dist/scripts/coverage-audit.d.ts +44 -0
  362. package/dist/scripts/coverage-audit.js +209 -0
  363. package/dist/scripts/debug-eval.d.ts +19 -0
  364. package/dist/scripts/debug-eval.js +73 -0
  365. package/dist/scripts/discovery-report.d.ts +58 -0
  366. package/dist/scripts/discovery-report.js +250 -0
  367. package/dist/scripts/fetch-docs.d.ts +35 -0
  368. package/dist/scripts/fetch-docs.js +472 -0
  369. package/dist/scripts/generate-configs.d.ts +66 -0
  370. package/dist/scripts/generate-configs.js +459 -0
  371. package/dist/scripts/grader-api.d.ts +27 -0
  372. package/dist/scripts/grader-api.js +206 -0
  373. package/dist/scripts/grader-compare.d.ts +22 -0
  374. package/dist/scripts/grader-compare.js +368 -0
  375. package/dist/scripts/grader-consistency.d.ts +20 -0
  376. package/dist/scripts/grader-consistency.js +313 -0
  377. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  378. package/dist/scripts/grader-sensitivity.js +354 -0
  379. package/dist/scripts/grader-validate.d.ts +19 -0
  380. package/dist/scripts/grader-validate.js +267 -0
  381. package/dist/scripts/measure-retrieval.d.ts +10 -0
  382. package/dist/scripts/measure-retrieval.js +145 -0
  383. package/dist/scripts/migrate-task-mode.d.ts +1 -1
  384. package/dist/scripts/migrate-task-mode.js +1 -1
  385. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
  386. package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
  387. package/dist/scripts/pipeline.d.ts +76 -0
  388. package/dist/scripts/pipeline.js +1031 -0
  389. package/dist/scripts/pr-comment.d.ts +10 -0
  390. package/dist/scripts/pr-comment.js +510 -0
  391. package/dist/scripts/readiness-report.d.ts +88 -0
  392. package/dist/scripts/readiness-report.js +342 -0
  393. package/dist/scripts/update-quality-scores.d.ts +15 -0
  394. package/dist/scripts/update-quality-scores.js +184 -0
  395. package/dist/scripts/validate-task-sources.d.ts +1 -1
  396. package/dist/scripts/validate-task-sources.js +1 -1
  397. package/dist/scripts/validate.d.ts +13 -0
  398. package/dist/scripts/validate.js +79 -0
  399. package/dist/scripts/webhook-server.d.ts +26 -0
  400. package/dist/scripts/webhook-server.js +147 -0
  401. package/dist/scripts/weekly-digest.d.ts +24 -0
  402. package/dist/scripts/weekly-digest.js +144 -0
  403. package/dist/sinks/format-slack.d.ts +64 -0
  404. package/dist/sinks/format-slack.js +306 -0
  405. package/dist/sinks/slack-sink.d.ts +27 -0
  406. package/dist/sinks/slack-sink.js +78 -0
  407. package/dist/sinks/types.d.ts +1 -1
  408. package/dist/sinks/types.js +1 -1
  409. package/dist/sinks/webhook-sink.d.ts +19 -0
  410. package/dist/sinks/webhook-sink.js +50 -0
  411. package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
  412. package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
  413. package/dist/tasks/literacy/content-lake.task.ts +181 -0
  414. package/dist/tasks/literacy/frameworks.task.ts +129 -0
  415. package/dist/tasks/literacy/functions.task.ts +70 -0
  416. package/dist/tasks/literacy/groq.task.ts +259 -0
  417. package/dist/tasks/literacy/image-handling.task.ts +95 -0
  418. package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
  419. package/dist/tasks/literacy/portable-text.task.ts +169 -0
  420. package/dist/tasks/literacy/studio-setup.task.ts +134 -0
  421. package/dist/tasks/literacy/visual-editing.task.ts +147 -0
  422. package/package.json +24 -24
  423. package/tasks/.expanded.agentic.yaml +280 -0
  424. package/tasks/.expanded.yaml +565 -0
  425. package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
  426. package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
  427. package/tasks/literacy/content-lake.task.ts +181 -0
  428. package/tasks/literacy/frameworks.task.ts +1 -0
  429. package/tasks/literacy/functions.task.ts +1 -0
  430. package/tasks/literacy/groq.task.ts +1 -0
  431. package/tasks/literacy/image-handling.task.ts +95 -0
  432. package/tasks/literacy/nextjs-live.task.ts +2 -1
  433. package/tasks/literacy/portable-text.task.ts +169 -0
  434. package/tasks/literacy/studio-setup.task.ts +5 -2
  435. package/tasks/literacy/visual-editing.task.ts +1 -0
  436. package/LICENSE +0 -21
  437. package/tasks/frameworks.yaml +0 -98
  438. package/tasks/functions.yaml +0 -51
  439. package/tasks/groq.yaml +0 -216
  440. package/tasks/nextjs-live.yaml +0 -62
  441. package/tasks/studio-setup.yaml +0 -111
  442. package/tasks/visual-editing.yaml +0 -120
@@ -6,9 +6,8 @@
6
6
  * not live evaluation tasks.
7
7
  *
8
8
  * TypeScript output (default) uses define* helpers from @sanity/ailf-core
9
- * for full IDE autocomplete and type checking. YAML output preserves
10
- * inline comments from the source files. JSON output is a plain
11
- * serialization of the parsed data.
9
+ * for full IDE autocomplete and type checking. YAML output serializes the
10
+ * parsed task data. JSON output is a plain serialization of the parsed data.
12
11
  *
13
12
  * Usage:
14
13
  * ailf init # TypeScript output (default)
@@ -6,9 +6,8 @@
6
6
  * not live evaluation tasks.
7
7
  *
8
8
  * TypeScript output (default) uses define* helpers from @sanity/ailf-core
9
- * for full IDE autocomplete and type checking. YAML output preserves
10
- * inline comments from the source files. JSON output is a plain
11
- * serialization of the parsed data.
9
+ * for full IDE autocomplete and type checking. YAML output serializes the
10
+ * parsed task data. JSON output is a plain serialization of the parsed data.
12
11
  *
13
12
  * Usage:
14
13
  * ailf init # TypeScript output (default)
@@ -20,7 +19,7 @@
20
19
  import { Command } from "commander";
21
20
  import { existsSync, mkdirSync, writeFileSync } from "fs";
22
21
  import { resolve, relative } from "path";
23
- import { ailfConfigData, ailfConfigYaml, ailfConfigTs, taskYamlFiles, taskTsFiles, TASK_FILE_NAMES, TASK_TS_FILE_NAMES, allTaskData, workflowYaml, } from "../_vendor/ailf-core/index.js";
22
+ import { ailfConfigData, ailfConfigYaml, ailfConfigTs, taskYamlFiles, taskTsFiles, TASK_FILE_NAMES, TASK_EXAMPLES, allTaskData, workflowYaml, } from "../_vendor/ailf-core/index.js";
24
23
  // ---------------------------------------------------------------------------
25
24
  // Command factory
26
25
  // ---------------------------------------------------------------------------
@@ -54,6 +53,10 @@ function rel(from, to) {
54
53
  const r = relative(from, to);
55
54
  return r.startsWith(".") ? r : `./${r}`;
56
55
  }
56
+ /** Filter task stems by mode using TASK_EXAMPLES metadata */
57
+ function taskStemsForMode(mode) {
58
+ return TASK_EXAMPLES.filter((t) => t.mode === mode).map((t) => t.stem);
59
+ }
57
60
  // ---------------------------------------------------------------------------
58
61
  // Init logic
59
62
  // ---------------------------------------------------------------------------
@@ -66,6 +69,11 @@ async function runInit(opts) {
66
69
  }
67
70
  const format = opts.outputFormat;
68
71
  const force = opts.force;
72
+ if (format === "yaml") {
73
+ console.warn(" ⚠ --output-format yaml is deprecated. TypeScript (default) is the\n" +
74
+ " recommended format — it provides full IDE autocomplete via defineTask().\n" +
75
+ " YAML output will be removed in a future release.\n");
76
+ }
69
77
  // Resolve target from the caller's actual working directory
70
78
  const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
71
79
  const targetDir = resolve(callerCwd, opts.path);
@@ -115,71 +123,52 @@ async function runInit(opts) {
115
123
  // 3. Write example tasks to .ailf/tasks/
116
124
  const modeFilter = opts.mode;
117
125
  const isCustomMode = modeFilter === "custom";
126
+ // Determine which task stems to write based on mode filter
127
+ let stemsToWrite;
128
+ if (isCustomMode) {
129
+ // Custom mode: write one literacy example as a starting point
130
+ stemsToWrite = taskStemsForMode("literacy").slice(0, 1);
131
+ }
132
+ else if (modeFilter === "literacy") {
133
+ stemsToWrite = taskStemsForMode("literacy");
134
+ }
135
+ else if (modeFilter === "mcp-server") {
136
+ stemsToWrite = taskStemsForMode("mcp-server");
137
+ }
138
+ else if (modeFilter === "knowledge-probe") {
139
+ stemsToWrite = taskStemsForMode("knowledge-probe");
140
+ }
141
+ else {
142
+ // Default (no --mode): write all tasks
143
+ stemsToWrite = [...TASK_FILE_NAMES];
144
+ }
118
145
  if (format === "ts") {
119
- // TypeScript: *.task.ts files with defineTask helper
120
- // Default (no --mode): write literacy examples + draft MCP/probe examples
121
- // --mode literacy: only literacy examples
122
- // --mode mcp-server: only MCP examples (active, not draft)
123
- // --mode custom: only a custom example task
124
- if (!modeFilter || modeFilter === "literacy") {
125
- for (const stem of TASK_TS_FILE_NAMES) {
126
- const taskPath = resolve(tasksDir, `${stem}.task.ts`);
127
- const content = taskTsFiles[stem];
128
- if (writeIfNew(taskPath, content, force)) {
129
- written.push(rel(targetDir, taskPath));
130
- }
131
- else {
132
- skipped.push(rel(targetDir, taskPath));
133
- }
134
- }
135
- }
136
- // Draft examples for other modes (default init only)
137
- if (!modeFilter) {
138
- const mcpPath = resolve(tasksDir, "example-mcp-tool-usage.task.ts");
139
- if (writeIfNew(mcpPath, MCP_DRAFT_TASK_TS, force)) {
140
- written.push(rel(targetDir, mcpPath));
141
- }
142
- else {
143
- skipped.push(rel(targetDir, mcpPath));
144
- }
145
- const probePath = resolve(tasksDir, "example-knowledge-probe.task.ts");
146
- if (writeIfNew(probePath, PROBE_DRAFT_TASK_TS, force)) {
147
- written.push(rel(targetDir, probePath));
148
- }
149
- else {
150
- skipped.push(rel(targetDir, probePath));
146
+ for (const stem of stemsToWrite) {
147
+ let content = taskTsFiles[stem];
148
+ if (!content)
149
+ continue;
150
+ // For MCP-only init, activate the draft task
151
+ if (modeFilter === "mcp-server") {
152
+ content = content.replace('status: "draft",', '// status: "active", // Activated — this task runs in evaluations');
151
153
  }
152
- }
153
- // MCP-only init
154
- if (modeFilter === "mcp-server") {
155
- const mcpContent = MCP_DRAFT_TASK_TS.replace('status: "draft",', '// status: "active", // Activated — this task runs in evaluations');
156
- const mcpPath = resolve(tasksDir, "example-mcp-tool-usage.task.ts");
157
- if (writeIfNew(mcpPath, mcpContent, force)) {
158
- written.push(rel(targetDir, mcpPath));
154
+ const fileName = isCustomMode && stem === stemsToWrite[0]
155
+ ? "example-custom.task.ts"
156
+ : `${stem}.task.ts`;
157
+ const taskPath = resolve(tasksDir, fileName);
158
+ if (writeIfNew(taskPath, content, force)) {
159
+ written.push(rel(targetDir, taskPath));
159
160
  }
160
161
  else {
161
- skipped.push(rel(targetDir, mcpPath));
162
- }
163
- }
164
- // Custom preset scaffold
165
- if (isCustomMode) {
166
- const customTaskPath = resolve(tasksDir, "example-custom.task.ts");
167
- // Reuse the GROQ literacy task as a starting point
168
- if (taskTsFiles[TASK_TS_FILE_NAMES[0]]) {
169
- if (writeIfNew(customTaskPath, taskTsFiles[TASK_TS_FILE_NAMES[0]], force)) {
170
- written.push(rel(targetDir, customTaskPath));
171
- }
172
- else {
173
- skipped.push(rel(targetDir, customTaskPath));
174
- }
162
+ skipped.push(rel(targetDir, taskPath));
175
163
  }
176
164
  }
177
165
  }
178
166
  else if (format === "yaml") {
179
- // YAML: raw string passthrough (preserves comments)
180
- for (const stem of TASK_FILE_NAMES) {
181
- const taskPath = resolve(tasksDir, `${stem}.yaml`);
167
+ for (const stem of stemsToWrite) {
182
168
  const content = taskYamlFiles[stem];
169
+ if (!content)
170
+ continue;
171
+ const taskPath = resolve(tasksDir, `${stem}.yaml`);
183
172
  if (writeIfNew(taskPath, content, force)) {
184
173
  written.push(rel(targetDir, taskPath));
185
174
  }
@@ -193,8 +182,12 @@ async function runInit(opts) {
193
182
  const tasks = Array.isArray(allTaskData)
194
183
  ? allTaskData
195
184
  : [allTaskData];
185
+ // Build a set of task IDs that match the selected stems
186
+ const selectedIds = new Set(stemsToWrite.flatMap((s) => TASK_EXAMPLES.filter((t) => t.stem === s).map((t) => t.stem)));
196
187
  for (const task of tasks) {
197
188
  const taskId = task.id;
189
+ if (!selectedIds.has(taskId))
190
+ continue;
198
191
  const taskPath = resolve(tasksDir, `${taskId}.json`);
199
192
  const content = JSON.stringify([task], null, 2) + "\n";
200
193
  if (writeIfNew(taskPath, content, force)) {
@@ -263,8 +256,7 @@ async function runInit(opts) {
263
256
  if (format === "ts") {
264
257
  console.log();
265
258
  console.log(` 💡 TypeScript tasks (${taskExt}) give you full IDE autocomplete`);
266
- console.log(" via defineTask() from @sanity/ailf-core. YAML and JSON are");
267
- console.log(" also supported — re-run with --output-format yaml if preferred.");
259
+ console.log(" via defineTask() from @sanity/ailf-core.");
268
260
  }
269
261
  console.log();
270
262
  console.log(" 🔑 Retrieve secrets from 1Password (Sanity employees):");
@@ -282,114 +274,8 @@ async function runInit(opts) {
282
274
  console.log();
283
275
  }
284
276
  // ---------------------------------------------------------------------------
285
- // Draft example templates for non-literacy modes
277
+ // Custom preset scaffold template
286
278
  // ---------------------------------------------------------------------------
287
- const MCP_DRAFT_TASK_TS = `/**
288
- * Example Task: MCP Server tool-use evaluation (DRAFT).
289
- *
290
- * Tests whether an LLM can correctly discover and invoke Sanity MCP server
291
- * tools. Connects to the hosted Sanity MCP server at https://mcp.sanity.io.
292
- *
293
- * Prerequisites:
294
- * - A Sanity API token with read access (for token-based auth)
295
- * - Or: OAuth authentication will be prompted on first connect
296
- *
297
- * Authentication options:
298
- * 1. Token-based: set SANITY_API_TOKEN env var
299
- * 2. OAuth: the server prompts for login on first connect
300
- *
301
- * Setup: npx sanity@latest mcp configure
302
- * Docs: https://www.sanity.io/docs/ai/mcp-server
303
- *
304
- * This task is a DRAFT — it won't run unless activated or explicitly targeted.
305
- * To activate: change status to "active" or remove the status field.
306
- */
307
-
308
- import { defineTask } from "../_vendor/ailf-core/index.js"
309
-
310
- export default defineTask({
311
- mode: "mcp-server",
312
- id: "example-mcp-tool-usage",
313
- title: "MCP tool discovery and invocation",
314
- description: "Example — tests Sanity MCP server tool-use (draft)",
315
- area: "mcp",
316
-
317
- // ── Server configuration ────────────────────────────────────
318
- // The Sanity MCP server is hosted remotely at https://mcp.sanity.io.
319
- // Authentication via API token header or OAuth.
320
- //
321
- // For token auth, set SANITY_API_TOKEN in your environment.
322
- serverConfig: {
323
- transport: "streamable-http",
324
- url: "https://mcp.sanity.io",
325
- env: {
326
- SANITY_API_TOKEN: process.env.SANITY_API_TOKEN ?? "",
327
- },
328
- },
329
-
330
- prompt: {
331
- text: \`Use the available MCP tools to query all documents of type "article"
332
- in the Sanity dataset. Return the title and slug for each document.
333
- Limit results to 5 documents.\`,
334
- },
335
-
336
- assertions: [
337
- {
338
- type: "llm-rubric",
339
- template: "mcp-input-validation",
340
- criteria: [
341
- "Correctly identifies the query_documents tool",
342
- "Passes a valid GROQ query to filter by document type",
343
- "Requests only the needed fields (title, slug)",
344
- ],
345
- },
346
- ],
347
-
348
- status: "draft",
349
- })
350
- `;
351
- const PROBE_DRAFT_TASK_TS = `/**
352
- * Example Task: Knowledge probe baseline (DRAFT).
353
- *
354
- * Tests what the model knows about a topic without providing documentation.
355
- * Used to establish a baseline for comparison with literacy evaluations.
356
- * This task is a DRAFT — it won't run unless activated or explicitly targeted.
357
- *
358
- * To activate: change status to "active" or remove the status field.
359
- */
360
-
361
- import { defineTask } from "../_vendor/ailf-core/index.js"
362
-
363
- export default defineTask({
364
- mode: "knowledge-probe",
365
- id: "example-knowledge-probe",
366
- title: "Model knowledge of GROQ syntax",
367
- description: "Example — probes baseline model knowledge (draft)",
368
- area: "groq",
369
-
370
- prompt: {
371
- text: \`Explain the GROQ query language used by Sanity. Cover:
372
- 1. Basic query syntax and projections
373
- 2. How to filter and sort results
374
- 3. Common patterns for fetching related documents
375
- Provide working code examples.\`,
376
- },
377
-
378
- assertions: [
379
- {
380
- type: "llm-rubric",
381
- template: "task-completion",
382
- criteria: [
383
- "Demonstrates understanding of GROQ query syntax",
384
- "Shows filtering and projection patterns",
385
- "Code examples use valid GROQ syntax",
386
- ],
387
- },
388
- ],
389
-
390
- status: "draft",
391
- })
392
- `;
393
279
  const CUSTOM_PRESET_TS = `/**
394
280
  * Custom preset — your domain-specific evaluation configuration.
395
281
  *
@@ -401,7 +287,7 @@ const CUSTOM_PRESET_TS = `/**
401
287
  * To use a different mode (e.g., "mcp-server"), change the mode field.
402
288
  * Available built-in modes: literacy, mcp-server, knowledge-probe, agent-harness.
403
289
  *
404
- * @see https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/PRESETS.md
290
+ * @see https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/presets.md
405
291
  */
406
292
 
407
293
  import { definePreset } from "../_vendor/ailf-core/index.js"
@@ -36,6 +36,8 @@ export interface ResolvedOptions {
36
36
  noAutoScope: boolean;
37
37
  noCache: boolean;
38
38
  noRemoteCache: boolean;
39
+ /** Base directory for user-facing pipeline output artifacts (always resolved). */
40
+ outputDir: string;
39
41
  outputPath?: string;
40
42
  perspectiveOverride?: string;
41
43
  projectIdOverride?: string;
@@ -57,10 +59,14 @@ export interface ResolvedOptions {
57
59
  repoTasksPath?: string;
58
60
  taskOption?: string;
59
61
  tagOption?: string[];
60
- taskSourceType?: "content-lake" | "repo" | "yaml";
62
+ taskSourceType?: "content-lake" | "repo";
61
63
  urlArgs: string[];
62
64
  apiUrl: string;
63
65
  apiKey?: string;
66
+ captureEnabled: boolean;
67
+ captureDir?: string;
68
+ captureCompress: boolean;
69
+ captureExtras: boolean;
64
70
  }
65
71
  /**
66
72
  * Pure option resolution — computes ResolvedOptions from CLI flags without
@@ -10,7 +10,7 @@
10
10
  *
11
11
  * @see packages/eval/src/orchestration/ for the step-based pipeline
12
12
  */
13
- import { existsSync, readFileSync, writeFileSync } from "fs";
13
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
14
14
  import { dirname, resolve } from "path";
15
15
  import { fileURLToPath } from "url";
16
16
  import { classifyUrls } from "../pipeline/classify-url.js";
@@ -209,6 +209,23 @@ export function computeResolvedOptions(opts) {
209
209
  const remote = opts.remote || process.env.AILF_REMOTE === "1";
210
210
  const apiUrl = opts.apiUrl ?? process.env.AILF_API_URL ?? "https://ailf-api.sanity.build";
211
211
  const apiKey = process.env.AILF_API_KEY ?? undefined;
212
+ // Output directory: explicit flag → repo-task heuristic → default
213
+ const resolvedRepoTasksPath = opts.repoTasksPath
214
+ ? resolve(callerCwd, opts.repoTasksPath)
215
+ : undefined;
216
+ const resolvedTaskSourceType = resolveTaskSourceType(opts.taskSource);
217
+ let outputDir;
218
+ if (opts.outputDir) {
219
+ outputDir = resolve(callerCwd, opts.outputDir);
220
+ }
221
+ else if (resolvedTaskSourceType === "repo" || resolvedRepoTasksPath) {
222
+ outputDir = resolvedRepoTasksPath
223
+ ? resolve(resolvedRepoTasksPath, "..", "results", "latest")
224
+ : resolve(callerCwd, ".ailf", "results", "latest");
225
+ }
226
+ else {
227
+ outputDir = resolve(ROOT, "results", "latest");
228
+ }
212
229
  return {
213
230
  allowedOriginArgs,
214
231
  apiKey,
@@ -233,6 +250,7 @@ export function computeResolvedOptions(opts) {
233
250
  noAutoScope: opts.autoScope === false,
234
251
  noCache: !opts.cache,
235
252
  noRemoteCache: opts.remoteCache === false,
253
+ outputDir,
236
254
  outputPath: opts.output,
237
255
  perspectiveOverride,
238
256
  projectIdOverride,
@@ -250,24 +268,25 @@ export function computeResolvedOptions(opts) {
250
268
  skipFetch: opts.skipFetch,
251
269
  source: opts.source,
252
270
  studioOriginOverride,
253
- repoTasksPath: opts.repoTasksPath
254
- ? resolve(callerCwd, opts.repoTasksPath)
255
- : undefined,
271
+ repoTasksPath: resolvedRepoTasksPath,
256
272
  taskOption,
257
273
  tagOption,
258
- taskSourceType: resolveTaskSourceType(opts.taskSource),
274
+ taskSourceType: resolvedTaskSourceType,
259
275
  urlArgs,
276
+ captureEnabled: opts.capture || process.env.AILF_CAPTURE === "1",
277
+ captureDir: opts.captureDir ?? process.env.AILF_CAPTURE_DIR,
278
+ captureCompress: opts.captureCompress !== false &&
279
+ process.env.AILF_CAPTURE_COMPRESS !== "0",
280
+ captureExtras: opts.captureExtras !== false && process.env.AILF_CAPTURE_EXTRAS !== "0",
260
281
  };
261
282
  }
262
283
  /** Resolve and validate the --task-source flag value. */
263
284
  function resolveTaskSourceType(raw) {
264
285
  if (!raw || raw === "content-lake")
265
286
  return undefined; // default — Content Lake
266
- if (raw === "yaml")
267
- return "yaml";
268
287
  if (raw === "repo")
269
288
  return "repo";
270
- console.error(`❌ Invalid --task-source "${raw}". Must be "yaml", "repo", or "content-lake".`);
289
+ console.error(`❌ Invalid --task-source "${raw}". Must be "repo" or "content-lake".`);
271
290
  process.exit(1);
272
291
  }
273
292
  // ---------------------------------------------------------------------------
@@ -304,17 +323,26 @@ export async function executePipeline(cliOpts) {
304
323
  if (cliOpts.output) {
305
324
  config.outputPath = resolve(callerCwd, cliOpts.output);
306
325
  }
326
+ // Output dir: explicit CLI flag → repo-task heuristic → file-config default
327
+ if (cliOpts.outputDir) {
328
+ config.outputDir = resolve(callerCwd, cliOpts.outputDir);
329
+ }
330
+ else if (config.repoTasksPath) {
331
+ config.outputDir = resolve(config.repoTasksPath, "..", "results", "latest");
332
+ }
307
333
  // Create AppContext directly from the merged config so adapters
308
334
  // (especially taskSource) are wired from the file config's
309
335
  // taskSourceType — not from CLI defaults.
336
+ console.log(` 📂 Output directory: ${config.outputDir}`);
310
337
  const ctx = createAppContext(config);
311
338
  const pipelineStart = Date.now();
312
339
  const steps = buildStepSequence(ctx, pipelineStart);
313
340
  const result = await orchestratePipeline(ctx, steps);
314
- writePipelineResult(result);
341
+ writePipelineResult(result, config.outputDir);
315
342
  process.exit(result.success ? 0 : 1);
316
343
  }
317
344
  const o = resolveOptions(cliOpts);
345
+ console.log(` 📂 Output directory: ${o.outputDir}`);
318
346
  // Remote mode — submit to AILF API instead of running locally.
319
347
  // Use the caller's working directory (not the package root) because
320
348
  // remote mode reads .ailf/tasks/ from the user's repo, not from
@@ -350,7 +378,7 @@ export async function executePipeline(cliOpts) {
350
378
  const pipelineStart = Date.now();
351
379
  const steps = buildStepSequence(ctx, pipelineStart);
352
380
  const result = await orchestratePipeline(ctx, steps);
353
- writePipelineResult(result);
381
+ writePipelineResult(result, o.outputDir);
354
382
  process.exit(result.success ? 0 : 1);
355
383
  }
356
384
  // ---------------------------------------------------------------------------
@@ -362,15 +390,11 @@ export async function executePipeline(cliOpts) {
362
390
  function resolveOptions(opts) {
363
391
  return computeResolvedOptions(opts);
364
392
  }
365
- function writePipelineResult(result) {
366
- const resultFile = resolve(ROOT, "results", "latest", "pipeline-result.json");
367
- try {
368
- writeFileSync(resultFile, JSON.stringify(result, null, 2));
369
- console.log(` 📄 Pipeline result: ${resultFile}\n`);
370
- }
371
- catch {
372
- // results/latest/ may not exist yet — not critical
373
- }
393
+ function writePipelineResult(result, outputDir) {
394
+ mkdirSync(outputDir, { recursive: true });
395
+ const resultFile = resolve(outputDir, "pipeline-result.json");
396
+ writeFileSync(resultFile, JSON.stringify(result, null, 2));
397
+ console.log(` 📄 Pipeline result: ${resultFile}\n`);
374
398
  }
375
399
  /**
376
400
  * Load .ailf/config.yaml if --repo-tasks-path is set and the config file
@@ -5,7 +5,7 @@
5
5
  * options object, bridges to process.env for downstream modules, and
6
6
  * delegates to runPipeline().
7
7
  *
8
- * @see docs/CLI.md for the full flag reference.
8
+ * @see docs/cli.md for the full flag reference.
9
9
  */
10
10
  import { Command } from "commander";
11
11
  /**
@@ -37,6 +37,7 @@ export interface PipelineCliOptions {
37
37
  mode: string;
38
38
  variant?: string;
39
39
  output?: string;
40
+ outputDir?: string;
40
41
  promptfooUrl?: string;
41
42
  publish?: boolean;
42
43
  publishTag?: string;
@@ -63,5 +64,9 @@ export interface PipelineCliOptions {
63
64
  url: string[];
64
65
  urls: string[];
65
66
  apiUrl?: string;
67
+ capture: boolean;
68
+ captureDir?: string;
69
+ captureCompress: boolean;
70
+ captureExtras: boolean;
66
71
  }
67
72
  export declare function createPipelineCommand(): Command;
@@ -5,7 +5,7 @@
5
5
  * options object, bridges to process.env for downstream modules, and
6
6
  * delegates to runPipeline().
7
7
  *
8
- * @see docs/CLI.md for the full flag reference.
8
+ * @see docs/cli.md for the full flag reference.
9
9
  */
10
10
  import { Command } from "commander";
11
11
  import { LiteracyVariant } from "../pipeline/normalize-mode.js";
@@ -48,11 +48,16 @@ export function createPipelineCommand() {
48
48
  .option("--report-project <id>", "Sanity project ID for report store")
49
49
  .option("--config <path>", "Load pipeline config from a TS/JS/YAML/JSON file (overrides most CLI flags)")
50
50
  .option("-o, --output <path>", "Write PR comment markdown to file")
51
+ .option("--output-dir <path>", "Base directory for pipeline output artifacts (default: inferred from execution context)")
51
52
  .option("--promptfoo-url <url>", "Promptfoo share URL for report")
52
- .option("--task-source <type>", "Task definition source: content-lake (default — Sanity Content Lake), repo (repo tasks only, no Content Lake merge), yaml (tasks/*.yaml files, legacy)", "content-lake")
53
+ .option("--task-source <type>", "Task definition source: content-lake (default — Sanity Content Lake), repo (repo tasks only, no Content Lake merge)", "content-lake")
53
54
  .option("--repo-tasks-path <path>", "Path to repo-based task definitions (.ailf/tasks/ directory)")
54
55
  .option("--remote", "Submit evaluation to the AILF API instead of running locally", false)
55
56
  .option("--api-url <url>", "AILF API base URL (default: https://ailf-api.sanity.build)")
57
+ .option("--capture", "Enable artifact capture for this run", false)
58
+ .option("--capture-dir <path>", "Base directory for capture output (default: results/captures/)")
59
+ .option("--no-capture-compress", "Disable tar.gz compression of captures")
60
+ .option("--no-capture-extras", "Exclude mode-specific artifacts from captures")
56
61
  .action(async (opts) => {
57
62
  const { executePipeline } = await import("./pipeline-action.js");
58
63
  await executePipeline(opts);
@@ -20,6 +20,7 @@ export function createPrCommentCommand() {
20
20
  try {
21
21
  const ctx = createAppContext({
22
22
  rootDir: ROOT,
23
+ outputDir: resolve(ROOT, "results", "latest"),
23
24
  mode: "literacy",
24
25
  noAutoScope: false,
25
26
  skipFetch: true,
@@ -87,6 +87,7 @@ async function runPublishCommand(summaryPath, opts) {
87
87
  noAutoScope: false,
88
88
  noCache: true,
89
89
  noRemoteCache: true,
90
+ outputDir: resolve(ROOT, "results", "latest"),
90
91
  publishEnabled: true,
91
92
  publishTag: opts.tag,
92
93
  readinessEnabled: false,
@@ -74,8 +74,8 @@ Quick Start:
74
74
 
75
75
  Documentation:
76
76
  Repository https://github.com/sanity-io/ai-literacy-framework
77
- CLI Guide https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/CLI.md
78
- Getting Started https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/GETTING_STARTED.md
77
+ CLI Guide https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/cli.md
78
+ Getting Started https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/getting-started.md
79
79
 
80
80
  Run ailf <command> --help for detailed usage of any command.`;
81
81
  // ---------------------------------------------------------------------------
@@ -0,0 +1,5 @@
1
+ /**
2
+ * update-quality-scores command — update QUALITY_SCORE.md from scores.
3
+ */
4
+ import { Command } from "commander";
5
+ export declare function createUpdateQualityScoresCommand(): Command;
@@ -0,0 +1,20 @@
1
+ /**
2
+ * update-quality-scores command — update QUALITY_SCORE.md from scores.
3
+ */
4
+ import { Command } from "commander";
5
+ export function createUpdateQualityScoresCommand() {
6
+ return new Command("update-quality-scores")
7
+ .description("Update docs/QUALITY_SCORE.md from score-summary.json")
8
+ .action(async () => {
9
+ const { updateQualityScores } = await import("../scripts/update-quality-scores.js");
10
+ console.log("=== Updating QUALITY_SCORE.md from score-summary.json ===\n");
11
+ const result = updateQualityScores();
12
+ if (result.success) {
13
+ console.log(` ✅ ${result.message}`);
14
+ }
15
+ else {
16
+ console.error(` ❌ ${result.message}`);
17
+ process.exit(1);
18
+ }
19
+ });
20
+ }
@@ -13,15 +13,14 @@
13
13
  * - After: one factory, one place to change adapter wiring
14
14
  *
15
15
  * @see packages/core/src/ports/context.ts — AppContext interface
16
- * @see docs/exec-plans/ports-and-adapters/phase-7-composition-root.md
16
+ * @see docs/archive/exec-plans/ports-and-adapters/phase-7-composition-root.md
17
17
  */
18
18
  import { type AppContext, type AssertionRegistration, type ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
19
19
  /**
20
20
  * Create a fully wired AppContext from resolved configuration.
21
21
  *
22
22
  * Every adapter is constructed here and nowhere else (outside of tests).
23
- * Swapping an adapter (e.g., YamlTaskSource ContentLakeTaskSource)
24
- * is a one-line change in this function.
23
+ * Swapping an adapter is a one-line change in this function.
25
24
  */
26
25
  export declare function createAppContext(config: ResolvedConfig): AppContext;
27
26
  /**