@sanity/ailf 1.0.0 → 2.0.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (499)
  1. package/README.md +0 -1
  2. package/canonical/grader-references/README.md +2 -2
  3. package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
  4. package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
  5. package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
  6. package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
  7. package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
  8. package/config/features.ts +1 -1
  9. package/config/models.ts +29 -12
  10. package/config/sources.ts +1 -1
  11. package/config/thresholds.ts +1 -1
  12. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
  13. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
  14. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
  15. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
  16. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
  17. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
  18. package/dist/_vendor/ailf-core/config-helpers.d.ts +20 -17
  19. package/dist/_vendor/ailf-core/config-helpers.js +51 -2
  20. package/dist/_vendor/ailf-core/examples/index.d.ts +166 -80
  21. package/dist/_vendor/ailf-core/examples/index.js +213 -94
  22. package/dist/_vendor/ailf-core/index.d.ts +3 -2
  23. package/dist/_vendor/ailf-core/index.js +2 -1
  24. package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
  25. package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
  26. package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
  27. package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
  28. package/dist/_vendor/ailf-core/ports/context.d.ts +22 -1
  29. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
  30. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
  31. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
  32. package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
  33. package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
  34. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
  35. package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
  36. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +7 -1
  37. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +16 -2
  38. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
  39. package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
  40. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
  41. package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
  42. package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
  43. package/dist/_vendor/ailf-core/services/index.js +1 -1
  44. package/dist/_vendor/ailf-core/services/scoring.js +9 -0
  45. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +25 -1
  46. package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
  47. package/dist/_vendor/ailf-core/types/index.d.ts +48 -7
  48. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +105 -23
  49. package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
  50. package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
  51. package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
  52. package/dist/adapters/api-client/remediation.js +2 -2
  53. package/dist/adapters/config-sources/file-config-adapter.js +7 -1
  54. package/dist/adapters/config-sources/ts-config-loader.js +21 -13
  55. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
  56. package/dist/adapters/index.d.ts +0 -1
  57. package/dist/adapters/index.js +0 -1
  58. package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
  59. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  60. package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
  61. package/dist/adapters/task-sources/content-lake-task-source.js +21 -26
  62. package/dist/adapters/task-sources/index.d.ts +3 -4
  63. package/dist/adapters/task-sources/index.js +3 -4
  64. package/dist/adapters/task-sources/repo-schemas.d.ts +219 -17
  65. package/dist/adapters/task-sources/repo-schemas.js +228 -20
  66. package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
  67. package/dist/adapters/task-sources/repo-task-source.js +81 -122
  68. package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
  69. package/dist/adapters/task-sources/repo-trigger.js +1 -1
  70. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  71. package/dist/adapters/task-sources/repo-validation.js +126 -5
  72. package/dist/adapters/task-sources/task-file-loader.d.ts +10 -7
  73. package/dist/adapters/task-sources/task-file-loader.js +21 -7
  74. package/dist/agent-observer/test-imports.d.ts +7 -0
  75. package/dist/agent-observer/test-imports.js +185 -0
  76. package/dist/artifact-capture/comparator.d.ts +22 -0
  77. package/dist/artifact-capture/comparator.js +493 -0
  78. package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
  79. package/dist/artifact-capture/filesystem-collector.js +237 -0
  80. package/dist/artifact-capture/redact-artifact.d.ts +20 -0
  81. package/dist/artifact-capture/redact-artifact.js +115 -0
  82. package/dist/assertions/source-isolation.d.ts +1 -1
  83. package/dist/assertions/source-isolation.js +1 -1
  84. package/dist/cli.js +4 -0
  85. package/dist/commands/calculate-scores.js +1 -0
  86. package/dist/commands/capture-compare.d.ts +15 -0
  87. package/dist/commands/capture-compare.js +253 -0
  88. package/dist/commands/capture-list.d.ts +12 -0
  89. package/dist/commands/capture-list.js +147 -0
  90. package/dist/commands/capture.d.ts +9 -0
  91. package/dist/commands/capture.js +16 -0
  92. package/dist/commands/chronic-failures.d.ts +8 -0
  93. package/dist/commands/chronic-failures.js +33 -0
  94. package/dist/commands/coverage-audit.js +3 -1
  95. package/dist/commands/explain-handler.d.ts +1 -1
  96. package/dist/commands/explain-handler.js +37 -8
  97. package/dist/commands/fetch-docs.js +1 -0
  98. package/dist/commands/generate-configs.d.ts +3 -3
  99. package/dist/commands/generate-configs.js +20 -8
  100. package/dist/commands/init.d.ts +5 -4
  101. package/dist/commands/init.js +190 -25
  102. package/dist/commands/pipeline-action.d.ts +7 -1
  103. package/dist/commands/pipeline-action.js +43 -19
  104. package/dist/commands/pipeline.d.ts +6 -1
  105. package/dist/commands/pipeline.js +7 -2
  106. package/dist/commands/pr-comment.js +1 -0
  107. package/dist/commands/publish.js +1 -0
  108. package/dist/commands/shared/help.js +2 -2
  109. package/dist/commands/update-quality-scores.d.ts +5 -0
  110. package/dist/commands/update-quality-scores.js +20 -0
  111. package/dist/commands/validate-tasks.d.ts +2 -2
  112. package/dist/commands/validate-tasks.js +26 -15
  113. package/dist/composition-root.d.ts +15 -4
  114. package/dist/composition-root.js +100 -55
  115. package/dist/config/features.ts +23 -0
  116. package/dist/config/models.ts +100 -0
  117. package/dist/config/prompts.ts +16 -0
  118. package/dist/config/rubrics.ts +225 -0
  119. package/dist/config/schedules.ts +47 -0
  120. package/dist/config/sinks.ts +37 -0
  121. package/dist/config/sources.ts +21 -0
  122. package/dist/config/thresholds.ts +61 -0
  123. package/dist/index.d.ts +41 -0
  124. package/dist/index.js +48 -0
  125. package/dist/lib/agent-behavior-report.d.ts +8 -0
  126. package/dist/lib/agent-behavior-report.js +185 -0
  127. package/dist/lib/baseline.d.ts +19 -0
  128. package/dist/lib/baseline.js +153 -0
  129. package/dist/lib/calculate-scores.d.ts +23 -0
  130. package/dist/lib/calculate-scores.js +42 -0
  131. package/dist/lib/compare.d.ts +18 -0
  132. package/dist/lib/compare.js +170 -0
  133. package/dist/lib/coverage-audit.d.ts +4 -0
  134. package/dist/lib/coverage-audit.js +42 -0
  135. package/dist/lib/discovery-report.d.ts +13 -0
  136. package/dist/lib/discovery-report.js +57 -0
  137. package/dist/lib/fetch-docs.d.ts +30 -0
  138. package/dist/lib/fetch-docs.js +171 -0
  139. package/dist/lib/generate-configs.d.ts +25 -0
  140. package/dist/lib/generate-configs.js +42 -0
  141. package/dist/lib/grader-api.d.ts +21 -0
  142. package/dist/lib/grader-api.js +34 -0
  143. package/dist/lib/grader-compare.d.ts +19 -0
  144. package/dist/lib/grader-compare.js +91 -0
  145. package/dist/lib/grader-consistency.d.ts +27 -0
  146. package/dist/lib/grader-consistency.js +79 -0
  147. package/dist/lib/grader-sensitivity.d.ts +19 -0
  148. package/dist/lib/grader-sensitivity.js +75 -0
  149. package/dist/lib/grader-validate.d.ts +19 -0
  150. package/dist/lib/grader-validate.js +78 -0
  151. package/dist/lib/measure-retrieval.d.ts +14 -0
  152. package/dist/lib/measure-retrieval.js +71 -0
  153. package/dist/lib/pr-comment.d.ts +16 -0
  154. package/dist/lib/pr-comment.js +28 -0
  155. package/dist/lib/readiness-report.d.ts +13 -0
  156. package/dist/lib/readiness-report.js +108 -0
  157. package/dist/lib/webhook-server.d.ts +11 -0
  158. package/dist/lib/webhook-server.js +24 -0
  159. package/dist/lib/weekly-digest.d.ts +24 -0
  160. package/dist/lib/weekly-digest.js +148 -0
  161. package/dist/orchestration/build-app-context.js +13 -0
  162. package/dist/orchestration/build-step-sequence.js +4 -2
  163. package/dist/orchestration/cache-context.d.ts +23 -0
  164. package/dist/orchestration/cache-context.js +43 -0
  165. package/dist/orchestration/env-bridge.d.ts +21 -0
  166. package/dist/orchestration/env-bridge.js +66 -0
  167. package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
  168. package/dist/orchestration/load-pipeline-tasks.js +52 -0
  169. package/dist/orchestration/pipeline-orchestrator.js +75 -5
  170. package/dist/orchestration/step-runner.js +5 -1
  171. package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
  172. package/dist/orchestration/steps/calculate-scores-step.js +13 -0
  173. package/dist/orchestration/steps/callback-step.js +10 -1
  174. package/dist/orchestration/steps/compare-step.js +6 -3
  175. package/dist/orchestration/steps/discovery-report-step.js +6 -2
  176. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  177. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  178. package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
  179. package/dist/orchestration/steps/fetch-docs-step.js +32 -19
  180. package/dist/orchestration/steps/gap-analysis-step.js +13 -2
  181. package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
  182. package/dist/orchestration/steps/generate-configs-step.js +77 -26
  183. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
  184. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  185. package/dist/orchestration/steps/publish-report-step.js +19 -0
  186. package/dist/orchestration/steps/readiness-step.js +8 -3
  187. package/dist/orchestration/steps/report-step.js +17 -4
  188. package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
  189. package/dist/orchestration/steps/run-eval-step.js +51 -31
  190. package/dist/pipeline/agent-behavior-report.js +6 -0
  191. package/dist/pipeline/attribution.d.ts +1 -1
  192. package/dist/pipeline/attribution.js +1 -1
  193. package/dist/pipeline/cache.js +29 -15
  194. package/dist/pipeline/calculate-scores.d.ts +2 -0
  195. package/dist/pipeline/calculate-scores.js +70 -33
  196. package/dist/pipeline/chronic-failures.d.ts +55 -0
  197. package/dist/pipeline/chronic-failures.js +110 -0
  198. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
  199. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
  200. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
  201. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +132 -62
  202. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
  203. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
  204. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
  205. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
  206. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
  207. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
  208. package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
  209. package/dist/pipeline/compiler/assertion-mapper.js +1 -1
  210. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
  211. package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
  212. package/dist/pipeline/compiler/config-loader.d.ts +14 -0
  213. package/dist/pipeline/compiler/config-loader.js +42 -2
  214. package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
  215. package/dist/pipeline/compiler/fixture-resolver.js +1 -1
  216. package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
  217. package/dist/pipeline/compiler/ignore-fields.js +1 -1
  218. package/dist/pipeline/compiler/index.d.ts +2 -5
  219. package/dist/pipeline/compiler/index.js +2 -5
  220. package/dist/pipeline/compiler/literacy-bridge.d.ts +2 -2
  221. package/dist/pipeline/compiler/literacy-bridge.js +2 -2
  222. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  223. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  224. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  225. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  226. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  227. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  228. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +23 -0
  229. package/dist/pipeline/compiler/mode-bases/literacy.js +132 -0
  230. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  231. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  232. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  233. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  234. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  235. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  236. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  237. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  238. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  239. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  240. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  241. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  242. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  243. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  244. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
  245. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  246. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  247. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  248. package/dist/pipeline/compiler/mode-handlers/index.d.ts +6 -7
  249. package/dist/pipeline/compiler/mode-handlers/index.js +6 -8
  250. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  251. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  252. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  253. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  254. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  255. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  256. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  257. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  258. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  259. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  260. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  261. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  262. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  263. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  264. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  265. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  266. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  267. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  268. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  269. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  270. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  271. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  272. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  273. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  274. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +63 -6
  275. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
  276. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
  277. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  278. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  279. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  280. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  281. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  282. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  283. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  284. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +108 -0
  285. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  286. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  287. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  288. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  289. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +3 -1
  290. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +65 -67
  291. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  292. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +191 -0
  293. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  294. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +101 -0
  295. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  296. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  297. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +19 -0
  298. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +323 -0
  299. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +103 -0
  300. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  301. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
  302. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
  303. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  304. package/dist/pipeline/compiler/preset-loader.js +99 -0
  305. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +7 -10
  306. package/dist/pipeline/compiler/presets/sanity-literacy.js +11 -157
  307. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
  308. package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
  309. package/dist/pipeline/compiler/provider-assembler.js +13 -7
  310. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
  311. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
  312. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
  313. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
  314. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
  315. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
  316. package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
  317. package/dist/pipeline/compiler/sandbox/index.js +1 -1
  318. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
  319. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
  320. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
  321. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
  322. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
  323. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
  324. package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
  325. package/dist/pipeline/compiler/scoring-bridge.js +1 -1
  326. package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
  327. package/dist/pipeline/compiler/task-bridge.js +92 -0
  328. package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
  329. package/dist/pipeline/compiler/task-graph-builder.js +1 -4
  330. package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
  331. package/dist/pipeline/compiler/telemetry/index.js +1 -1
  332. package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
  333. package/dist/pipeline/compiler/variable-resolver.js +1 -1
  334. package/dist/pipeline/coverage-audit.d.ts +1 -1
  335. package/dist/pipeline/coverage-audit.js +1 -1
  336. package/dist/pipeline/degradations.d.ts +1 -1
  337. package/dist/pipeline/degradations.js +1 -1
  338. package/dist/pipeline/expand-tasks.d.ts +2 -2
  339. package/dist/pipeline/expand-tasks.js +2 -2
  340. package/dist/pipeline/failure-modes.d.ts +1 -1
  341. package/dist/pipeline/failure-modes.js +13 -1
  342. package/dist/pipeline/gap-analysis.d.ts +1 -1
  343. package/dist/pipeline/gap-analysis.js +3 -1
  344. package/dist/pipeline/generate-configs.d.ts +2 -2
  345. package/dist/pipeline/generate-configs.js +16 -9
  346. package/dist/pipeline/grader-compare-runner.d.ts +1 -1
  347. package/dist/pipeline/grader-compare-runner.js +7 -1
  348. package/dist/pipeline/grader-comparison.d.ts +1 -1
  349. package/dist/pipeline/grader-comparison.js +1 -1
  350. package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
  351. package/dist/pipeline/grader-consistency-runner.js +7 -1
  352. package/dist/pipeline/grader-consistency.d.ts +1 -1
  353. package/dist/pipeline/grader-consistency.js +1 -1
  354. package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
  355. package/dist/pipeline/grader-sensitivity-runner.js +1 -1
  356. package/dist/pipeline/grader-sensitivity.d.ts +1 -1
  357. package/dist/pipeline/grader-sensitivity.js +1 -1
  358. package/dist/pipeline/grader-validate-runner.d.ts +1 -1
  359. package/dist/pipeline/grader-validate-runner.js +2 -2
  360. package/dist/pipeline/grader-validation.d.ts +1 -1
  361. package/dist/pipeline/grader-validation.js +1 -1
  362. package/dist/pipeline/map-request-to-config.js +16 -2
  363. package/dist/pipeline/mirror-repo-tasks.d.ts +8 -8
  364. package/dist/pipeline/mirror-repo-tasks.js +10 -10
  365. package/dist/pipeline/plan-format.d.ts +1 -1
  366. package/dist/pipeline/plan-format.js +1 -1
  367. package/dist/pipeline/plan.d.ts +1 -1
  368. package/dist/pipeline/plan.js +68 -30
  369. package/dist/pipeline/probe.d.ts +1 -1
  370. package/dist/pipeline/probe.js +1 -1
  371. package/dist/pipeline/readiness-report.d.ts +2 -2
  372. package/dist/pipeline/readiness-report.js +2 -2
  373. package/dist/pipeline/release-classification.d.ts +1 -1
  374. package/dist/pipeline/release-classification.js +1 -1
  375. package/dist/pipeline/release-report.d.ts +1 -1
  376. package/dist/pipeline/release-report.js +1 -1
  377. package/dist/pipeline/repo-eval-comment.d.ts +1 -1
  378. package/dist/pipeline/repo-eval-comment.js +1 -1
  379. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  380. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  381. package/dist/pipeline/resolve-mappings.d.ts +6 -6
  382. package/dist/pipeline/resolve-mappings.js +44 -44
  383. package/dist/pipeline/retrieval-metrics.d.ts +3 -3
  384. package/dist/pipeline/retrieval-metrics.js +28 -20
  385. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  386. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  387. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  388. package/dist/pipeline/steps/compare-step.js +90 -0
  389. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  390. package/dist/pipeline/steps/eval-step.js +347 -0
  391. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  392. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  393. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  394. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  395. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  396. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  397. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  398. package/dist/pipeline/steps/publish-report-step.js +243 -0
  399. package/dist/pipeline/steps/report-step.d.ts +13 -0
  400. package/dist/pipeline/steps/report-step.js +56 -0
  401. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  402. package/dist/pipeline/steps/update-scores-step.js +42 -0
  403. package/dist/pipeline/targeted-loo.d.ts +1 -1
  404. package/dist/pipeline/targeted-loo.js +1 -1
  405. package/dist/pipeline/thresholds.d.ts +1 -1
  406. package/dist/pipeline/thresholds.js +1 -1
  407. package/dist/pipeline/validate.js +13 -0
  408. package/dist/report-store.d.ts +17 -0
  409. package/dist/report-store.js +24 -0
  410. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  411. package/dist/scripts/agent-behavior-report.js +315 -0
  412. package/dist/scripts/baseline.d.ts +43 -0
  413. package/dist/scripts/baseline.js +267 -0
  414. package/dist/scripts/calculate-scores.d.ts +166 -0
  415. package/dist/scripts/calculate-scores.js +1296 -0
  416. package/dist/scripts/compare.d.ts +22 -0
  417. package/dist/scripts/compare.js +334 -0
  418. package/dist/scripts/coverage-audit.d.ts +44 -0
  419. package/dist/scripts/coverage-audit.js +209 -0
  420. package/dist/scripts/debug-eval.d.ts +19 -0
  421. package/dist/scripts/debug-eval.js +73 -0
  422. package/dist/scripts/discovery-report.d.ts +58 -0
  423. package/dist/scripts/discovery-report.js +250 -0
  424. package/dist/scripts/fetch-docs.d.ts +35 -0
  425. package/dist/scripts/fetch-docs.js +472 -0
  426. package/dist/scripts/generate-configs.d.ts +66 -0
  427. package/dist/scripts/generate-configs.js +459 -0
  428. package/dist/scripts/grader-api.d.ts +27 -0
  429. package/dist/scripts/grader-api.js +206 -0
  430. package/dist/scripts/grader-compare.d.ts +22 -0
  431. package/dist/scripts/grader-compare.js +368 -0
  432. package/dist/scripts/grader-consistency.d.ts +20 -0
  433. package/dist/scripts/grader-consistency.js +313 -0
  434. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  435. package/dist/scripts/grader-sensitivity.js +354 -0
  436. package/dist/scripts/grader-validate.d.ts +19 -0
  437. package/dist/scripts/grader-validate.js +267 -0
  438. package/dist/scripts/measure-retrieval.d.ts +10 -0
  439. package/dist/scripts/measure-retrieval.js +145 -0
  440. package/dist/scripts/migrate-task-mode.d.ts +1 -1
  441. package/dist/scripts/migrate-task-mode.js +1 -1
  442. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
  443. package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
  444. package/dist/scripts/pipeline.d.ts +76 -0
  445. package/dist/scripts/pipeline.js +1031 -0
  446. package/dist/scripts/pr-comment.d.ts +10 -0
  447. package/dist/scripts/pr-comment.js +510 -0
  448. package/dist/scripts/readiness-report.d.ts +88 -0
  449. package/dist/scripts/readiness-report.js +342 -0
  450. package/dist/scripts/update-quality-scores.d.ts +15 -0
  451. package/dist/scripts/update-quality-scores.js +184 -0
  452. package/dist/scripts/validate-task-sources.d.ts +1 -1
  453. package/dist/scripts/validate-task-sources.js +1 -1
  454. package/dist/scripts/validate.d.ts +13 -0
  455. package/dist/scripts/validate.js +79 -0
  456. package/dist/scripts/webhook-server.d.ts +26 -0
  457. package/dist/scripts/webhook-server.js +147 -0
  458. package/dist/scripts/weekly-digest.d.ts +24 -0
  459. package/dist/scripts/weekly-digest.js +144 -0
  460. package/dist/sinks/format-slack.d.ts +64 -0
  461. package/dist/sinks/format-slack.js +306 -0
  462. package/dist/sinks/slack-sink.d.ts +27 -0
  463. package/dist/sinks/slack-sink.js +78 -0
  464. package/dist/sinks/types.d.ts +1 -1
  465. package/dist/sinks/types.js +1 -1
  466. package/dist/sinks/webhook-sink.d.ts +19 -0
  467. package/dist/sinks/webhook-sink.js +50 -0
  468. package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
  469. package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
  470. package/dist/tasks/literacy/content-lake.task.ts +181 -0
  471. package/dist/tasks/literacy/frameworks.task.ts +129 -0
  472. package/dist/tasks/literacy/functions.task.ts +70 -0
  473. package/dist/tasks/literacy/groq.task.ts +259 -0
  474. package/dist/tasks/literacy/image-handling.task.ts +95 -0
  475. package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
  476. package/dist/tasks/literacy/portable-text.task.ts +169 -0
  477. package/dist/tasks/literacy/studio-setup.task.ts +134 -0
  478. package/dist/tasks/literacy/visual-editing.task.ts +147 -0
  479. package/package.json +32 -24
  480. package/tasks/.expanded.agentic.yaml +280 -0
  481. package/tasks/.expanded.yaml +565 -0
  482. package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
  483. package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
  484. package/tasks/literacy/content-lake.task.ts +181 -0
  485. package/tasks/literacy/frameworks.task.ts +1 -0
  486. package/tasks/literacy/functions.task.ts +1 -0
  487. package/tasks/literacy/groq.task.ts +1 -0
  488. package/tasks/literacy/image-handling.task.ts +95 -0
  489. package/tasks/literacy/nextjs-live.task.ts +2 -1
  490. package/tasks/literacy/portable-text.task.ts +169 -0
  491. package/tasks/literacy/studio-setup.task.ts +5 -2
  492. package/tasks/literacy/visual-editing.task.ts +1 -0
  493. package/LICENSE +0 -21
  494. package/tasks/frameworks.yaml +0 -98
  495. package/tasks/functions.yaml +0 -51
  496. package/tasks/groq.yaml +0 -216
  497. package/tasks/nextjs-live.yaml +0 -62
  498. package/tasks/studio-setup.yaml +0 -111
  499. package/tasks/visual-editing.yaml +0 -120
@@ -0,0 +1,253 @@
1
+ /**
2
+ * capture compare — compare two pipeline capture directories.
3
+ *
4
+ * Reads manifest.json from both captures, runs compareCaptures(),
5
+ * and prints a human-readable table or JSON diff report.
6
+ *
7
+ * Supports both raw directories and .tar.gz archives.
8
+ *
9
+ * Exit codes:
10
+ * 0 — captures are equivalent
11
+ * 1 — differences found
12
+ * 2 — error (missing files, invalid manifest, etc.)
13
+ */
14
+ import { execFileSync } from "node:child_process";
15
+ import { existsSync, mkdtempSync, readdirSync, rmSync, writeFileSync, } from "node:fs";
16
+ import { tmpdir } from "node:os";
17
+ import { join, resolve } from "node:path";
18
+ import { Command, Option } from "commander";
19
+ import { compareCaptures } from "../artifact-capture/comparator.js";
20
+ // ---------------------------------------------------------------------------
21
+ // Command factory
22
+ // ---------------------------------------------------------------------------
23
/**
 * Build the `compare` subcommand.
 *
 * The command resolves both capture paths (directories or .tar.gz archives),
 * runs compareCaptures() with the thresholds given on the command line, and
 * prints either a table or a JSON report. When `--output` is given the JSON
 * report is additionally written to that file.
 *
 * Exit codes are communicated through `process.exitCode`:
 *   0 — captures are equivalent
 *   1 — differences found
 *   2 — any error thrown while resolving or comparing
 *
 * @returns {Command} the configured commander command
 */
export function createCaptureCompareCommand() {
    // commander invokes option parsers as (value, previousValue). Handing it
    // `parseInt` directly would therefore use the default (3) as the radix,
    // so "--json-depth 5" parsed to NaN. Always parse as base 10 instead.
    const parseDecimalInt = (value) => Number.parseInt(value, 10);
    return new Command("compare")
        .description("Compare two pipeline capture directories")
        .argument("<baseline>", "Path to baseline capture (directory or .tar.gz)")
        .argument("<experiment>", "Path to experiment capture (directory or .tar.gz)")
        .addOption(new Option("-m, --mode <mode>", "Comparison mode")
            .choices(["inventory", "structural", "strict"])
            .default("inventory"))
        .option("-f, --format <fmt>", "Output format: table or json", "table")
        .option("-o, --output <path>", "Write JSON report to file")
        .option("--score-threshold <n>", "Aggregate score regression threshold (points)", parseFloat, 5)
        .option("--task-threshold <n>", "Per-task score regression threshold (points)", parseFloat, 10)
        .option("--timing-threshold <n>", "Step timing multiplier threshold", parseFloat, 2)
        .option("--json-depth <n>", "JSON structural diff depth", parseDecimalInt, 3)
        .action(async (baselinePath, experimentPath, opts) => {
            // Callbacks that remove temp directories created for archives.
            const cleanups = [];
            try {
                const baseline = resolveCapturePath(resolve(baselinePath), cleanups);
                const experiment = resolveCapturePath(resolve(experimentPath), cleanups);
                console.log("");
                console.log(" ailf capture compare");
                console.log(" " + "─".repeat(40));
                console.log("");
                console.log(` Baseline: ${baselinePath}`);
                console.log(` Experiment: ${experimentPath}`);
                console.log(` Mode: ${opts.mode}`);
                console.log("");
                const report = compareCaptures(baseline, experiment, {
                    mode: opts.mode,
                    scoreThresholds: {
                        aggregate: opts.scoreThreshold,
                        perTask: opts.taskThreshold,
                    },
                    timingThresholds: { multiplier: opts.timingThreshold },
                    jsonDiffDepth: opts.jsonDepth,
                });
                const wantsJson = opts.format === "json";
                if (!wantsJson) {
                    printTableReport(report);
                }
                if (opts.output) {
                    // The file always receives JSON, regardless of --format.
                    writeFileSync(opts.output, JSON.stringify(report, null, 2), "utf-8");
                    console.log(wantsJson
                        ? ` Report written to ${opts.output}`
                        : ` Report also written to ${opts.output}`);
                }
                else if (wantsJson) {
                    console.log(JSON.stringify(report, null, 2));
                }
                process.exitCode = report.equivalent ? 0 : 1;
            }
            catch (err) {
                console.error(` Error: ${err instanceof Error ? err.message : String(err)}`);
                process.exitCode = 2;
            }
            finally {
                for (const cleanup of cleanups) {
                    try {
                        cleanup();
                    }
                    catch {
                        // Best-effort cleanup: a leftover temp dir must not
                        // change the exit code decided above.
                    }
                }
            }
        });
}
95
+ // ---------------------------------------------------------------------------
96
+ // Path resolution (handles tar.gz, subdirectories, raw dirs)
97
+ // ---------------------------------------------------------------------------
98
/**
 * Turn a capture path into the directory that contains manifest.json.
 *
 * A path ending in ".tar.gz" is unpacked with `tar` into a fresh temp
 * directory; a callback that removes that directory is appended to
 * `cleanups` before extraction starts. Any other path is searched as-is.
 *
 * @param {string} inputPath - path to a capture directory or .tar.gz archive
 * @param {Array<() => void>} cleanups - receives temp-dir removal callbacks
 * @returns {string} directory located by findManifestDir()
 * @throws {Error} when `inputPath` does not exist
 */
function resolveCapturePath(inputPath, cleanups) {
    if (!existsSync(inputPath)) {
        throw new Error(`Path does not exist: ${inputPath}`);
    }
    const isArchive = inputPath.endsWith(".tar.gz");
    if (!isArchive) {
        return findManifestDir(inputPath);
    }
    const extractionRoot = mkdtempSync(join(tmpdir(), "ailf-capture-cmp-"));
    const removeExtractionRoot = () => rmSync(extractionRoot, { recursive: true, force: true });
    // Registered before extracting so the temp dir is removed even if tar fails.
    cleanups.push(removeExtractionRoot);
    execFileSync("tar", ["-xzf", inputPath, "-C", extractionRoot]);
    return findManifestDir(extractionRoot);
}
110
/**
 * Locate the directory that holds manifest.json.
 *
 * Two layouts are accepted:
 *   1. `dir` itself contains manifest.json.
 *   2. An immediate child of `dir` contains manifest.json. Entries whose
 *      name starts with "." or ends with ".tar.gz" are not considered, and
 *      the first matching child in readdir order is returned.
 *
 * @param {string} dir - directory to inspect
 * @returns {string} `dir` or the matching child directory
 * @throws {Error} when neither layout applies
 */
function findManifestDir(dir) {
    const holdsManifest = (candidate) => existsSync(join(candidate, "manifest.json"));
    if (holdsManifest(dir)) {
        return dir;
    }
    // Only one level of nesting is searched.
    const nested = readdirSync(dir)
        .filter((entry) => !(entry.startsWith(".") || entry.endsWith(".tar.gz")))
        .map((entry) => join(dir, entry))
        .find((candidate) => holdsManifest(candidate));
    if (nested !== undefined) {
        return nested;
    }
    throw new Error(`No manifest.json found in ${dir} or its subdirectories. ` +
        `Is this a valid capture directory?`);
}
130
+ // ---------------------------------------------------------------------------
131
+ // Table formatting
132
+ // ---------------------------------------------------------------------------
133
/**
 * Print a comparison report to stdout as a sequence of titled sections.
 *
 * Sections, in order: Inventory, Content Changes (only when `report.content`
 * is non-empty; at most 10 artifacts are shown), Scores (only when
 * `report.scores` is set), Timing (only when `report.timing` is set),
 * Security (at most 5 violations are shown) and the final Result line.
 *
 * @param {object} report - comparison report as returned by compareCaptures()
 */
function printTableReport(report) {
    const rule = " " + "─".repeat(40);
    // Every section starts with its title followed by a horizontal rule.
    const heading = (title) => {
        console.log(title);
        console.log(rule);
    };
    // Shared by the Scores and Timing sections.
    const logBreaches = (breaches) => {
        if (breaches.length > 0) {
            console.log(` Breaches: ${breaches.length}`);
            for (const breach of breaches) {
                console.log(` - ${breach}`);
            }
            return;
        }
        console.log(" Breaches: none");
    };
    const { inventory, content, scores, timing, security } = report;

    // Inventory
    heading(" Inventory");
    console.log(` Common: ${inventory.common.length} artifact(s)`);
    const { added, removed } = inventory;
    console.log(added.length > 0
        ? ` Added: ${added.length} (${added.join(", ")})`
        : " Added: 0");
    console.log(removed.length > 0
        ? ` Removed: ${removed.length} (${removed.join(", ")})`
        : " Removed: 0");
    console.log("");

    // Content diff preview (structural/strict modes)
    if (content && content.length > 0) {
        heading(" Content Changes");
        content.slice(0, 10).forEach((entry) => printContentDiff(entry));
        const hiddenArtifacts = content.length - 10;
        if (hiddenArtifacts > 0) {
            console.log(` ... and ${hiddenArtifacts} more changed artifact(s)`);
        }
        console.log("");
    }

    // Scores
    if (scores) {
        heading(" Scores");
        const { baselineMean, currentMean, delta } = scores;
        // Negative numbers already carry their sign; zero gets a blank pad.
        let prefix = " ";
        if (delta > 0) {
            prefix = "+";
        }
        else if (delta < 0) {
            prefix = "";
        }
        console.log(` Aggregate: ${baselineMean.toFixed(1)} -> ${currentMean.toFixed(1)} (${prefix}${delta.toFixed(1)})`);
        logBreaches(scores.breaches);
        console.log("");
    }

    // Timing
    if (timing) {
        heading(" Timing");
        const { totalDeltaMs } = timing;
        const sign = totalDeltaMs >= 0 ? "+" : "";
        console.log(` Total delta: ${sign}${totalDeltaMs}ms`);
        logBreaches(timing.breaches);
        console.log("");
    }

    // Security
    heading(" Security");
    if (security.leaksFound) {
        const { violations } = security;
        console.log(` Leaks: ${violations.length} finding(s)`);
        violations.slice(0, 5).forEach((violation) => {
            console.log(` - ${violation.file}: ${violation.detail}`);
        });
        const hiddenViolations = violations.length - 5;
        if (hiddenViolations > 0) {
            console.log(` ... and ${hiddenViolations} more`);
        }
    }
    else {
        console.log(" Leaks: none");
    }
    console.log("");

    // Result
    if (report.equivalent) {
        console.log(" Result: EQUIVALENT");
    }
    else {
        console.log(` Result: DIFFERENCES FOUND`);
        console.log(` ${report.summary}`);
    }
    console.log("");
}
225
/**
 * Print a short preview of one artifact's content diff.
 *
 * When `diff.changes` is an array it is treated as a JSON diff and up to
 * three changed paths are listed, followed by a count of the remainder.
 * Otherwise `diff.changes` is read as a text diff summary with `addedLines`
 * and `removedLines`.
 *
 * @param {object} diff - one entry of `report.content`
 */
function printContentDiff(diff) {
    console.log(` ${diff.artifactKey} (${diff.format})`);
    const { changes } = diff;
    if (!Array.isArray(changes)) {
        // Text/markdown diff
        console.log(` +${changes.addedLines} / -${changes.removedLines} lines`);
        return;
    }
    // JSON diff: a side that is undefined means the path exists only on the other side.
    const describe = (change) => {
        if (change.baseline === undefined) {
            return ` + ${change.path} (added)`;
        }
        if (change.experiment === undefined) {
            return ` - ${change.path} (removed)`;
        }
        return ` ~ ${change.path} (changed)`;
    };
    changes.slice(0, 3).forEach((change) => {
        console.log(describe(change));
    });
    const hidden = changes.length - 3;
    if (hidden > 0) {
        console.log(` ... ${hidden} more change(s)`);
    }
}
@@ -0,0 +1,12 @@
1
+ /**
2
+ * capture list — list pipeline captures in a directory.
3
+ *
4
+ * Scans a capture directory for subdirectories containing manifest.json,
5
+ * reads each manifest, and prints a summary table sorted by date.
6
+ *
7
+ * Usage:
8
+ * ailf capture list # default: results/captures/
9
+ * ailf capture list ./my-captures # custom directory
10
+ */
11
+ import { Command } from "commander";
12
+ export declare function createCaptureListCommand(): Command;
@@ -0,0 +1,147 @@
1
+ /**
2
+ * capture list — list pipeline captures in a directory.
3
+ *
4
+ * Scans a capture directory for subdirectories containing manifest.json,
5
+ * reads each manifest, and prints a summary table sorted by date.
6
+ *
7
+ * Usage:
8
+ * ailf capture list # default: results/captures/
9
+ * ailf capture list ./my-captures # custom directory
10
+ */
11
+ import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
12
+ import { join, resolve } from "node:path";
13
+ import { Command } from "commander";
14
+ // ---------------------------------------------------------------------------
15
+ // Command factory
16
+ // ---------------------------------------------------------------------------
17
/**
 * Build the `list` subcommand.
 *
 * The command resolves the captures directory (default "results/captures"),
 * collects entries via discoverCaptures(), sorts them newest first and
 * prints them as a table or, with `--format json`, as a JSON array.
 *
 * Entries whose `startedAt` cannot be parsed as a date are sorted after all
 * dated entries and shown with the date "unknown". A negative
 * `artifactCount` (the marker used for unextracted archives) is shown as "?".
 *
 * Sets `process.exitCode = 1` when the captures directory does not exist.
 *
 * @returns {Command} the configured commander command
 */
export function createCaptureListCommand() {
    // Unparseable timestamps map to -Infinity so they sort last in a
    // newest-first ordering instead of producing NaN comparisons.
    const sortKey = (capture) => {
        const ms = new Date(capture.startedAt).getTime();
        return Number.isNaN(ms) ? Number.NEGATIVE_INFINITY : ms;
    };
    const formatDate = (startedAt) => {
        const parsed = new Date(startedAt);
        if (Number.isNaN(parsed.getTime())) {
            return "unknown";
        }
        return parsed.toLocaleString("en-US", {
            month: "short",
            day: "2-digit",
            hour: "2-digit",
            minute: "2-digit",
        });
    };
    return new Command("list")
        .description("List pipeline captures in a directory")
        .argument("[dir]", "Captures directory (default: results/captures/)")
        .option("-f, --format <fmt>", "Output format: table or json", "table")
        .action(async (dir, opts) => {
            const captureDir = resolve(dir ?? "results/captures");
            if (!existsSync(captureDir)) {
                console.error(` No captures directory found at ${captureDir}`);
                console.error(" Run 'ailf pipeline --capture' to create captures.");
                process.exitCode = 1;
                return;
            }
            const captures = discoverCaptures(captureDir);
            if (captures.length === 0) {
                console.log(` No captures found in ${captureDir}`);
                return;
            }
            // Sort by startedAt descending (newest first). Compared rather
            // than subtracted because -Infinity - -Infinity would be NaN.
            captures.sort((a, b) => {
                const keyA = sortKey(a);
                const keyB = sortKey(b);
                if (keyA === keyB) {
                    return 0;
                }
                return keyB > keyA ? 1 : -1;
            });
            if (opts.format === "json") {
                console.log(JSON.stringify(captures, null, 2));
                return;
            }
            console.log("");
            console.log(" ailf capture list");
            console.log(" " + "─".repeat(60));
            console.log("");
            console.log(" " +
                "Date".padEnd(22) +
                "Mode".padEnd(18) +
                "Artifacts".padEnd(12) +
                "Size".padEnd(10) +
                "Path");
            console.log(" " + "─".repeat(60));
            for (const c of captures) {
                const date = formatDate(c.startedAt).padEnd(22);
                // A manifest without pipeline.mode must not crash the listing.
                const mode = String(c.mode ?? "unknown").padEnd(18);
                const artifacts = (c.artifactCount < 0 ? "?" : String(c.artifactCount)).padEnd(12);
                const size = formatBytes(c.totalBytes).padEnd(10);
                console.log(` ${date}${mode}${artifacts}${size}${c.name}`);
            }
            console.log("");
            console.log(` ${captures.length} capture(s) found in ${captureDir}`);
            console.log("");
        });
}
69
/**
 * Collect summary records for the captures found directly inside `captureDir`.
 *
 * Two kinds of entries are recognised:
 *   - a child containing manifest.json: the manifest is parsed and summarised;
 *   - a child whose name ends in ".tar.gz": only the file size is read, mode
 *     and timestamp are derived from the file name, and `artifactCount` is -1
 *     because the archive is not extracted.
 * Names starting with "." are ignored. Entries that fail to read or parse
 * are skipped without an error.
 *
 * @param {string} captureDir - directory to scan
 * @returns {Array<object>} records with name, path, mode, startedAt,
 *   artifactCount, totalBytes and compressed
 */
function discoverCaptures(captureDir) {
    const describeDirectory = (name, fullPath, manifestPath) => {
        const manifest = JSON.parse(readFileSync(manifestPath, "utf-8"));
        const totalBytes = manifest.artifacts.reduce((sum, a) => sum + a.bytes, 0);
        return {
            name,
            path: fullPath,
            mode: manifest.pipeline.mode,
            startedAt: manifest.startedAt,
            artifactCount: manifest.artifacts.length,
            totalBytes,
            compressed: false,
        };
    };
    const describeArchive = (name, fullPath) => ({
        name,
        path: fullPath,
        mode: extractModeFromName(name),
        startedAt: extractTimestampFromName(name),
        artifactCount: -1, // Unknown without extracting
        totalBytes: statSync(fullPath).size,
        compressed: true,
    });
    const captures = [];
    for (const name of readdirSync(captureDir)) {
        if (name.startsWith(".")) {
            continue;
        }
        const fullPath = join(captureDir, name);
        const manifestPath = join(fullPath, "manifest.json");
        // A manifest takes precedence over the archive naming convention.
        const isDirectoryCapture = existsSync(manifestPath);
        if (!isDirectoryCapture && !name.endsWith(".tar.gz")) {
            continue;
        }
        try {
            captures.push(isDirectoryCapture
                ? describeDirectory(name, fullPath, manifestPath)
                : describeArchive(name, fullPath));
        }
        catch {
            // Skip entries that cannot be read or parsed
        }
    }
    return captures;
}
117
/**
 * Mode names that may themselves contain hyphens. They are matched as a
 * whole prefix so that e.g. "mcp-server-…" is not cut off at "mcp".
 */
const KNOWN_MODES = [
    "literacy",
    "mcp-server",
    "agent-harness",
    "knowledge-probe",
];
/**
 * Derive the pipeline mode from a capture file name.
 *
 * Returns the first known mode that prefixes the name (followed by "-");
 * otherwise everything before the first hyphen.
 *
 * @param {string} name - capture file name
 * @returns {string} the mode portion of the name
 */
function extractModeFromName(name) {
    const known = KNOWN_MODES.find((mode) => name.startsWith(`${mode}-`));
    if (known !== undefined) {
        return known;
    }
    const [firstSegment] = name.split("-");
    return firstSegment ?? "unknown";
}
131
/**
 * Derive an ISO-8601 style timestamp from a capture file name.
 *
 * Expected name pattern: {mode}-YYYYMMDD-HHmmss-{id}.tar.gz. The first
 * "8 digits, hyphen, 6 digits" run is reformatted as
 * "YYYY-MM-DDTHH:mm:ssZ".
 *
 * @param {string} name - capture file name
 * @returns {string} the reformatted timestamp, or "unknown" when the name
 *   contains no such run
 */
function extractTimestampFromName(name) {
    const parts = /(\d{4})(\d{2})(\d{2})-(\d{2})(\d{2})(\d{2})/.exec(name);
    if (parts === null) {
        return "unknown";
    }
    // parts[0] is the whole match; groups 1-3 are the date, 4-6 the time.
    const datePart = parts.slice(1, 4).join("-");
    const timePart = parts.slice(4, 7).join(":");
    return `${datePart}T${timePart}Z`;
}
139
/**
 * Render a byte count as a short human-readable string.
 *
 * @param {number} bytes - size in bytes
 * @returns {string} "?" for negative input, "<n>B" below 1024, "<n.n>KB"
 *   below 1024*1024, otherwise "<n.n>MB"
 */
function formatBytes(bytes) {
    const UNIT = 1024;
    if (bytes < 0) {
        return "?";
    }
    if (bytes < UNIT) {
        return `${bytes}B`;
    }
    const kilobytes = bytes / UNIT;
    if (bytes < UNIT * UNIT) {
        return `${kilobytes.toFixed(1)}KB`;
    }
    const megabytes = kilobytes / UNIT;
    return `${megabytes.toFixed(1)}MB`;
}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * capture command — manage and compare pipeline artifact captures.
3
+ *
4
+ * Parent command for capture-related subcommands:
5
+ * ailf capture compare <baseline> <experiment>
6
+ * ailf capture list [dir]
7
+ */
8
+ import { Command } from "commander";
9
+ export declare function createCaptureCommand(): Command;
@@ -0,0 +1,16 @@
1
+ /**
2
+ * capture command — manage and compare pipeline artifact captures.
3
+ *
4
+ * Parent command for capture-related subcommands:
5
+ * ailf capture compare <baseline> <experiment>
6
+ * ailf capture list [dir]
7
+ */
8
+ import { Command } from "commander";
9
+ import { createCaptureCompareCommand } from "./capture-compare.js";
10
+ import { createCaptureListCommand } from "./capture-list.js";
11
/**
 * Build the parent `capture` command and attach its two subcommands,
 * `compare` and `list`.
 *
 * @returns {Command} the configured commander command
 */
export function createCaptureCommand() {
    const subcommands = [
        createCaptureCompareCommand(),
        createCaptureListCommand(),
    ];
    const capture = new Command("capture");
    capture.description("Manage and compare pipeline artifact captures");
    for (const subcommand of subcommands) {
        capture.addCommand(subcommand);
    }
    return capture;
}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * chronic-failures command — query recent reports to find tasks that
3
+ * consistently error above a configurable threshold.
4
+ *
5
+ * @see docs/exec-plans/eval-pipeline-timeout-resilience.md — Phase 5
6
+ */
7
+ import { Command } from "commander";
8
+ export declare function createChronicFailuresCommand(): Command;
@@ -0,0 +1,33 @@
1
+ /**
2
+ * chronic-failures command — query recent reports to find tasks that
3
+ * consistently error above a configurable threshold.
4
+ *
5
+ * @see docs/exec-plans/eval-pipeline-timeout-resilience.md — Phase 5
6
+ */
7
+ import { Command } from "commander";
8
+ import { detectChronicFailures, formatChronicFailuresConsole, } from "../pipeline/chronic-failures.js";
9
+ import { ReportStore } from "../report-store.js";
10
/**
 * Build the `chronic-failures` command.
 *
 * The command creates a ReportStore, passes it to detectChronicFailures()
 * together with the `--lookback` and `--threshold` values, and prints the
 * result either as raw JSON (`--json`) or through
 * formatChronicFailuresConsole().
 *
 * Sets `process.exitCode = 1` when the report lists at least one failure.
 *
 * @returns {Command} the configured commander command
 */
export function createChronicFailuresCommand() {
    return new Command("chronic-failures")
        .description("Identify tasks that error in >50% of recent evaluation runs")
        // Wrapped so commander's second (previous value) argument is never
        // mistaken for parseInt's radix.
        .option("--lookback <n>", "Number of recent reports to analyze", (v) => parseInt(v, 10), 10)
        .option("--threshold <n>", "Error rate threshold (0-1) for chronic classification", (v) => parseFloat(v), 0.5)
        .option("--json", "Output raw JSON", false)
        .action(async (opts) => {
            const reportStore = new ReportStore();
            const report = await detectChronicFailures(reportStore, {
                lookback: opts.lookback,
                threshold: opts.threshold,
            });
            if (opts.json) {
                console.log(JSON.stringify(report, null, 2));
            }
            else {
                console.log(formatChronicFailuresConsole(report));
            }
            // Signal chronic failures through the exit code. Setting
            // process.exitCode (rather than calling process.exit) lets the
            // report printed above finish flushing, e.g. when stdout is piped.
            if (report.failures.length > 0) {
                process.exitCode = 1;
            }
        });
}
@@ -8,6 +8,7 @@ import { Command } from "commander";
8
8
  import { dirname, resolve } from "path";
9
9
  import { fileURLToPath } from "url";
10
10
  import { countReferencedDocs, formatCoverageConsole, formatCoverageMarkdown, runCoverageAudit, } from "../pipeline/coverage-audit.js";
11
+ import { createLiteracyModeBase } from "../pipeline/compiler/mode-bases/index.js";
11
12
  import { createSanityLiteracyPreset } from "../pipeline/compiler/presets/index.js";
12
13
  const __dirname = dirname(fileURLToPath(import.meta.url));
13
14
  const ROOT = resolve(__dirname, "..", "..");
@@ -17,9 +18,10 @@ export function createCoverageAuditCommand() {
17
18
  .option("--format <fmt>", "Output format: table, md, markdown")
18
19
  .option("--json", "Output raw JSON", false)
19
20
  .action(async (opts) => {
20
- // Build a registry with preset features so coverage audit works
21
+ // Build a registry with mode base + preset so coverage audit works
21
22
  // even when config/features.ts is empty (preset is source of truth).
22
23
  const registry = new InMemoryPluginRegistry();
24
+ registry.registerModeBase(createLiteracyModeBase());
23
25
  registry.registerPreset(createSanityLiteracyPreset({ rootDir: ROOT }));
24
26
  const report = runCoverageAudit(ROOT, { registry });
25
27
  if (!report) {
@@ -17,7 +17,7 @@
17
17
  * Adding --explain support for a new command = adding one registry entry.
18
18
  * Commands not in the registry fall back to a minimal generic plan.
19
19
  *
20
- * @see docs/exec-plans/execution-preview.md
20
+ * @see docs/archive/exec-plans/execution-preview.md
21
21
  */
22
22
  import type { Command } from "commander";
23
23
  /**
@@ -17,7 +17,7 @@
17
17
  * Adding --explain support for a new command = adding one registry entry.
18
18
  * Commands not in the registry fall back to a minimal generic plan.
19
19
  *
20
- * @see docs/exec-plans/execution-preview.md
20
+ * @see docs/archive/exec-plans/execution-preview.md
21
21
  */
22
22
  import { TASK_FILE_NAMES } from "../_vendor/ailf-core/index.js";
23
23
  import { buildPipelinePlan, buildSimpleCommandPlan, } from "../pipeline/plan.js";
@@ -137,6 +137,24 @@ const EXPLAIN_REGISTRY = {
137
137
  },
138
138
  ],
139
139
  },
140
+ "chronic-failures": {
141
+ description: "Identify tasks that error in >50% of recent evaluation runs",
142
+ filesRead: [],
143
+ steps: [
144
+ {
145
+ cacheStatus: "miss",
146
+ name: "Query Content Lake",
147
+ reason: "Fetch testSummary.errors from recent reports",
148
+ willRun: true,
149
+ },
150
+ {
151
+ cacheStatus: "miss",
152
+ name: "Aggregate errors",
153
+ reason: "Compute per-task error rates across runs",
154
+ willRun: true,
155
+ },
156
+ ],
157
+ },
140
158
  "coverage-audit": {
141
159
  description: "Cross-reference feature registry against evaluation tasks for coverage gaps",
142
160
  filesRead: ["config/features.ts", "tasks/*.{yaml,task.ts,task.js}"],
@@ -295,7 +313,7 @@ const EXPLAIN_REGISTRY = {
295
313
  "measure-retrieval": {
296
314
  description: "Measure Sanity text search retrieval quality against canonical document annotations",
297
315
  filesCreated: ["results/latest/retrieval-metrics.json"],
298
- filesRead: ["tasks/*.yaml"],
316
+ filesRead: ["tasks/literacy/*.task.ts"],
299
317
  steps: [
300
318
  {
301
319
  cacheStatus: "miss",
@@ -576,16 +594,23 @@ export async function handleExplain(actionCommand, confirmExecution, rootDir) {
576
594
  */
577
595
  function buildInitExplainPlan(actionCommand, rootDir) {
578
596
  const opts = actionCommand.opts();
579
- const format = opts.outputFormat === "json" ? "json" : "yaml";
580
- const ext = format === "json" ? ".json" : ".yaml";
597
+ const format = opts.outputFormat === "json"
598
+ ? "json"
599
+ : opts.outputFormat === "yaml"
600
+ ? "yaml"
601
+ : "ts";
602
+ const taskExt = format === "ts" ? ".task.ts" : format === "yaml" ? ".yaml" : ".json";
603
+ const configFile = format === "ts"
604
+ ? "ailf.config.ts"
605
+ : `config.${format === "yaml" ? "yaml" : "json"}`;
581
606
  const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
582
607
  const targetDir = opts.path ?? ".";
583
608
  const ailfDir = `${targetDir}/.ailf`;
584
609
  const tasksDir = `${ailfDir}/tasks`;
585
610
  const taskFileNames = [...TASK_FILE_NAMES];
586
611
  const filesCreated = [
587
- `${ailfDir}/config${ext}`,
588
- ...taskFileNames.map((stem) => `${tasksDir}/${stem}${ext}`),
612
+ `${ailfDir}/${configFile}`,
613
+ ...taskFileNames.map((stem) => `${tasksDir}/${stem}${taskExt}`),
589
614
  `${ailfDir}/.gitignore`,
590
615
  ];
591
616
  return buildSimpleCommandPlan({
@@ -603,14 +628,14 @@ function buildInitExplainPlan(actionCommand, rootDir) {
603
628
  },
604
629
  {
605
630
  cacheStatus: "miss",
606
- name: `Write config${ext}`,
631
+ name: `Write ${configFile}`,
607
632
  reason: `Project configuration template (${format.toUpperCase()} format)`,
608
633
  willRun: true,
609
634
  },
610
635
  {
611
636
  cacheStatus: "miss",
612
637
  name: `Write example tasks (${taskFileNames.length} files)`,
613
- reason: `Commented starter tasks in ${tasksDir}/ (${format.toUpperCase()} format)`,
638
+ reason: `Starter tasks in ${tasksDir}/ (${format.toUpperCase()} format)`,
614
639
  willRun: true,
615
640
  },
616
641
  {
@@ -700,6 +725,10 @@ async function buildPipelineExplainPlan(actionCommand, rootDir) {
700
725
  taskSource: raw.taskSource,
701
726
  remoteCache: raw.remoteCache,
702
727
  config: raw.config,
728
+ capture: raw.capture ?? false,
729
+ captureCompress: raw.captureCompress ?? true,
730
+ captureExtras: raw.captureExtras ?? true,
731
+ captureDir: raw.captureDir,
703
732
  };
704
733
  const resolved = computeResolvedOptions(withDefaults);
705
734
  const planOpts = {
@@ -41,6 +41,7 @@ async function executeFetchDocs(opts) {
41
41
  // Build a minimal ResolvedConfig for the composition root
42
42
  const ctx = createAppContext({
43
43
  rootDir: ROOT,
44
+ outputDir: resolve(ROOT, "results", "latest"),
44
45
  mode: "literacy",
45
46
  noAutoScope: false,
46
47
  skipFetch: false,
@@ -1,8 +1,8 @@
1
1
  /**
2
- * generate-configs command — generate promptfoo config files from models.yaml.
2
+ * generate-configs command — generate promptfoo config files via the compiler pipeline.
3
3
  *
4
- * Uses the composition root to wire adapters, then calls generateConfigs()
5
- * directly — the same code path as the pipeline.
4
+ * Uses the composition root to wire adapters, then runs GenerateConfigsStep
5
+ * directly — the same code path as the full pipeline.
6
6
  */
7
7
  import { Command } from "commander";
8
8
  export declare function createGenerateConfigsCommand(): Command;