@sanity/ailf 1.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (499)
  1. package/README.md +0 -1
  2. package/canonical/grader-references/README.md +2 -2
  3. package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
  4. package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
  5. package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
  6. package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
  7. package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
  8. package/config/features.ts +1 -1
  9. package/config/models.ts +29 -12
  10. package/config/sources.ts +1 -1
  11. package/config/thresholds.ts +1 -1
  12. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
  13. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
  14. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
  15. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
  16. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
  17. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
  18. package/dist/_vendor/ailf-core/config-helpers.d.ts +20 -17
  19. package/dist/_vendor/ailf-core/config-helpers.js +51 -2
  20. package/dist/_vendor/ailf-core/examples/index.d.ts +166 -80
  21. package/dist/_vendor/ailf-core/examples/index.js +213 -94
  22. package/dist/_vendor/ailf-core/index.d.ts +3 -2
  23. package/dist/_vendor/ailf-core/index.js +2 -1
  24. package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
  25. package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
  26. package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
  27. package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
  28. package/dist/_vendor/ailf-core/ports/context.d.ts +22 -1
  29. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
  30. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
  31. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
  32. package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
  33. package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
  34. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
  35. package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
  36. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +7 -1
  37. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +16 -2
  38. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
  39. package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
  40. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
  41. package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
  42. package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
  43. package/dist/_vendor/ailf-core/services/index.js +1 -1
  44. package/dist/_vendor/ailf-core/services/scoring.js +9 -0
  45. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +25 -1
  46. package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
  47. package/dist/_vendor/ailf-core/types/index.d.ts +48 -7
  48. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +105 -23
  49. package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
  50. package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
  51. package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
  52. package/dist/adapters/api-client/remediation.js +2 -2
  53. package/dist/adapters/config-sources/file-config-adapter.js +7 -1
  54. package/dist/adapters/config-sources/ts-config-loader.js +21 -13
  55. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
  56. package/dist/adapters/index.d.ts +0 -1
  57. package/dist/adapters/index.js +0 -1
  58. package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
  59. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  60. package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
  61. package/dist/adapters/task-sources/content-lake-task-source.js +21 -26
  62. package/dist/adapters/task-sources/index.d.ts +3 -4
  63. package/dist/adapters/task-sources/index.js +3 -4
  64. package/dist/adapters/task-sources/repo-schemas.d.ts +219 -17
  65. package/dist/adapters/task-sources/repo-schemas.js +228 -20
  66. package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
  67. package/dist/adapters/task-sources/repo-task-source.js +81 -122
  68. package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
  69. package/dist/adapters/task-sources/repo-trigger.js +1 -1
  70. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  71. package/dist/adapters/task-sources/repo-validation.js +126 -5
  72. package/dist/adapters/task-sources/task-file-loader.d.ts +10 -7
  73. package/dist/adapters/task-sources/task-file-loader.js +21 -7
  74. package/dist/agent-observer/test-imports.d.ts +7 -0
  75. package/dist/agent-observer/test-imports.js +185 -0
  76. package/dist/artifact-capture/comparator.d.ts +22 -0
  77. package/dist/artifact-capture/comparator.js +493 -0
  78. package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
  79. package/dist/artifact-capture/filesystem-collector.js +237 -0
  80. package/dist/artifact-capture/redact-artifact.d.ts +20 -0
  81. package/dist/artifact-capture/redact-artifact.js +115 -0
  82. package/dist/assertions/source-isolation.d.ts +1 -1
  83. package/dist/assertions/source-isolation.js +1 -1
  84. package/dist/cli.js +4 -0
  85. package/dist/commands/calculate-scores.js +1 -0
  86. package/dist/commands/capture-compare.d.ts +15 -0
  87. package/dist/commands/capture-compare.js +253 -0
  88. package/dist/commands/capture-list.d.ts +12 -0
  89. package/dist/commands/capture-list.js +147 -0
  90. package/dist/commands/capture.d.ts +9 -0
  91. package/dist/commands/capture.js +16 -0
  92. package/dist/commands/chronic-failures.d.ts +8 -0
  93. package/dist/commands/chronic-failures.js +33 -0
  94. package/dist/commands/coverage-audit.js +3 -1
  95. package/dist/commands/explain-handler.d.ts +1 -1
  96. package/dist/commands/explain-handler.js +37 -8
  97. package/dist/commands/fetch-docs.js +1 -0
  98. package/dist/commands/generate-configs.d.ts +3 -3
  99. package/dist/commands/generate-configs.js +20 -8
  100. package/dist/commands/init.d.ts +5 -4
  101. package/dist/commands/init.js +190 -25
  102. package/dist/commands/pipeline-action.d.ts +7 -1
  103. package/dist/commands/pipeline-action.js +43 -19
  104. package/dist/commands/pipeline.d.ts +6 -1
  105. package/dist/commands/pipeline.js +7 -2
  106. package/dist/commands/pr-comment.js +1 -0
  107. package/dist/commands/publish.js +1 -0
  108. package/dist/commands/shared/help.js +2 -2
  109. package/dist/commands/update-quality-scores.d.ts +5 -0
  110. package/dist/commands/update-quality-scores.js +20 -0
  111. package/dist/commands/validate-tasks.d.ts +2 -2
  112. package/dist/commands/validate-tasks.js +26 -15
  113. package/dist/composition-root.d.ts +15 -4
  114. package/dist/composition-root.js +100 -55
  115. package/dist/config/features.ts +23 -0
  116. package/dist/config/models.ts +100 -0
  117. package/dist/config/prompts.ts +16 -0
  118. package/dist/config/rubrics.ts +225 -0
  119. package/dist/config/schedules.ts +47 -0
  120. package/dist/config/sinks.ts +37 -0
  121. package/dist/config/sources.ts +21 -0
  122. package/dist/config/thresholds.ts +61 -0
  123. package/dist/index.d.ts +41 -0
  124. package/dist/index.js +48 -0
  125. package/dist/lib/agent-behavior-report.d.ts +8 -0
  126. package/dist/lib/agent-behavior-report.js +185 -0
  127. package/dist/lib/baseline.d.ts +19 -0
  128. package/dist/lib/baseline.js +153 -0
  129. package/dist/lib/calculate-scores.d.ts +23 -0
  130. package/dist/lib/calculate-scores.js +42 -0
  131. package/dist/lib/compare.d.ts +18 -0
  132. package/dist/lib/compare.js +170 -0
  133. package/dist/lib/coverage-audit.d.ts +4 -0
  134. package/dist/lib/coverage-audit.js +42 -0
  135. package/dist/lib/discovery-report.d.ts +13 -0
  136. package/dist/lib/discovery-report.js +57 -0
  137. package/dist/lib/fetch-docs.d.ts +30 -0
  138. package/dist/lib/fetch-docs.js +171 -0
  139. package/dist/lib/generate-configs.d.ts +25 -0
  140. package/dist/lib/generate-configs.js +42 -0
  141. package/dist/lib/grader-api.d.ts +21 -0
  142. package/dist/lib/grader-api.js +34 -0
  143. package/dist/lib/grader-compare.d.ts +19 -0
  144. package/dist/lib/grader-compare.js +91 -0
  145. package/dist/lib/grader-consistency.d.ts +27 -0
  146. package/dist/lib/grader-consistency.js +79 -0
  147. package/dist/lib/grader-sensitivity.d.ts +19 -0
  148. package/dist/lib/grader-sensitivity.js +75 -0
  149. package/dist/lib/grader-validate.d.ts +19 -0
  150. package/dist/lib/grader-validate.js +78 -0
  151. package/dist/lib/measure-retrieval.d.ts +14 -0
  152. package/dist/lib/measure-retrieval.js +71 -0
  153. package/dist/lib/pr-comment.d.ts +16 -0
  154. package/dist/lib/pr-comment.js +28 -0
  155. package/dist/lib/readiness-report.d.ts +13 -0
  156. package/dist/lib/readiness-report.js +108 -0
  157. package/dist/lib/webhook-server.d.ts +11 -0
  158. package/dist/lib/webhook-server.js +24 -0
  159. package/dist/lib/weekly-digest.d.ts +24 -0
  160. package/dist/lib/weekly-digest.js +148 -0
  161. package/dist/orchestration/build-app-context.js +13 -0
  162. package/dist/orchestration/build-step-sequence.js +4 -2
  163. package/dist/orchestration/cache-context.d.ts +23 -0
  164. package/dist/orchestration/cache-context.js +43 -0
  165. package/dist/orchestration/env-bridge.d.ts +21 -0
  166. package/dist/orchestration/env-bridge.js +66 -0
  167. package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
  168. package/dist/orchestration/load-pipeline-tasks.js +52 -0
  169. package/dist/orchestration/pipeline-orchestrator.js +75 -5
  170. package/dist/orchestration/step-runner.js +5 -1
  171. package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
  172. package/dist/orchestration/steps/calculate-scores-step.js +13 -0
  173. package/dist/orchestration/steps/callback-step.js +10 -1
  174. package/dist/orchestration/steps/compare-step.js +6 -3
  175. package/dist/orchestration/steps/discovery-report-step.js +6 -2
  176. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  177. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  178. package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
  179. package/dist/orchestration/steps/fetch-docs-step.js +32 -19
  180. package/dist/orchestration/steps/gap-analysis-step.js +13 -2
  181. package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
  182. package/dist/orchestration/steps/generate-configs-step.js +77 -26
  183. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
  184. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  185. package/dist/orchestration/steps/publish-report-step.js +19 -0
  186. package/dist/orchestration/steps/readiness-step.js +8 -3
  187. package/dist/orchestration/steps/report-step.js +17 -4
  188. package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
  189. package/dist/orchestration/steps/run-eval-step.js +51 -31
  190. package/dist/pipeline/agent-behavior-report.js +6 -0
  191. package/dist/pipeline/attribution.d.ts +1 -1
  192. package/dist/pipeline/attribution.js +1 -1
  193. package/dist/pipeline/cache.js +29 -15
  194. package/dist/pipeline/calculate-scores.d.ts +2 -0
  195. package/dist/pipeline/calculate-scores.js +70 -33
  196. package/dist/pipeline/chronic-failures.d.ts +55 -0
  197. package/dist/pipeline/chronic-failures.js +110 -0
  198. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
  199. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
  200. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
  201. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +132 -62
  202. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
  203. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
  204. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
  205. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
  206. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
  207. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
  208. package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
  209. package/dist/pipeline/compiler/assertion-mapper.js +1 -1
  210. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
  211. package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
  212. package/dist/pipeline/compiler/config-loader.d.ts +14 -0
  213. package/dist/pipeline/compiler/config-loader.js +42 -2
  214. package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
  215. package/dist/pipeline/compiler/fixture-resolver.js +1 -1
  216. package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
  217. package/dist/pipeline/compiler/ignore-fields.js +1 -1
  218. package/dist/pipeline/compiler/index.d.ts +2 -5
  219. package/dist/pipeline/compiler/index.js +2 -5
  220. package/dist/pipeline/compiler/literacy-bridge.d.ts +2 -2
  221. package/dist/pipeline/compiler/literacy-bridge.js +2 -2
  222. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  223. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  224. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  225. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  226. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  227. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  228. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +23 -0
  229. package/dist/pipeline/compiler/mode-bases/literacy.js +132 -0
  230. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  231. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  232. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  233. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  234. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  235. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  236. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  237. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  238. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  239. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  240. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  241. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  242. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  243. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  244. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
  245. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  246. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  247. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  248. package/dist/pipeline/compiler/mode-handlers/index.d.ts +6 -7
  249. package/dist/pipeline/compiler/mode-handlers/index.js +6 -8
  250. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  251. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  252. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  253. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  254. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  255. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  256. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  257. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  258. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  259. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  260. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  261. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  262. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  263. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  264. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  265. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  266. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  267. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  268. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  269. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  270. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  271. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  272. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  273. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  274. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +63 -6
  275. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
  276. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
  277. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  278. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  279. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  280. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  281. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  282. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  283. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  284. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +108 -0
  285. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  286. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  287. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  288. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  289. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +3 -1
  290. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +65 -67
  291. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  292. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +191 -0
  293. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  294. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +101 -0
  295. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  296. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  297. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +19 -0
  298. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +323 -0
  299. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +103 -0
  300. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  301. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
  302. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
  303. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  304. package/dist/pipeline/compiler/preset-loader.js +99 -0
  305. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +7 -10
  306. package/dist/pipeline/compiler/presets/sanity-literacy.js +11 -157
  307. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
  308. package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
  309. package/dist/pipeline/compiler/provider-assembler.js +13 -7
  310. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
  311. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
  312. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
  313. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
  314. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
  315. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
  316. package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
  317. package/dist/pipeline/compiler/sandbox/index.js +1 -1
  318. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
  319. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
  320. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
  321. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
  322. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
  323. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
  324. package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
  325. package/dist/pipeline/compiler/scoring-bridge.js +1 -1
  326. package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
  327. package/dist/pipeline/compiler/task-bridge.js +92 -0
  328. package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
  329. package/dist/pipeline/compiler/task-graph-builder.js +1 -4
  330. package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
  331. package/dist/pipeline/compiler/telemetry/index.js +1 -1
  332. package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
  333. package/dist/pipeline/compiler/variable-resolver.js +1 -1
  334. package/dist/pipeline/coverage-audit.d.ts +1 -1
  335. package/dist/pipeline/coverage-audit.js +1 -1
  336. package/dist/pipeline/degradations.d.ts +1 -1
  337. package/dist/pipeline/degradations.js +1 -1
  338. package/dist/pipeline/expand-tasks.d.ts +2 -2
  339. package/dist/pipeline/expand-tasks.js +2 -2
  340. package/dist/pipeline/failure-modes.d.ts +1 -1
  341. package/dist/pipeline/failure-modes.js +13 -1
  342. package/dist/pipeline/gap-analysis.d.ts +1 -1
  343. package/dist/pipeline/gap-analysis.js +3 -1
  344. package/dist/pipeline/generate-configs.d.ts +2 -2
  345. package/dist/pipeline/generate-configs.js +16 -9
  346. package/dist/pipeline/grader-compare-runner.d.ts +1 -1
  347. package/dist/pipeline/grader-compare-runner.js +7 -1
  348. package/dist/pipeline/grader-comparison.d.ts +1 -1
  349. package/dist/pipeline/grader-comparison.js +1 -1
  350. package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
  351. package/dist/pipeline/grader-consistency-runner.js +7 -1
  352. package/dist/pipeline/grader-consistency.d.ts +1 -1
  353. package/dist/pipeline/grader-consistency.js +1 -1
  354. package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
  355. package/dist/pipeline/grader-sensitivity-runner.js +1 -1
  356. package/dist/pipeline/grader-sensitivity.d.ts +1 -1
  357. package/dist/pipeline/grader-sensitivity.js +1 -1
  358. package/dist/pipeline/grader-validate-runner.d.ts +1 -1
  359. package/dist/pipeline/grader-validate-runner.js +2 -2
  360. package/dist/pipeline/grader-validation.d.ts +1 -1
  361. package/dist/pipeline/grader-validation.js +1 -1
  362. package/dist/pipeline/map-request-to-config.js +16 -2
  363. package/dist/pipeline/mirror-repo-tasks.d.ts +8 -8
  364. package/dist/pipeline/mirror-repo-tasks.js +10 -10
  365. package/dist/pipeline/plan-format.d.ts +1 -1
  366. package/dist/pipeline/plan-format.js +1 -1
  367. package/dist/pipeline/plan.d.ts +1 -1
  368. package/dist/pipeline/plan.js +68 -30
  369. package/dist/pipeline/probe.d.ts +1 -1
  370. package/dist/pipeline/probe.js +1 -1
  371. package/dist/pipeline/readiness-report.d.ts +2 -2
  372. package/dist/pipeline/readiness-report.js +2 -2
  373. package/dist/pipeline/release-classification.d.ts +1 -1
  374. package/dist/pipeline/release-classification.js +1 -1
  375. package/dist/pipeline/release-report.d.ts +1 -1
  376. package/dist/pipeline/release-report.js +1 -1
  377. package/dist/pipeline/repo-eval-comment.d.ts +1 -1
  378. package/dist/pipeline/repo-eval-comment.js +1 -1
  379. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  380. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  381. package/dist/pipeline/resolve-mappings.d.ts +6 -6
  382. package/dist/pipeline/resolve-mappings.js +44 -44
  383. package/dist/pipeline/retrieval-metrics.d.ts +3 -3
  384. package/dist/pipeline/retrieval-metrics.js +28 -20
  385. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  386. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  387. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  388. package/dist/pipeline/steps/compare-step.js +90 -0
  389. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  390. package/dist/pipeline/steps/eval-step.js +347 -0
  391. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  392. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  393. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  394. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  395. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  396. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  397. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  398. package/dist/pipeline/steps/publish-report-step.js +243 -0
  399. package/dist/pipeline/steps/report-step.d.ts +13 -0
  400. package/dist/pipeline/steps/report-step.js +56 -0
  401. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  402. package/dist/pipeline/steps/update-scores-step.js +42 -0
  403. package/dist/pipeline/targeted-loo.d.ts +1 -1
  404. package/dist/pipeline/targeted-loo.js +1 -1
  405. package/dist/pipeline/thresholds.d.ts +1 -1
  406. package/dist/pipeline/thresholds.js +1 -1
  407. package/dist/pipeline/validate.js +13 -0
  408. package/dist/report-store.d.ts +17 -0
  409. package/dist/report-store.js +24 -0
  410. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  411. package/dist/scripts/agent-behavior-report.js +315 -0
  412. package/dist/scripts/baseline.d.ts +43 -0
  413. package/dist/scripts/baseline.js +267 -0
  414. package/dist/scripts/calculate-scores.d.ts +166 -0
  415. package/dist/scripts/calculate-scores.js +1296 -0
  416. package/dist/scripts/compare.d.ts +22 -0
  417. package/dist/scripts/compare.js +334 -0
  418. package/dist/scripts/coverage-audit.d.ts +44 -0
  419. package/dist/scripts/coverage-audit.js +209 -0
  420. package/dist/scripts/debug-eval.d.ts +19 -0
  421. package/dist/scripts/debug-eval.js +73 -0
  422. package/dist/scripts/discovery-report.d.ts +58 -0
  423. package/dist/scripts/discovery-report.js +250 -0
  424. package/dist/scripts/fetch-docs.d.ts +35 -0
  425. package/dist/scripts/fetch-docs.js +472 -0
  426. package/dist/scripts/generate-configs.d.ts +66 -0
  427. package/dist/scripts/generate-configs.js +459 -0
  428. package/dist/scripts/grader-api.d.ts +27 -0
  429. package/dist/scripts/grader-api.js +206 -0
  430. package/dist/scripts/grader-compare.d.ts +22 -0
  431. package/dist/scripts/grader-compare.js +368 -0
  432. package/dist/scripts/grader-consistency.d.ts +20 -0
  433. package/dist/scripts/grader-consistency.js +313 -0
  434. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  435. package/dist/scripts/grader-sensitivity.js +354 -0
  436. package/dist/scripts/grader-validate.d.ts +19 -0
  437. package/dist/scripts/grader-validate.js +267 -0
  438. package/dist/scripts/measure-retrieval.d.ts +10 -0
  439. package/dist/scripts/measure-retrieval.js +145 -0
  440. package/dist/scripts/migrate-task-mode.d.ts +1 -1
  441. package/dist/scripts/migrate-task-mode.js +1 -1
  442. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
  443. package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
  444. package/dist/scripts/pipeline.d.ts +76 -0
  445. package/dist/scripts/pipeline.js +1031 -0
  446. package/dist/scripts/pr-comment.d.ts +10 -0
  447. package/dist/scripts/pr-comment.js +510 -0
  448. package/dist/scripts/readiness-report.d.ts +88 -0
  449. package/dist/scripts/readiness-report.js +342 -0
  450. package/dist/scripts/update-quality-scores.d.ts +15 -0
  451. package/dist/scripts/update-quality-scores.js +184 -0
  452. package/dist/scripts/validate-task-sources.d.ts +1 -1
  453. package/dist/scripts/validate-task-sources.js +1 -1
  454. package/dist/scripts/validate.d.ts +13 -0
  455. package/dist/scripts/validate.js +79 -0
  456. package/dist/scripts/webhook-server.d.ts +26 -0
  457. package/dist/scripts/webhook-server.js +147 -0
  458. package/dist/scripts/weekly-digest.d.ts +24 -0
  459. package/dist/scripts/weekly-digest.js +144 -0
  460. package/dist/sinks/format-slack.d.ts +64 -0
  461. package/dist/sinks/format-slack.js +306 -0
  462. package/dist/sinks/slack-sink.d.ts +27 -0
  463. package/dist/sinks/slack-sink.js +78 -0
  464. package/dist/sinks/types.d.ts +1 -1
  465. package/dist/sinks/types.js +1 -1
  466. package/dist/sinks/webhook-sink.d.ts +19 -0
  467. package/dist/sinks/webhook-sink.js +50 -0
  468. package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
  469. package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
  470. package/dist/tasks/literacy/content-lake.task.ts +181 -0
  471. package/dist/tasks/literacy/frameworks.task.ts +129 -0
  472. package/dist/tasks/literacy/functions.task.ts +70 -0
  473. package/dist/tasks/literacy/groq.task.ts +259 -0
  474. package/dist/tasks/literacy/image-handling.task.ts +95 -0
  475. package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
  476. package/dist/tasks/literacy/portable-text.task.ts +169 -0
  477. package/dist/tasks/literacy/studio-setup.task.ts +134 -0
  478. package/dist/tasks/literacy/visual-editing.task.ts +147 -0
  479. package/package.json +32 -24
  480. package/tasks/.expanded.agentic.yaml +280 -0
  481. package/tasks/.expanded.yaml +565 -0
  482. package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
  483. package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
  484. package/tasks/literacy/content-lake.task.ts +181 -0
  485. package/tasks/literacy/frameworks.task.ts +1 -0
  486. package/tasks/literacy/functions.task.ts +1 -0
  487. package/tasks/literacy/groq.task.ts +1 -0
  488. package/tasks/literacy/image-handling.task.ts +95 -0
  489. package/tasks/literacy/nextjs-live.task.ts +2 -1
  490. package/tasks/literacy/portable-text.task.ts +169 -0
  491. package/tasks/literacy/studio-setup.task.ts +5 -2
  492. package/tasks/literacy/visual-editing.task.ts +1 -0
  493. package/LICENSE +0 -21
  494. package/tasks/frameworks.yaml +0 -98
  495. package/tasks/functions.yaml +0 -51
  496. package/tasks/groq.yaml +0 -216
  497. package/tasks/nextjs-live.yaml +0 -62
  498. package/tasks/studio-setup.yaml +0 -111
  499. package/tasks/visual-editing.yaml +0 -120
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Environment variable bridge — writes ResolvedConfig values to process.env
3
+ * so that lib/*.ts modules (which still read process.env) work correctly.
4
+ *
5
+ * This replaces the former global applyEnvironment() with an explicit
6
+ * per-step bridge. Each orchestration step calls this before invoking
7
+ * its lib/*.ts main() function.
8
+ *
9
+ * Phase 9 will eliminate this file entirely by giving lib/*.ts main()
10
+ * functions typed option parameters.
11
+ *
12
+ * @see docs/exec-plans/active/ports-and-adapters/phase-8-delete-legacy-step-layer.md
13
+ */
14
+ import type { ResolvedConfig } from "../_vendor/ailf-core/index.d.ts";
15
+ /**
16
+ * Bridge ResolvedConfig values to process.env.
17
+ *
18
+ * Idempotent — safe to call multiple times. Only sets env vars for
19
+ * config values that are defined (never deletes or resets).
20
+ */
21
+ export declare function bridgeConfigToEnv(config: ResolvedConfig): void;
@@ -0,0 +1,66 @@
1
+ /**
2
+ * Environment variable bridge — writes ResolvedConfig values to process.env
3
+ * so that lib/*.ts modules (which still read process.env) work correctly.
4
+ *
5
+ * This replaces the former global applyEnvironment() with an explicit
6
+ * per-step bridge. Each orchestration step calls this before invoking
7
+ * its lib/*.ts main() function.
8
+ *
9
+ * Phase 9 will eliminate this file entirely by giving lib/*.ts main()
10
+ * functions typed option parameters.
11
+ *
12
+ * @see docs/exec-plans/active/ports-and-adapters/phase-8-delete-legacy-step-layer.md
13
+ */
14
+ /**
15
+ * Bridge ResolvedConfig values to process.env.
16
+ *
17
+ * Idempotent — safe to call multiple times. Only sets env vars for
18
+ * config values that are defined (never deletes or resets).
19
+ */
20
+ export function bridgeConfigToEnv(config) {
21
+ // Mode
22
+ process.env.EVAL_MODE = config.mode;
23
+ // Search mode
24
+ if (config.searchMode !== "open") {
25
+ process.env.EVAL_SEARCH_MODE = config.searchMode;
26
+ }
27
+ // Source
28
+ if (config.source) {
29
+ process.env.DOC_SOURCE = config.source;
30
+ }
31
+ // URL-derived overrides
32
+ if (config.urls?.[0]) {
33
+ process.env.DOC_BASE_URL = config.urls[0];
34
+ }
35
+ // Sanity overrides
36
+ if (config.datasetOverride) {
37
+ process.env.SANITY_DATASET = config.datasetOverride;
38
+ }
39
+ if (config.projectIdOverride) {
40
+ process.env.SANITY_PROJECT_ID = config.projectIdOverride;
41
+ }
42
+ if (config.perspectiveOverride) {
43
+ process.env.SANITY_PERSPECTIVE = config.perspectiveOverride;
44
+ }
45
+ if (config.studioOriginOverride) {
46
+ process.env.SANITY_STUDIO_ORIGIN = config.studioOriginOverride;
47
+ }
48
+ if (config.sanityDocumentArgs?.length) {
49
+ process.env.SANITY_DOCUMENT_IDS = config.sanityDocumentArgs.join(",");
50
+ }
51
+ // Custom headers
52
+ if (config.headers) {
53
+ process.env.DOC_HEADERS = JSON.stringify(config.headers);
54
+ }
55
+ // Allowed origins
56
+ if (config.allowedOrigins?.length) {
57
+ process.env.DOC_ALLOWED_ORIGINS = config.allowedOrigins.join(",");
58
+ }
59
+ // Scoping filters
60
+ if (config.areas) {
61
+ process.env.EVAL_FILTER_AREAS = config.areas.join(",");
62
+ }
63
+ if (config.tasks) {
64
+ process.env.EVAL_FILTER_TASKS = config.tasks.join(",");
65
+ }
66
+ }
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Shared task loading for pipeline orchestration steps.
3
+ *
4
+ * Both FetchDocsStep and GenerateConfigsStep need to see the same set of
5
+ * tasks. This function loads from filesystem .task.ts files — the
6
+ * authoritative source for the current pipeline architecture.
7
+ *
8
+ * Background: The composition root wires ctx.taskSource to
9
+ * ContentLakeTaskSource by default, but GenerateConfigsStep bypasses it
10
+ * and loads directly from the filesystem. FetchDocsStep must use the
11
+ * same source to avoid a mismatch where configs reference context files
12
+ * that were never fetched.
13
+ *
14
+ * @see packages/eval/src/orchestration/steps/generate-configs-step.ts
15
+ * @see packages/eval/src/orchestration/steps/fetch-docs-step.ts
16
+ */
17
+ import type { GeneralizedTaskDefinition } from "../_vendor/ailf-core/index.d.ts";
18
+ export interface LoadPipelineTasksOptions {
19
+ /** Absolute path to the eval package root (packages/eval) */
20
+ rootDir: string;
21
+ /** Evaluation mode — determines the tasks/{mode}/ subdirectory */
22
+ mode: string;
23
+ /** Optional extra directory for repo-based tasks (--repo-tasks-path) */
24
+ repoTasksPath?: string;
25
+ }
26
+ /**
27
+ * Load task definitions from the filesystem, matching the pipeline's
28
+ * authoritative task source.
29
+ *
30
+ * Discovers and loads `*.task.ts` files from `tasks/{mode}/` and
31
+ * optionally `--repo-tasks-path`. Tasks whose `mode` field doesn't
32
+ * match the requested mode are excluded.
33
+ */
34
+ export declare function loadPipelineTasks(opts: LoadPipelineTasksOptions): Promise<GeneralizedTaskDefinition[]>;
@@ -0,0 +1,52 @@
1
+ /**
2
+ * Shared task loading for pipeline orchestration steps.
3
+ *
4
+ * Both FetchDocsStep and GenerateConfigsStep need to see the same set of
5
+ * tasks. This function loads from filesystem .task.ts files — the
6
+ * authoritative source for the current pipeline architecture.
7
+ *
8
+ * Background: The composition root wires ctx.taskSource to
9
+ * ContentLakeTaskSource by default, but GenerateConfigsStep bypasses it
10
+ * and loads directly from the filesystem. FetchDocsStep must use the
11
+ * same source to avoid a mismatch where configs reference context files
12
+ * that were never fetched.
13
+ *
14
+ * @see packages/eval/src/orchestration/steps/generate-configs-step.ts
15
+ * @see packages/eval/src/orchestration/steps/fetch-docs-step.ts
16
+ */
17
+ import { resolve } from "path";
18
+ import { discoverTsTaskFiles, loadTsTaskFile, } from "../adapters/task-sources/task-file-loader.js";
19
+ import { resolveVendoredSubdir } from "../pipeline/compiler/config-loader.js";
20
+ /**
21
+ * Load task definitions from the filesystem, matching the pipeline's
22
+ * authoritative task source.
23
+ *
24
+ * Discovers and loads `*.task.ts` files from `tasks/{mode}/` and
25
+ * optionally `--repo-tasks-path`. Tasks whose `mode` field doesn't
26
+ * match the requested mode are excluded.
27
+ */
28
+ export async function loadPipelineTasks(opts) {
29
+ const tasksDir = resolveVendoredSubdir(opts.rootDir, `tasks/${opts.mode}`);
30
+ const dirs = [tasksDir];
31
+ if (opts.repoTasksPath) {
32
+ const repoDir = resolve(opts.repoTasksPath);
33
+ if (!dirs.includes(repoDir)) {
34
+ dirs.push(repoDir);
35
+ }
36
+ }
37
+ const tasks = [];
38
+ for (const dir of dirs) {
39
+ const files = discoverTsTaskFiles(dir);
40
+ for (const file of files) {
41
+ const raw = await loadTsTaskFile(file);
42
+ for (const t of raw.tasks) {
43
+ const task = t;
44
+ // Filter to matching mode (skip tasks from other modes in same dir)
45
+ if (!("mode" in task) || task.mode === opts.mode) {
46
+ tasks.push(task);
47
+ }
48
+ }
49
+ }
50
+ }
51
+ return tasks;
52
+ }
@@ -20,10 +20,20 @@ import { runStep } from "./step-runner.js";
20
20
  * underlying Sanity client. Best-effort — failures are logged and
21
21
  * never block the pipeline.
22
22
  */
23
- async function reportJobProgress(ctx, stepName, completedSteps, totalSteps, status, errorInfo) {
23
+ async function reportJobProgress(ctx, stepName, completedSteps, totalSteps, status, errorInfo, jobUpdates) {
24
24
  const jobId = ctx.config.jobId;
25
25
  if (!jobId)
26
26
  return;
27
+ // Accumulate update for artifact capture
28
+ jobUpdates?.push({
29
+ jobId,
30
+ stepName,
31
+ completedSteps,
32
+ totalSteps,
33
+ status,
34
+ errorInfo,
35
+ timestamp: new Date().toISOString(),
36
+ });
27
37
  // Use the report store's write capability to patch the job document.
28
38
  // The report store exposes a Sanity client — we access it through
29
39
  // a best-effort PATCH via the same client infrastructure.
@@ -59,6 +69,51 @@ async function reportJobProgress(ctx, stepName, completedSteps, totalSteps, stat
59
69
  }
60
70
  }
61
71
  // ---------------------------------------------------------------------------
72
+ // Artifact capture
73
+ // ---------------------------------------------------------------------------
74
+ /**
75
+ * Capture a snapshot of the pipeline config, final state, and step results.
76
+ * Strips secrets (API keys, tokens) from the config.
77
+ */
78
+ function capturePipelineContext(ctx, state, results) {
79
+ if (!ctx.collector.enabled)
80
+ return;
81
+ const sanitized = Object.fromEntries(Object.entries(ctx.config).filter(([k]) => !/token|secret|key/i.test(k)));
82
+ ctx.collector.capture("pipeline", "pipeline-context", {
83
+ config: sanitized,
84
+ state: {
85
+ reportId: state.reportId,
86
+ evalFingerprint: state.evalFingerprint,
87
+ belowCritical: state.belowCritical,
88
+ remoteCacheHits: state.remoteCacheHits
89
+ ? [...state.remoteCacheHits]
90
+ : undefined,
91
+ releaseAutoScope: state.releaseAutoScope,
92
+ testSummary: state.testSummary,
93
+ },
94
+ steps: Object.entries(results).map(([name, result]) => ({
95
+ name,
96
+ status: result.status,
97
+ durationMs: result.status !== "skipped" ? result.durationMs : undefined,
98
+ })),
99
+ });
100
+ }
101
+ /**
102
+ * Flush captured artifacts to disk. Non-blocking — failures are logged
103
+ * but never affect the pipeline result.
104
+ */
105
+ async function flushArtifacts(ctx) {
106
+ if (!ctx.collector.enabled)
107
+ return;
108
+ try {
109
+ const result = await ctx.collector.flush();
110
+ ctx.logger.info(`Captured ${result.artifactCount} artifacts → ${result.destination}`);
111
+ }
112
+ catch (err) {
113
+ ctx.logger.warn(`Artifact capture flush failed: ${err instanceof Error ? err.message : err}`);
114
+ }
115
+ }
116
+ // ---------------------------------------------------------------------------
62
117
  // Orchestrator
63
118
  // ---------------------------------------------------------------------------
64
119
  /**
@@ -76,6 +131,7 @@ export async function orchestratePipeline(ctx, steps) {
76
131
  const validation = { issues: [], valid: true };
77
132
  const pipelineStart = Date.now();
78
133
  const hasJob = !!ctx.config.jobId;
134
+ const jobUpdates = [];
79
135
  ctx.logger.section("ai-literacy-framework — Evaluation Pipeline");
80
136
  ctx.logger.debug(`Pipeline starting with ${steps.length} steps`, {
81
137
  steps: steps.map((s) => s.name),
@@ -86,7 +142,7 @@ export async function orchestratePipeline(ctx, steps) {
86
142
  });
87
143
  // Report initial running status
88
144
  if (hasJob) {
89
- await reportJobProgress(ctx, steps[0]?.name ?? "init", 0, steps.length, "running");
145
+ await reportJobProgress(ctx, steps[0]?.name ?? "init", 0, steps.length, "running", undefined, jobUpdates);
90
146
  }
91
147
  for (let i = 0; i < steps.length; i++) {
92
148
  const step = steps[i];
@@ -94,7 +150,7 @@ export async function orchestratePipeline(ctx, steps) {
94
150
  ctx.logger.section(step.name);
95
151
  // Report current step progress
96
152
  if (hasJob) {
97
- await reportJobProgress(ctx, step.name, i, steps.length, "running");
153
+ await reportJobProgress(ctx, step.name, i, steps.length, "running", undefined, jobUpdates);
98
154
  }
99
155
  const result = await runStep(step, ctx, state);
100
156
  results[step.name] = result;
@@ -111,8 +167,15 @@ export async function orchestratePipeline(ctx, steps) {
111
167
  await reportJobProgress(ctx, step.name, i + 1, steps.length, "failed", {
112
168
  message: failedError,
113
169
  step: step.name,
114
- });
170
+ }, jobUpdates);
115
171
  }
172
+ // Capture pipeline context and job updates before flushing
173
+ capturePipelineContext(ctx, state, results);
174
+ if (jobUpdates.length > 0) {
175
+ ctx.collector.capture("job-store", "job-updates", jobUpdates);
176
+ }
177
+ // Flush captured artifacts even on failure (partial capture is useful)
178
+ await flushArtifacts(ctx);
116
179
  return {
117
180
  belowCritical: state.belowCritical,
118
181
  durationMs: Date.now() - pipelineStart,
@@ -129,7 +192,7 @@ export async function orchestratePipeline(ctx, steps) {
129
192
  }
130
193
  // Report step completion
131
194
  if (hasJob) {
132
- await reportJobProgress(ctx, step.name, i + 1, steps.length, "running");
195
+ await reportJobProgress(ctx, step.name, i + 1, steps.length, "running", undefined, jobUpdates);
133
196
  }
134
197
  }
135
198
  const durationMs = Date.now() - pipelineStart;
@@ -166,6 +229,13 @@ export async function orchestratePipeline(ctx, steps) {
166
229
  ctx.logger.warn("Failed to report job completion — continuing");
167
230
  }
168
231
  }
232
+ // Capture pipeline context and job updates before flushing
233
+ capturePipelineContext(ctx, state, results);
234
+ if (jobUpdates.length > 0) {
235
+ ctx.collector.capture("job-store", "job-updates", jobUpdates);
236
+ }
237
+ // Flush captured artifacts (non-blocking — failures never affect pipeline result)
238
+ await flushArtifacts(ctx);
169
239
  return {
170
240
  belowCritical: state.belowCritical,
171
241
  durationMs,
@@ -36,8 +36,12 @@ export async function runStep(step, ctx, state = {}) {
36
36
  if (canCache) {
37
37
  try {
38
38
  const inputs = step.cacheInputs(ctx);
39
+ const context = step.cacheContext?.(ctx);
39
40
  ctx.logger.debug(`[${step.name}] Cache inputs: ${inputs.length} files`);
40
- const key = await ctx.cache.computeKey(inputs);
41
+ if (context?.length) {
42
+ ctx.logger.debug(`[${step.name}] Cache context: ${context.join(", ")}`);
43
+ }
44
+ const key = await ctx.cache.computeKey(inputs, context);
41
45
  cacheKey = key;
42
46
  ctx.logger.debug(`[${step.name}] Cache key: ${key}`);
43
47
  const cached = await ctx.cache.lookup(step.name, key);
@@ -10,4 +10,5 @@ export declare class CalculateScoresStep implements PipelineStep {
10
10
  check(): ValidationIssue[];
11
11
  execute(ctx: AppContext, state: PipelineState): Promise<StepResult>;
12
12
  cacheInputs(ctx: AppContext): string[];
13
+ cacheContext(ctx: AppContext): string[];
13
14
  }
@@ -4,9 +4,11 @@
4
4
  * Calls calculateAndWriteScores() from pipeline/calculate-scores.ts with
5
5
  * typed options derived from AppContext. No env bridge needed.
6
6
  */
7
+ import { existsSync } from "node:fs";
7
8
  import { join } from "path";
8
9
  import { LiteracyVariant } from "../../pipeline/normalize-mode.js";
9
10
  import { getStepInputPaths } from "../../pipeline/cache.js";
11
+ import { buildCacheContext } from "../cache-context.js";
10
12
  import { calculateAndWriteScores } from "../../pipeline/calculate-scores.js";
11
13
  import { checkResultsExist, checkScoreSummaryValid, } from "../../pipeline/checks.js";
12
14
  import { resultsFileForMode } from "../../pipeline/eval-constants.js";
@@ -118,6 +120,14 @@ export class CalculateScoresStep {
118
120
  if (belowCritical.length > 0) {
119
121
  state.belowCritical = belowCritical;
120
122
  }
123
+ // Capture score artifacts
124
+ const resultsDir = join(ctx.config.rootDir, "results", "latest");
125
+ for (const file of ["score-summary.json", "grader-judgments.json"]) {
126
+ const filePath = join(resultsDir, file);
127
+ if (existsSync(filePath)) {
128
+ ctx.collector.captureFile("calculate-scores", file.replace(".json", ""), filePath);
129
+ }
130
+ }
121
131
  const criticalSuffix = belowCritical.length > 0
122
132
  ? ` (${belowCritical.length} area(s) below critical threshold: ${belowCritical.join(", ")})`
123
133
  : "";
@@ -130,4 +140,7 @@ export class CalculateScoresStep {
130
140
  cacheInputs(ctx) {
131
141
  return getStepInputPaths(ctx.config.rootDir, "calculate-scores");
132
142
  }
143
+ cacheContext(ctx) {
144
+ return buildCacheContext(ctx.config);
145
+ }
133
146
  }
@@ -52,11 +52,20 @@ export class CallbackStep {
52
52
  }
53
53
  // Deliver callback — read reportId from pipeline state (set by PublishReportStep)
54
54
  ctx.logger.info(`Delivering results to ${this.callback.url}`);
55
- const result = await deliverCallback(this.callback, {
55
+ const callbackPayload = {
56
56
  deliveredAt: new Date().toISOString(),
57
57
  jobId: this.jobId,
58
58
  reportId: state.reportId,
59
59
  summary,
60
+ };
61
+ // Capture callback payload (Tier 2 — no secrets: headers are NOT captured)
62
+ ctx.collector.capture("callback", "callback-payload", callbackPayload);
63
+ const result = await deliverCallback(this.callback, callbackPayload);
64
+ // Capture callback response status (not the body — that's the user's system)
65
+ ctx.collector.capture("callback", "callback-response", {
66
+ ok: result.ok,
67
+ attempts: result.attempts,
68
+ error: result.error,
60
69
  });
61
70
  if (result.ok) {
62
71
  return {
@@ -5,7 +5,7 @@
5
5
  * inlined directly from the former pipeline/steps/compare-step.ts.
6
6
  * This is an optional step — failure doesn't stop the pipeline.
7
7
  */
8
- import { existsSync, readFileSync, readdirSync, writeFileSync } from "fs";
8
+ import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync, } from "fs";
9
9
  import { join, resolve } from "path";
10
10
  import { compare } from "../../pipeline/compare.js";
11
11
  export class CompareStep {
@@ -65,9 +65,12 @@ export class CompareStep {
65
65
  ? { noiseThreshold: ctx.config.compareThreshold }
66
66
  : undefined;
67
67
  const report = compare(baseline, experiment, options);
68
- // Write report
69
- const reportPath = resolve(rootDir, "results", "latest", "comparison-report.json");
68
+ // Write report to outputDir (respects --output-dir)
69
+ mkdirSync(ctx.config.outputDir, { recursive: true });
70
+ const reportPath = resolve(ctx.config.outputDir, "comparison-report.json");
70
71
  writeFileSync(reportPath, JSON.stringify(report, null, 2));
72
+ // Capture comparison report
73
+ ctx.collector.captureFile("compare", "comparison-report", reportPath);
71
74
  // Build summary
72
75
  const improved = report.improved.length;
73
76
  const regressed = report.regressed.length;
@@ -4,7 +4,7 @@
4
4
  * Calls pure functions from pipeline/discovery-report.ts directly.
5
5
  * Optional step — failure doesn't stop the pipeline.
6
6
  */
7
- import { existsSync, readFileSync, writeFileSync } from "fs";
7
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
8
8
  import { resolve } from "path";
9
9
  import { formatDiscoveryMarkdown, generateDiscoveryReport, } from "../../pipeline/discovery-report.js";
10
10
  export class DiscoveryReportStep {
@@ -34,7 +34,11 @@ export class DiscoveryReportStep {
34
34
  }
35
35
  const report = generateDiscoveryReport(scoreSummary, ctx.config.areas);
36
36
  const md = formatDiscoveryMarkdown(report);
37
- writeFileSync(resolve(root, "results", "latest", "discovery-report.md"), md);
37
+ // Write to outputDir (respects --output-dir)
38
+ mkdirSync(ctx.config.outputDir, { recursive: true });
39
+ const discoveryPath = resolve(ctx.config.outputDir, "discovery-report.md");
40
+ writeFileSync(discoveryPath, md);
41
+ ctx.collector.captureFile("discovery-report", "discovery-report", discoveryPath);
38
42
  console.log(md);
39
43
  const invisible = report.invisibleDocs.length;
40
44
  const f1 = report.overall.avgF1.toFixed(2);
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Shell delegation for the fetch-docs step.
3
+ *
4
+ * Isolates the execSync call so it can be replaced when the pipeline
5
+ * fully migrates to the DocFetcher port.
6
+ */
7
+ export interface ShellResult {
8
+ ok: boolean;
9
+ error?: string;
10
+ }
11
+ /**
12
+ * Run `pnpm fetch-docs` via shell.
13
+ *
14
+ * Returns a result object instead of throwing so the step can
15
+ * handle the failure uniformly.
16
+ */
17
+ export declare function runFetchDocsShell(rootDir: string, source?: string): ShellResult;
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Shell delegation for the fetch-docs step.
3
+ *
4
+ * Isolates the execSync call so it can be replaced when the pipeline
5
+ * fully migrates to the DocFetcher port.
6
+ */
7
+ import { execSync } from "child_process";
8
+ /**
9
+ * Run `pnpm fetch-docs` via shell.
10
+ *
11
+ * Returns a result object instead of throwing so the step can
12
+ * handle the failure uniformly.
13
+ */
14
+ export function runFetchDocsShell(rootDir, source) {
15
+ try {
16
+ const sourceArg = source ? ` --source ${source}` : "";
17
+ execSync(`pnpm fetch-docs${sourceArg}`, {
18
+ cwd: rootDir,
19
+ env: process.env,
20
+ stdio: "inherit",
21
+ });
22
+ return { ok: true };
23
+ }
24
+ catch (err) {
25
+ return {
26
+ ok: false,
27
+ error: err instanceof Error ? err.message : String(err),
28
+ };
29
+ }
30
+ }
@@ -16,4 +16,5 @@ export declare class FetchDocsStep implements PipelineStep {
16
16
  check(): ValidationIssue[];
17
17
  execute(ctx: AppContext, state: PipelineState): Promise<StepResult>;
18
18
  cacheInputs(ctx: AppContext): string[];
19
+ cacheContext(ctx: AppContext): string[];
19
20
  }
@@ -10,11 +10,13 @@
10
10
  * and stores a `releaseAutoScope` entry in PipelineState. Downstream
11
11
  * steps (GenerateConfigsStep, RunEvalStep) use this to narrow scope.
12
12
  */
13
- import { mkdirSync, writeFileSync } from "fs";
13
+ import { existsSync, mkdirSync, writeFileSync } from "fs";
14
14
  import { join } from "path";
15
15
  import { isIdRef, isPathRef, isSlugRef, } from "../../_vendor/ailf-core/index.js";
16
16
  import { getStepInputPaths } from "../../pipeline/cache.js";
17
+ import { buildCacheContext } from "../cache-context.js";
17
18
  import { checkCanonicalContextsExist } from "../../pipeline/checks.js";
19
+ import { loadPipelineTasks } from "../load-pipeline-tasks.js";
18
20
  import { loadSource } from "../../sources.js";
19
21
  import { configToSourceOverrides } from "../config-to-source-overrides.js";
20
22
  export class FetchDocsStep {
@@ -27,16 +29,22 @@ export class FetchDocsStep {
27
29
  return { status: "skipped", reason: "--skip-fetch" };
28
30
  }
29
31
  const start = Date.now();
30
- // Precondition: at least one task has canonical doc mappings
31
- const allTasks = await ctx.taskSource.loadTasks(buildFilter(ctx));
32
+ // Load tasks from the filesystem — the same source GenerateConfigsStep
33
+ // uses. This replaces ctx.taskSource (ContentLakeTaskSource) which may
34
+ // have no ailf.task documents, causing a mismatch where generated
35
+ // configs reference context files that were never fetched.
36
+ const allTasks = await loadPipelineTasks({
37
+ rootDir: ctx.config.rootDir,
38
+ mode: ctx.config.mode,
39
+ repoTasksPath: ctx.config.repoTasksPath,
40
+ });
32
41
  // Bridge: narrow to literacy tasks for canonical doc access
33
42
  const literacyTasks = allTasks.filter((t) => t.mode === "literacy");
34
43
  const tasksWithDocs = literacyTasks.filter((t) => (t.context?.docs?.length ?? 0) > 0);
35
44
  if (tasksWithDocs.length === 0) {
36
45
  return {
37
- durationMs: Date.now() - start,
38
- error: "No tasks with canonical_docs found. Add canonical_docs to your task definitions.",
39
- status: "failed",
46
+ status: "skipped",
47
+ reason: "No literacy tasks with canonical_docs nothing to fetch",
40
48
  };
41
49
  }
42
50
  // Resolve source once with typed overrides
@@ -72,6 +80,21 @@ export class FetchDocsStep {
72
80
  if (result.metadata) {
73
81
  writeMetadataFiles(ctx.config.rootDir, result.metadata);
74
82
  }
83
+ // Capture metadata files (mode-specific extras)
84
+ if (ctx.collector.extrasEnabled) {
85
+ const contextsDir = join(ctx.config.rootDir, "contexts");
86
+ for (const [type, filename] of [
87
+ ["document-manifest", "document-manifest.json"],
88
+ ["release-impact", "release-impact.json"],
89
+ ["document-overlay", "document-overlay.json"],
90
+ ["url-fetch", "url-fetch.json"],
91
+ ]) {
92
+ const filePath = join(contextsDir, filename);
93
+ if (existsSync(filePath)) {
94
+ ctx.collector.captureFile("fetch-docs", type, filePath);
95
+ }
96
+ }
97
+ }
75
98
  }
76
99
  catch (err) {
77
100
  return {
@@ -118,19 +141,9 @@ export class FetchDocsStep {
118
141
  cacheInputs(ctx) {
119
142
  return getStepInputPaths(ctx.config.rootDir, "fetch-docs");
120
143
  }
121
- }
122
- // ---------------------------------------------------------------------------
123
- // Helpers
124
- // ---------------------------------------------------------------------------
125
- function buildFilter(ctx) {
126
- const { areas, tasks, tags } = ctx.config;
127
- if (!areas && !tasks && !tags)
128
- return undefined;
129
- return {
130
- ...(areas ? { areas } : {}),
131
- ...(tasks ? { taskIds: tasks } : {}),
132
- ...(tags ? { tags } : {}),
133
- };
144
+ cacheContext(ctx) {
145
+ return buildCacheContext(ctx.config);
146
+ }
134
147
  }
135
148
  /**
136
149
  * Write metadata files returned by DocFetcher to the contexts/ directory.
@@ -14,7 +14,7 @@
14
14
  *
15
15
  * This is an optional step — failure doesn't stop the pipeline.
16
16
  */
17
- import { existsSync, readFileSync, writeFileSync } from "fs";
17
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
18
18
  import { join, resolve } from "path";
19
19
  import { isSlugRef } from "../../_vendor/ailf-core/index.js";
20
20
  export class GapAnalysisStep {
@@ -56,7 +56,9 @@ export class GapAnalysisStep {
56
56
  console.log(formatFailureModesConsole(failureModeReport));
57
57
  const gapReport = buildGapAnalysisReport(failureModeReport, scoreSummary.scores);
58
58
  console.log(formatGapAnalysisConsole(gapReport));
59
- const outDir = resolve(root, "results", "latest");
59
+ // Write user-facing artifacts to outputDir (respects --output-dir)
60
+ const outDir = ctx.config.outputDir;
61
+ mkdirSync(outDir, { recursive: true });
60
62
  writeFileSync(join(outDir, "failure-modes.json"), JSON.stringify(failureModeReport, null, 2));
61
63
  writeFileSync(join(outDir, "gap-analysis.json"), JSON.stringify(gapReport, null, 2));
62
64
  const manifestPath = resolve(root, "contexts", "document-manifest.json");
@@ -166,6 +168,15 @@ export class GapAnalysisStep {
166
168
  scores: enrichedScores,
167
169
  };
168
170
  writeFileSync(scoreSummaryPath, JSON.stringify(enrichedSummary, null, 2));
171
+ // Capture gap analysis artifacts
172
+ const failureModesPath = join(outDir, "failure-modes.json");
173
+ if (existsSync(failureModesPath)) {
174
+ ctx.collector.captureFile("gap-analysis", "failure-modes", failureModesPath);
175
+ }
176
+ const gapReportPath = join(outDir, "gap-analysis.json");
177
+ if (existsSync(gapReportPath)) {
178
+ ctx.collector.captureFile("gap-analysis", "gap-report", gapReportPath);
179
+ }
169
180
  const gapCount = gapReport.gaps.length;
170
181
  const classRate = failureModeReport.classificationRate.toFixed(0);
171
182
  return {
@@ -24,4 +24,5 @@ export declare class GenerateConfigsStep implements PipelineStep {
24
24
  private compileAll;
25
25
  private checkLiteracyPostconditions;
26
26
  cacheInputs(ctx: AppContext): string[];
27
+ cacheContext(ctx: AppContext): string[];
27
28
  }