@sanity/ailf 1.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (499) hide show
  1. package/README.md +0 -1
  2. package/canonical/grader-references/README.md +2 -2
  3. package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
  4. package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
  5. package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
  6. package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
  7. package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
  8. package/config/features.ts +1 -1
  9. package/config/models.ts +29 -12
  10. package/config/sources.ts +1 -1
  11. package/config/thresholds.ts +1 -1
  12. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
  13. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
  14. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
  15. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
  16. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
  17. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
  18. package/dist/_vendor/ailf-core/config-helpers.d.ts +20 -17
  19. package/dist/_vendor/ailf-core/config-helpers.js +51 -2
  20. package/dist/_vendor/ailf-core/examples/index.d.ts +166 -80
  21. package/dist/_vendor/ailf-core/examples/index.js +213 -94
  22. package/dist/_vendor/ailf-core/index.d.ts +3 -2
  23. package/dist/_vendor/ailf-core/index.js +2 -1
  24. package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
  25. package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
  26. package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
  27. package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
  28. package/dist/_vendor/ailf-core/ports/context.d.ts +22 -1
  29. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
  30. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
  31. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
  32. package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
  33. package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
  34. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
  35. package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
  36. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +7 -1
  37. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +16 -2
  38. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
  39. package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
  40. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
  41. package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
  42. package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
  43. package/dist/_vendor/ailf-core/services/index.js +1 -1
  44. package/dist/_vendor/ailf-core/services/scoring.js +9 -0
  45. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +25 -1
  46. package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
  47. package/dist/_vendor/ailf-core/types/index.d.ts +48 -7
  48. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +105 -23
  49. package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
  50. package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
  51. package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
  52. package/dist/adapters/api-client/remediation.js +2 -2
  53. package/dist/adapters/config-sources/file-config-adapter.js +7 -1
  54. package/dist/adapters/config-sources/ts-config-loader.js +21 -13
  55. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
  56. package/dist/adapters/index.d.ts +0 -1
  57. package/dist/adapters/index.js +0 -1
  58. package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
  59. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  60. package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
  61. package/dist/adapters/task-sources/content-lake-task-source.js +21 -26
  62. package/dist/adapters/task-sources/index.d.ts +3 -4
  63. package/dist/adapters/task-sources/index.js +3 -4
  64. package/dist/adapters/task-sources/repo-schemas.d.ts +219 -17
  65. package/dist/adapters/task-sources/repo-schemas.js +228 -20
  66. package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
  67. package/dist/adapters/task-sources/repo-task-source.js +81 -122
  68. package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
  69. package/dist/adapters/task-sources/repo-trigger.js +1 -1
  70. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  71. package/dist/adapters/task-sources/repo-validation.js +126 -5
  72. package/dist/adapters/task-sources/task-file-loader.d.ts +10 -7
  73. package/dist/adapters/task-sources/task-file-loader.js +21 -7
  74. package/dist/agent-observer/test-imports.d.ts +7 -0
  75. package/dist/agent-observer/test-imports.js +185 -0
  76. package/dist/artifact-capture/comparator.d.ts +22 -0
  77. package/dist/artifact-capture/comparator.js +493 -0
  78. package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
  79. package/dist/artifact-capture/filesystem-collector.js +237 -0
  80. package/dist/artifact-capture/redact-artifact.d.ts +20 -0
  81. package/dist/artifact-capture/redact-artifact.js +115 -0
  82. package/dist/assertions/source-isolation.d.ts +1 -1
  83. package/dist/assertions/source-isolation.js +1 -1
  84. package/dist/cli.js +4 -0
  85. package/dist/commands/calculate-scores.js +1 -0
  86. package/dist/commands/capture-compare.d.ts +15 -0
  87. package/dist/commands/capture-compare.js +253 -0
  88. package/dist/commands/capture-list.d.ts +12 -0
  89. package/dist/commands/capture-list.js +147 -0
  90. package/dist/commands/capture.d.ts +9 -0
  91. package/dist/commands/capture.js +16 -0
  92. package/dist/commands/chronic-failures.d.ts +8 -0
  93. package/dist/commands/chronic-failures.js +33 -0
  94. package/dist/commands/coverage-audit.js +3 -1
  95. package/dist/commands/explain-handler.d.ts +1 -1
  96. package/dist/commands/explain-handler.js +37 -8
  97. package/dist/commands/fetch-docs.js +1 -0
  98. package/dist/commands/generate-configs.d.ts +3 -3
  99. package/dist/commands/generate-configs.js +20 -8
  100. package/dist/commands/init.d.ts +5 -4
  101. package/dist/commands/init.js +190 -25
  102. package/dist/commands/pipeline-action.d.ts +7 -1
  103. package/dist/commands/pipeline-action.js +43 -19
  104. package/dist/commands/pipeline.d.ts +6 -1
  105. package/dist/commands/pipeline.js +7 -2
  106. package/dist/commands/pr-comment.js +1 -0
  107. package/dist/commands/publish.js +1 -0
  108. package/dist/commands/shared/help.js +2 -2
  109. package/dist/commands/update-quality-scores.d.ts +5 -0
  110. package/dist/commands/update-quality-scores.js +20 -0
  111. package/dist/commands/validate-tasks.d.ts +2 -2
  112. package/dist/commands/validate-tasks.js +26 -15
  113. package/dist/composition-root.d.ts +15 -4
  114. package/dist/composition-root.js +100 -55
  115. package/dist/config/features.ts +23 -0
  116. package/dist/config/models.ts +100 -0
  117. package/dist/config/prompts.ts +16 -0
  118. package/dist/config/rubrics.ts +225 -0
  119. package/dist/config/schedules.ts +47 -0
  120. package/dist/config/sinks.ts +37 -0
  121. package/dist/config/sources.ts +21 -0
  122. package/dist/config/thresholds.ts +61 -0
  123. package/dist/index.d.ts +41 -0
  124. package/dist/index.js +48 -0
  125. package/dist/lib/agent-behavior-report.d.ts +8 -0
  126. package/dist/lib/agent-behavior-report.js +185 -0
  127. package/dist/lib/baseline.d.ts +19 -0
  128. package/dist/lib/baseline.js +153 -0
  129. package/dist/lib/calculate-scores.d.ts +23 -0
  130. package/dist/lib/calculate-scores.js +42 -0
  131. package/dist/lib/compare.d.ts +18 -0
  132. package/dist/lib/compare.js +170 -0
  133. package/dist/lib/coverage-audit.d.ts +4 -0
  134. package/dist/lib/coverage-audit.js +42 -0
  135. package/dist/lib/discovery-report.d.ts +13 -0
  136. package/dist/lib/discovery-report.js +57 -0
  137. package/dist/lib/fetch-docs.d.ts +30 -0
  138. package/dist/lib/fetch-docs.js +171 -0
  139. package/dist/lib/generate-configs.d.ts +25 -0
  140. package/dist/lib/generate-configs.js +42 -0
  141. package/dist/lib/grader-api.d.ts +21 -0
  142. package/dist/lib/grader-api.js +34 -0
  143. package/dist/lib/grader-compare.d.ts +19 -0
  144. package/dist/lib/grader-compare.js +91 -0
  145. package/dist/lib/grader-consistency.d.ts +27 -0
  146. package/dist/lib/grader-consistency.js +79 -0
  147. package/dist/lib/grader-sensitivity.d.ts +19 -0
  148. package/dist/lib/grader-sensitivity.js +75 -0
  149. package/dist/lib/grader-validate.d.ts +19 -0
  150. package/dist/lib/grader-validate.js +78 -0
  151. package/dist/lib/measure-retrieval.d.ts +14 -0
  152. package/dist/lib/measure-retrieval.js +71 -0
  153. package/dist/lib/pr-comment.d.ts +16 -0
  154. package/dist/lib/pr-comment.js +28 -0
  155. package/dist/lib/readiness-report.d.ts +13 -0
  156. package/dist/lib/readiness-report.js +108 -0
  157. package/dist/lib/webhook-server.d.ts +11 -0
  158. package/dist/lib/webhook-server.js +24 -0
  159. package/dist/lib/weekly-digest.d.ts +24 -0
  160. package/dist/lib/weekly-digest.js +148 -0
  161. package/dist/orchestration/build-app-context.js +13 -0
  162. package/dist/orchestration/build-step-sequence.js +4 -2
  163. package/dist/orchestration/cache-context.d.ts +23 -0
  164. package/dist/orchestration/cache-context.js +43 -0
  165. package/dist/orchestration/env-bridge.d.ts +21 -0
  166. package/dist/orchestration/env-bridge.js +66 -0
  167. package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
  168. package/dist/orchestration/load-pipeline-tasks.js +52 -0
  169. package/dist/orchestration/pipeline-orchestrator.js +75 -5
  170. package/dist/orchestration/step-runner.js +5 -1
  171. package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
  172. package/dist/orchestration/steps/calculate-scores-step.js +13 -0
  173. package/dist/orchestration/steps/callback-step.js +10 -1
  174. package/dist/orchestration/steps/compare-step.js +6 -3
  175. package/dist/orchestration/steps/discovery-report-step.js +6 -2
  176. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  177. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  178. package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
  179. package/dist/orchestration/steps/fetch-docs-step.js +32 -19
  180. package/dist/orchestration/steps/gap-analysis-step.js +13 -2
  181. package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
  182. package/dist/orchestration/steps/generate-configs-step.js +77 -26
  183. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
  184. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  185. package/dist/orchestration/steps/publish-report-step.js +19 -0
  186. package/dist/orchestration/steps/readiness-step.js +8 -3
  187. package/dist/orchestration/steps/report-step.js +17 -4
  188. package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
  189. package/dist/orchestration/steps/run-eval-step.js +51 -31
  190. package/dist/pipeline/agent-behavior-report.js +6 -0
  191. package/dist/pipeline/attribution.d.ts +1 -1
  192. package/dist/pipeline/attribution.js +1 -1
  193. package/dist/pipeline/cache.js +29 -15
  194. package/dist/pipeline/calculate-scores.d.ts +2 -0
  195. package/dist/pipeline/calculate-scores.js +70 -33
  196. package/dist/pipeline/chronic-failures.d.ts +55 -0
  197. package/dist/pipeline/chronic-failures.js +110 -0
  198. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
  199. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
  200. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
  201. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +132 -62
  202. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
  203. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
  204. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
  205. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
  206. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
  207. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
  208. package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
  209. package/dist/pipeline/compiler/assertion-mapper.js +1 -1
  210. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
  211. package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
  212. package/dist/pipeline/compiler/config-loader.d.ts +14 -0
  213. package/dist/pipeline/compiler/config-loader.js +42 -2
  214. package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
  215. package/dist/pipeline/compiler/fixture-resolver.js +1 -1
  216. package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
  217. package/dist/pipeline/compiler/ignore-fields.js +1 -1
  218. package/dist/pipeline/compiler/index.d.ts +2 -5
  219. package/dist/pipeline/compiler/index.js +2 -5
  220. package/dist/pipeline/compiler/literacy-bridge.d.ts +2 -2
  221. package/dist/pipeline/compiler/literacy-bridge.js +2 -2
  222. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  223. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  224. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  225. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  226. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  227. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  228. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +23 -0
  229. package/dist/pipeline/compiler/mode-bases/literacy.js +132 -0
  230. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  231. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  232. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  233. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  234. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  235. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  236. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  237. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  238. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  239. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  240. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  241. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  242. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  243. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  244. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
  245. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  246. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  247. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  248. package/dist/pipeline/compiler/mode-handlers/index.d.ts +6 -7
  249. package/dist/pipeline/compiler/mode-handlers/index.js +6 -8
  250. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  251. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  252. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  253. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  254. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  255. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  256. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  257. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  258. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  259. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  260. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  261. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  262. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  263. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  264. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  265. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  266. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  267. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  268. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  269. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  270. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  271. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  272. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  273. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  274. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +63 -6
  275. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
  276. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
  277. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  278. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  279. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  280. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  281. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  282. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  283. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  284. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +108 -0
  285. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  286. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  287. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  288. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  289. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +3 -1
  290. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +65 -67
  291. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  292. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +191 -0
  293. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  294. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +101 -0
  295. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  296. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  297. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +19 -0
  298. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +323 -0
  299. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +103 -0
  300. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  301. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
  302. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
  303. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  304. package/dist/pipeline/compiler/preset-loader.js +99 -0
  305. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +7 -10
  306. package/dist/pipeline/compiler/presets/sanity-literacy.js +11 -157
  307. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
  308. package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
  309. package/dist/pipeline/compiler/provider-assembler.js +13 -7
  310. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
  311. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
  312. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
  313. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
  314. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
  315. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
  316. package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
  317. package/dist/pipeline/compiler/sandbox/index.js +1 -1
  318. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
  319. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
  320. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
  321. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
  322. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
  323. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
  324. package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
  325. package/dist/pipeline/compiler/scoring-bridge.js +1 -1
  326. package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
  327. package/dist/pipeline/compiler/task-bridge.js +92 -0
  328. package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
  329. package/dist/pipeline/compiler/task-graph-builder.js +1 -4
  330. package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
  331. package/dist/pipeline/compiler/telemetry/index.js +1 -1
  332. package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
  333. package/dist/pipeline/compiler/variable-resolver.js +1 -1
  334. package/dist/pipeline/coverage-audit.d.ts +1 -1
  335. package/dist/pipeline/coverage-audit.js +1 -1
  336. package/dist/pipeline/degradations.d.ts +1 -1
  337. package/dist/pipeline/degradations.js +1 -1
  338. package/dist/pipeline/expand-tasks.d.ts +2 -2
  339. package/dist/pipeline/expand-tasks.js +2 -2
  340. package/dist/pipeline/failure-modes.d.ts +1 -1
  341. package/dist/pipeline/failure-modes.js +13 -1
  342. package/dist/pipeline/gap-analysis.d.ts +1 -1
  343. package/dist/pipeline/gap-analysis.js +3 -1
  344. package/dist/pipeline/generate-configs.d.ts +2 -2
  345. package/dist/pipeline/generate-configs.js +16 -9
  346. package/dist/pipeline/grader-compare-runner.d.ts +1 -1
  347. package/dist/pipeline/grader-compare-runner.js +7 -1
  348. package/dist/pipeline/grader-comparison.d.ts +1 -1
  349. package/dist/pipeline/grader-comparison.js +1 -1
  350. package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
  351. package/dist/pipeline/grader-consistency-runner.js +7 -1
  352. package/dist/pipeline/grader-consistency.d.ts +1 -1
  353. package/dist/pipeline/grader-consistency.js +1 -1
  354. package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
  355. package/dist/pipeline/grader-sensitivity-runner.js +1 -1
  356. package/dist/pipeline/grader-sensitivity.d.ts +1 -1
  357. package/dist/pipeline/grader-sensitivity.js +1 -1
  358. package/dist/pipeline/grader-validate-runner.d.ts +1 -1
  359. package/dist/pipeline/grader-validate-runner.js +2 -2
  360. package/dist/pipeline/grader-validation.d.ts +1 -1
  361. package/dist/pipeline/grader-validation.js +1 -1
  362. package/dist/pipeline/map-request-to-config.js +16 -2
  363. package/dist/pipeline/mirror-repo-tasks.d.ts +8 -8
  364. package/dist/pipeline/mirror-repo-tasks.js +10 -10
  365. package/dist/pipeline/plan-format.d.ts +1 -1
  366. package/dist/pipeline/plan-format.js +1 -1
  367. package/dist/pipeline/plan.d.ts +1 -1
  368. package/dist/pipeline/plan.js +68 -30
  369. package/dist/pipeline/probe.d.ts +1 -1
  370. package/dist/pipeline/probe.js +1 -1
  371. package/dist/pipeline/readiness-report.d.ts +2 -2
  372. package/dist/pipeline/readiness-report.js +2 -2
  373. package/dist/pipeline/release-classification.d.ts +1 -1
  374. package/dist/pipeline/release-classification.js +1 -1
  375. package/dist/pipeline/release-report.d.ts +1 -1
  376. package/dist/pipeline/release-report.js +1 -1
  377. package/dist/pipeline/repo-eval-comment.d.ts +1 -1
  378. package/dist/pipeline/repo-eval-comment.js +1 -1
  379. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  380. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  381. package/dist/pipeline/resolve-mappings.d.ts +6 -6
  382. package/dist/pipeline/resolve-mappings.js +44 -44
  383. package/dist/pipeline/retrieval-metrics.d.ts +3 -3
  384. package/dist/pipeline/retrieval-metrics.js +28 -20
  385. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  386. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  387. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  388. package/dist/pipeline/steps/compare-step.js +90 -0
  389. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  390. package/dist/pipeline/steps/eval-step.js +347 -0
  391. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  392. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  393. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  394. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  395. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  396. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  397. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  398. package/dist/pipeline/steps/publish-report-step.js +243 -0
  399. package/dist/pipeline/steps/report-step.d.ts +13 -0
  400. package/dist/pipeline/steps/report-step.js +56 -0
  401. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  402. package/dist/pipeline/steps/update-scores-step.js +42 -0
  403. package/dist/pipeline/targeted-loo.d.ts +1 -1
  404. package/dist/pipeline/targeted-loo.js +1 -1
  405. package/dist/pipeline/thresholds.d.ts +1 -1
  406. package/dist/pipeline/thresholds.js +1 -1
  407. package/dist/pipeline/validate.js +13 -0
  408. package/dist/report-store.d.ts +17 -0
  409. package/dist/report-store.js +24 -0
  410. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  411. package/dist/scripts/agent-behavior-report.js +315 -0
  412. package/dist/scripts/baseline.d.ts +43 -0
  413. package/dist/scripts/baseline.js +267 -0
  414. package/dist/scripts/calculate-scores.d.ts +166 -0
  415. package/dist/scripts/calculate-scores.js +1296 -0
  416. package/dist/scripts/compare.d.ts +22 -0
  417. package/dist/scripts/compare.js +334 -0
  418. package/dist/scripts/coverage-audit.d.ts +44 -0
  419. package/dist/scripts/coverage-audit.js +209 -0
  420. package/dist/scripts/debug-eval.d.ts +19 -0
  421. package/dist/scripts/debug-eval.js +73 -0
  422. package/dist/scripts/discovery-report.d.ts +58 -0
  423. package/dist/scripts/discovery-report.js +250 -0
  424. package/dist/scripts/fetch-docs.d.ts +35 -0
  425. package/dist/scripts/fetch-docs.js +472 -0
  426. package/dist/scripts/generate-configs.d.ts +66 -0
  427. package/dist/scripts/generate-configs.js +459 -0
  428. package/dist/scripts/grader-api.d.ts +27 -0
  429. package/dist/scripts/grader-api.js +206 -0
  430. package/dist/scripts/grader-compare.d.ts +22 -0
  431. package/dist/scripts/grader-compare.js +368 -0
  432. package/dist/scripts/grader-consistency.d.ts +20 -0
  433. package/dist/scripts/grader-consistency.js +313 -0
  434. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  435. package/dist/scripts/grader-sensitivity.js +354 -0
  436. package/dist/scripts/grader-validate.d.ts +19 -0
  437. package/dist/scripts/grader-validate.js +267 -0
  438. package/dist/scripts/measure-retrieval.d.ts +10 -0
  439. package/dist/scripts/measure-retrieval.js +145 -0
  440. package/dist/scripts/migrate-task-mode.d.ts +1 -1
  441. package/dist/scripts/migrate-task-mode.js +1 -1
  442. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
  443. package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
  444. package/dist/scripts/pipeline.d.ts +76 -0
  445. package/dist/scripts/pipeline.js +1031 -0
  446. package/dist/scripts/pr-comment.d.ts +10 -0
  447. package/dist/scripts/pr-comment.js +510 -0
  448. package/dist/scripts/readiness-report.d.ts +88 -0
  449. package/dist/scripts/readiness-report.js +342 -0
  450. package/dist/scripts/update-quality-scores.d.ts +15 -0
  451. package/dist/scripts/update-quality-scores.js +184 -0
  452. package/dist/scripts/validate-task-sources.d.ts +1 -1
  453. package/dist/scripts/validate-task-sources.js +1 -1
  454. package/dist/scripts/validate.d.ts +13 -0
  455. package/dist/scripts/validate.js +79 -0
  456. package/dist/scripts/webhook-server.d.ts +26 -0
  457. package/dist/scripts/webhook-server.js +147 -0
  458. package/dist/scripts/weekly-digest.d.ts +24 -0
  459. package/dist/scripts/weekly-digest.js +144 -0
  460. package/dist/sinks/format-slack.d.ts +64 -0
  461. package/dist/sinks/format-slack.js +306 -0
  462. package/dist/sinks/slack-sink.d.ts +27 -0
  463. package/dist/sinks/slack-sink.js +78 -0
  464. package/dist/sinks/types.d.ts +1 -1
  465. package/dist/sinks/types.js +1 -1
  466. package/dist/sinks/webhook-sink.d.ts +19 -0
  467. package/dist/sinks/webhook-sink.js +50 -0
  468. package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
  469. package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
  470. package/dist/tasks/literacy/content-lake.task.ts +181 -0
  471. package/dist/tasks/literacy/frameworks.task.ts +129 -0
  472. package/dist/tasks/literacy/functions.task.ts +70 -0
  473. package/dist/tasks/literacy/groq.task.ts +259 -0
  474. package/dist/tasks/literacy/image-handling.task.ts +95 -0
  475. package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
  476. package/dist/tasks/literacy/portable-text.task.ts +169 -0
  477. package/dist/tasks/literacy/studio-setup.task.ts +134 -0
  478. package/dist/tasks/literacy/visual-editing.task.ts +147 -0
  479. package/package.json +32 -24
  480. package/tasks/.expanded.agentic.yaml +280 -0
  481. package/tasks/.expanded.yaml +565 -0
  482. package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
  483. package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
  484. package/tasks/literacy/content-lake.task.ts +181 -0
  485. package/tasks/literacy/frameworks.task.ts +1 -0
  486. package/tasks/literacy/functions.task.ts +1 -0
  487. package/tasks/literacy/groq.task.ts +1 -0
  488. package/tasks/literacy/image-handling.task.ts +95 -0
  489. package/tasks/literacy/nextjs-live.task.ts +2 -1
  490. package/tasks/literacy/portable-text.task.ts +169 -0
  491. package/tasks/literacy/studio-setup.task.ts +5 -2
  492. package/tasks/literacy/visual-editing.task.ts +1 -0
  493. package/LICENSE +0 -21
  494. package/tasks/frameworks.yaml +0 -98
  495. package/tasks/functions.yaml +0 -51
  496. package/tasks/groq.yaml +0 -216
  497. package/tasks/nextjs-live.yaml +0 -62
  498. package/tasks/studio-setup.yaml +0 -111
  499. package/tasks/visual-editing.yaml +0 -120
@@ -0,0 +1,493 @@
1
+ /**
2
+ * CaptureComparator — compares two capture directories and produces a diff report.
3
+ *
4
+ * Reads manifest.json from both directories and computes:
5
+ * - Inventory diff (added/removed/common artifacts)
6
+ * - Content diff (structural or strict, for common artifacts)
7
+ * - Score comparison (from score-summary.json)
8
+ * - Timing comparison (from pipeline-context.json)
9
+ * - Metadata comparison (mode, variant, config keys)
10
+ * - Security scan (regex for leaked secrets)
11
+ *
12
+ * Implementation for the types defined in @sanity/ailf-core.
13
+ */
14
+ import { existsSync, readFileSync } from "node:fs";
15
+ import { join } from "node:path";
16
+ // ---------------------------------------------------------------------------
17
+ // Defaults
18
+ // ---------------------------------------------------------------------------
/**
 * Baseline comparison options. Caller-supplied options are layered on top of
 * these by compareCaptures().
 */
const DEFAULT_OPTIONS = {
    mode: "inventory",
    scoreThresholds: { aggregate: 5, perTask: 10 },
    timingThresholds: { multiplier: 2.0 },
    jsonDiffDepth: 3,
};
/**
 * Object keys that are always removed from JSON artifacts before they are
 * compared; callers can extend this set through the `ephemeralFields` option.
 */
const DEFAULT_EPHEMERAL_FIELDS = new Set(["captureId", "startedAt", "completedAt", "capturedAt", "durationMs"]);
32
+ // ---------------------------------------------------------------------------
33
+ // Public API
34
+ // ---------------------------------------------------------------------------
/**
 * Compare two capture directories and produce a structured diff report.
 *
 * The inventory diff and the security scan always run. Content diffs run only
 * when the mode is not "inventory" and the two captures share at least one
 * artifact. Score, timing and metadata sections are included only when the
 * corresponding helper returns a value.
 *
 * Options are resolved field by field, so an option that is missing or
 * explicitly `undefined` (including a single threshold inside a thresholds
 * object) falls back to its default instead of disabling the check.
 *
 * @param baselineDir - Path to the baseline capture directory (contains manifest.json)
 * @param experimentDir - Path to the experiment capture directory
 * @param opts - Comparison options (mode, thresholds, etc.)
 * @returns Report with `equivalent`, a human-readable `summary`, the resolved
 *   `mode`, and the individual diff sections.
 * @throws Error when either directory has no manifest.json.
 */
export function compareCaptures(baselineDir, experimentDir, opts) {
    const mode = opts?.mode ?? DEFAULT_OPTIONS.mode;
    const jsonDiffDepth = opts?.jsonDiffDepth ?? DEFAULT_OPTIONS.jsonDiffDepth;
    // A partial thresholds object must not leave a limit undefined: comparing
    // against undefined/NaN is always false and would silently mute breaches.
    const scoreThresholds = {
        ...opts?.scoreThresholds,
        aggregate: opts?.scoreThresholds?.aggregate ?? DEFAULT_OPTIONS.scoreThresholds.aggregate,
        perTask: opts?.scoreThresholds?.perTask ?? DEFAULT_OPTIONS.scoreThresholds.perTask,
    };
    const timingThresholds = {
        ...opts?.timingThresholds,
        multiplier: opts?.timingThresholds?.multiplier ?? DEFAULT_OPTIONS.timingThresholds.multiplier,
    };
    const ephemeral = new Set([
        ...DEFAULT_EPHEMERAL_FIELDS,
        ...(opts?.ephemeralFields ?? []),
    ]);
    const baselineManifest = readManifest(baselineDir);
    const experimentManifest = readManifest(experimentDir);
    // Inventory diff
    const inventory = computeInventoryDiff(baselineManifest, experimentManifest);
    // Security scan (always runs)
    const security = scanForSecrets(baselineDir, experimentDir, baselineManifest, experimentManifest);
    // Content diff (structural/strict only)
    let content;
    if (mode !== "inventory" && inventory.common.length > 0) {
        content = computeContentDiffs(baselineDir, experimentDir, baselineManifest, experimentManifest, inventory.common, mode, jsonDiffDepth, ephemeral);
    }
    // Score comparison
    const scores = computeScoreDiff(baselineDir, experimentDir, baselineManifest, experimentManifest, scoreThresholds);
    // Timing comparison
    const timing = computeTimingDiff(baselineDir, experimentDir, baselineManifest, experimentManifest, timingThresholds);
    // Metadata comparison (reported, but not counted as a violation below)
    const metadata = computeMetadataDiff(baselineDir, experimentDir, baselineManifest, experimentManifest, ephemeral);
    // Determine equivalence
    const violations = [];
    if (inventory.added.length > 0)
        violations.push(`${inventory.added.length} artifact(s) added`);
    if (inventory.removed.length > 0)
        violations.push(`${inventory.removed.length} artifact(s) removed`);
    if (content && content.length > 0)
        violations.push(`${content.length} artifact(s) changed`);
    if (scores?.breaches.length)
        violations.push(`${scores.breaches.length} score breach(es)`);
    if (timing?.breaches.length)
        violations.push(`${timing.breaches.length} timing breach(es)`);
    if (security.leaksFound)
        violations.push(`${security.violations.length} secret leak(s)`);
    const equivalent = violations.length === 0;
    return {
        equivalent,
        summary: equivalent
            ? "Captures are equivalent."
            : `Differences found: ${violations.join("; ")}.`,
        mode,
        inventory,
        ...(content ? { content } : {}),
        ...(scores ? { scores } : {}),
        ...(timing ? { timing } : {}),
        ...(metadata ? { metadata } : {}),
        security,
    };
}
94
+ // ---------------------------------------------------------------------------
95
+ // Manifest reading
96
+ // ---------------------------------------------------------------------------
/**
 * Load and parse `manifest.json` from a capture directory.
 *
 * @param dir - Capture directory expected to contain manifest.json.
 * @returns The parsed manifest object (guaranteed to carry an `artifacts` array).
 * @throws Error when the file is missing, is not valid JSON, or does not
 *   contain an `artifacts` array. Every message names the directory so the
 *   failing capture can be identified.
 */
function readManifest(dir) {
    const manifestPath = join(dir, "manifest.json");
    if (!existsSync(manifestPath)) {
        throw new Error(`No manifest.json found in ${dir}`);
    }
    const raw = readFileSync(manifestPath, "utf-8");
    let manifest;
    try {
        manifest = JSON.parse(raw);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new Error(`Invalid manifest.json in ${dir}: ${reason}`);
    }
    // Every consumer iterates manifest.artifacts; fail here with context
    // rather than with a bare TypeError further down the pipeline.
    if (typeof manifest !== "object" || manifest === null || !Array.isArray(manifest.artifacts)) {
        throw new Error(`Invalid manifest.json in ${dir}: expected an object with an "artifacts" array`);
    }
    return manifest;
}
104
+ // ---------------------------------------------------------------------------
105
+ // Inventory diff
106
+ // ---------------------------------------------------------------------------
/**
 * Identity of a manifest entry for diffing purposes: its step and type joined
 * by a slash. Two entries with the same step and type share a key.
 */
function artifactKey(entry) {
    const { step, type } = entry;
    return `${step}/${type}`;
}
/**
 * Partition artifact keys into those only in the experiment (`added`), only in
 * the baseline (`removed`), and present in both (`common`).
 *
 * `removed` and `common` follow baseline manifest order; `added` follows
 * experiment manifest order. Duplicate keys within one manifest collapse.
 */
function computeInventoryDiff(baseline, experiment) {
    const inBaseline = new Set(baseline.artifacts.map(artifactKey));
    const inExperiment = new Set(experiment.artifacts.map(artifactKey));
    const added = [];
    const removed = [];
    const common = [];
    for (const key of inExperiment) {
        if (!inBaseline.has(key)) {
            added.push(key);
        }
    }
    for (const key of inBaseline) {
        (inExperiment.has(key) ? common : removed).push(key);
    }
    return { added, removed, common };
}
118
+ // ---------------------------------------------------------------------------
119
+ // Content diff
120
+ // ---------------------------------------------------------------------------
/**
 * Diff the contents of every artifact that exists in both captures.
 *
 * - Artifacts whose file is missing on either side are skipped.
 * - JSON artifacts (per the baseline entry's `format`) are parsed, stripped of
 *   ephemeral fields, and compared with diffJson ("strict" mode) or
 *   diffJsonStructural (any other mode).
 * - If the JSON path throws for any reason, the raw text is compared instead
 *   and reported with format "text".
 * - All other formats are compared as text with a line-count diff.
 *
 * @returns One entry per artifact that differs: `{ artifactKey, format, changes }`.
 */
function computeContentDiffs(baselineDir, experimentDir, baselineManifest, experimentManifest, commonKeys, mode, depth, ephemeral) {
    const indexByKey = (manifest) => new Map(manifest.artifacts.map((entry) => [artifactKey(entry), entry]));
    const report = [];
    const baselineIndex = indexByKey(baselineManifest);
    const experimentIndex = indexByKey(experimentManifest);
    for (const key of commonKeys) {
        const baselineEntry = baselineIndex.get(key);
        const experimentEntry = experimentIndex.get(key);
        const baselineFile = join(baselineDir, baselineEntry.path);
        const experimentFile = join(experimentDir, experimentEntry.path);
        if (!(existsSync(baselineFile) && existsSync(experimentFile))) {
            continue;
        }
        const baselineText = readFileSync(baselineFile, "utf-8");
        const experimentText = readFileSync(experimentFile, "utf-8");
        if (baselineEntry.format !== "json") {
            // Text/markdown — line-level diff
            if (baselineText !== experimentText) {
                report.push({
                    artifactKey: key,
                    format: baselineEntry.format,
                    changes: computeLineDiff(baselineText, experimentText),
                });
            }
            continue;
        }
        try {
            const baselineData = JSON.parse(baselineText);
            const experimentData = JSON.parse(experimentText);
            const left = stripEphemeral(baselineData, ephemeral);
            const right = stripEphemeral(experimentData, ephemeral);
            let changes = [];
            if (mode !== "strict") {
                // structural — compare keys/types only
                changes = diffJsonStructural(left, right, "", depth);
            }
            else if (JSON.stringify(left) !== JSON.stringify(right)) {
                changes = diffJson(left, right, "", depth);
            }
            if (changes.length > 0) {
                report.push({ artifactKey: key, format: "json", changes });
            }
        }
        catch {
            // Can't process as JSON — fall back to text comparison
            if (baselineText !== experimentText) {
                report.push({
                    artifactKey: key,
                    format: "text",
                    changes: computeLineDiff(baselineText, experimentText),
                });
            }
        }
    }
    return report;
}
/**
 * Count added and removed lines between two texts, treating each text as a
 * multiset of "\n"-separated lines (line order is ignored).
 *
 * @param a - Baseline text.
 * @param b - Experiment text.
 * @returns `{ addedLines, removedLines }`: how many line occurrences `b` has
 *   beyond `a`, and vice versa.
 */
function computeLineDiff(a, b) {
    const tally = (text) => {
        const counts = new Map();
        text.split("\n").forEach((line) => {
            counts.set(line, (counts.get(line) ?? 0) + 1);
        });
        return counts;
    };
    // Sum of occurrences in `from` that are not matched by `against`.
    const surplus = (from, against) => {
        let total = 0;
        from.forEach((count, line) => {
            total += Math.max(0, count - (against.get(line) ?? 0));
        });
        return total;
    };
    const before = tally(a);
    const after = tally(b);
    return {
        addedLines: surplus(after, before),
        removedLines: surplus(before, after),
    };
}
199
+ // ---------------------------------------------------------------------------
200
+ // JSON diffing
201
+ // ---------------------------------------------------------------------------
/**
 * Return a deep copy of a JSON value with every object property whose name is
 * in `ephemeral` removed, at any nesting level.
 *
 * Primitives and null are returned as-is; arrays are mapped element by element.
 * The copy is built with Object.fromEntries so that a property literally named
 * "__proto__" stays an ordinary own property. Plain assignment would hit the
 * prototype setter and silently drop that subtree from later comparisons.
 *
 * @param data - Parsed JSON value.
 * @param ephemeral - Set of property names to drop.
 * @returns The stripped copy.
 */
function stripEphemeral(data, ephemeral) {
    if (typeof data !== "object" || data === null)
        return data;
    if (Array.isArray(data))
        return data.map((item) => stripEphemeral(item, ephemeral));
    const kept = Object.entries(data)
        .filter(([key]) => !ephemeral.has(key))
        .map(([key, value]) => [key, stripEphemeral(value, ephemeral)]);
    return Object.fromEntries(kept);
}
/**
 * Serialize a JSON value with object keys sorted, so two values that differ
 * only in property order produce the same string.
 */
function canonicalJson(value) {
    return JSON.stringify(value, (_key, v) => {
        if (typeof v !== "object" || v === null || Array.isArray(v))
            return v;
        const sorted = Object.entries(v).sort(([x], [y]) => (x < y ? -1 : x > y ? 1 : 0));
        return Object.fromEntries(sorted);
    });
}
/**
 * Strict diff — compares values at each key path.
 *
 * Objects are walked key by key for up to `depth` levels. Once the depth
 * budget is exhausted the remaining subtree is compared as a whole and, if it
 * differs, reported as a single entry at that path, so a difference below the
 * budget is never lost. Arrays are always compared and reported as a whole.
 * Whole-value comparisons ignore object key order.
 *
 * @param a - Baseline value.
 * @param b - Experiment value.
 * @param path - Dotted path of the current position ("" for the root).
 * @param depth - Remaining number of object levels to descend into.
 * @returns Entries of the form `{ path, baseline?, experiment? }`; a key that
 *   exists on one side only carries just that side's value.
 */
function diffJson(a, b, path, depth) {
    if (a === b)
        return [];
    const here = path || "(root)";
    const isObjectLike = (v) => typeof v === "object" && v !== null;
    if (!isObjectLike(a) || !isObjectLike(b)) {
        return [{ path: here, baseline: a, experiment: b }];
    }
    // Arrays (including array-vs-object mismatches) and subtrees beyond the
    // depth budget are compared wholesale instead of being skipped.
    if (depth <= 0 || Array.isArray(a) || Array.isArray(b)) {
        return canonicalJson(a) === canonicalJson(b)
            ? []
            : [{ path: here, baseline: a, experiment: b }];
    }
    // Own-property test: `in` would also see inherited names such as "toString".
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
    const entries = [];
    const allKeys = new Set([...Object.keys(a), ...Object.keys(b)]);
    for (const key of allKeys) {
        const subPath = path ? `${path}.${key}` : key;
        if (!hasOwn(a, key)) {
            entries.push({ path: subPath, experiment: b[key] });
        }
        else if (!hasOwn(b, key)) {
            entries.push({ path: subPath, baseline: a[key] });
        }
        else {
            entries.push(...diffJson(a[key], b[key], subPath, depth - 1));
        }
    }
    return entries;
}
/**
 * Structural diff — only checks that keys exist and types match.
 *
 * Values are never compared. Behaviour at each position:
 * - different `typeof` results → one entry with the two type names;
 * - a null on either side, or two non-object values → no entry;
 * - array vs. plain object → one entry ("array" / "object");
 * - two arrays → no entry (array elements are not inspected);
 * - two objects → keys are compared and shared keys are descended into.
 *
 * Nothing is reported once `depth` reaches zero, so only the first `depth`
 * object levels are checked.
 *
 * @param a - Baseline value.
 * @param b - Experiment value.
 * @param path - Dotted path of the current position ("" for the root).
 * @param depth - Remaining number of levels to inspect.
 * @returns Entries of the form `{ path, baseline?, experiment? }` whose values
 *   are type names; a key present on one side only carries just that side.
 */
function diffJsonStructural(a, b, path, depth) {
    if (depth <= 0)
        return [];
    const here = path || "(root)";
    const typeA = typeof a;
    const typeB = typeof b;
    if (typeA !== typeB) {
        return [{ path: here, baseline: typeA, experiment: typeB }];
    }
    if (typeA !== "object" || a === null || b === null)
        return [];
    const aIsArray = Array.isArray(a);
    const bIsArray = Array.isArray(b);
    if (aIsArray !== bIsArray) {
        return [
            {
                path: here,
                baseline: aIsArray ? "array" : "object",
                experiment: bIsArray ? "array" : "object",
            },
        ];
    }
    if (aIsArray)
        return []; // Arrays: structural match if both are arrays
    // Own-property test: `in` would also match inherited names such as
    // "toString" and misreport an added/removed key as a type mismatch.
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
    const entries = [];
    const allKeys = new Set([...Object.keys(a), ...Object.keys(b)]);
    for (const key of allKeys) {
        const subPath = path ? `${path}.${key}` : key;
        if (!hasOwn(a, key)) {
            entries.push({ path: subPath, experiment: typeof b[key] });
        }
        else if (!hasOwn(b, key)) {
            entries.push({ path: subPath, baseline: typeof a[key] });
        }
        else {
            entries.push(...diffJsonStructural(a[key], b[key], subPath, depth - 1));
        }
    }
    return entries;
}
291
+ // ---------------------------------------------------------------------------
292
+ // Score comparison
293
+ // ---------------------------------------------------------------------------
/**
 * Compare the "score-summary" artifact of the "calculate-scores" step between
 * the two captures.
 *
 * Missing `aggregate`, `scores`, `task` and `score` fields default to 0, an
 * empty list, "" and 0 respectively. Only tasks present on both sides are
 * listed in `perTask`. A breach is recorded when a score drops by more than
 * the matching threshold; increases never breach.
 *
 * @param thresholds - `{ aggregate, perTask }` maximum tolerated drops.
 * @returns `{ baselineMean, currentMean, delta, perTask, breaches }`, or
 *   undefined when either capture has no readable score summary.
 */
function computeScoreDiff(baselineDir, experimentDir, baselineManifest, experimentManifest, thresholds) {
    const readSummary = (dir, manifest) => findAndReadArtifact(dir, manifest, "calculate-scores", "score-summary");
    const baselineSummary = readSummary(baselineDir, baselineManifest);
    const experimentSummary = readSummary(experimentDir, experimentManifest);
    if (!(baselineSummary && experimentSummary)) {
        return undefined;
    }
    const baselineMean = baselineSummary.aggregate ?? 0;
    const currentMean = experimentSummary.aggregate ?? 0;
    const delta = currentMean - baselineMean;
    // Per-task comparison
    const baselineRows = baselineSummary.scores ?? [];
    const experimentRows = experimentSummary.scores ?? [];
    const currentByTask = new Map(experimentRows.map((row) => [row.task ?? "", row.score ?? 0]));
    const perTask = [];
    const breaches = [];
    for (const row of baselineRows) {
        const task = row.task ?? "";
        const baseline = row.score ?? 0;
        const current = currentByTask.get(task);
        if (current === undefined) {
            continue; // task absent from the experiment
        }
        const taskDelta = current - baseline;
        perTask.push({ task, baseline, current, delta: taskDelta });
        if (taskDelta < -thresholds.perTask) {
            breaches.push(`${task}: dropped ${Math.abs(taskDelta)} points`);
        }
    }
    if (delta < -thresholds.aggregate) {
        breaches.push(`Aggregate score dropped ${Math.abs(delta).toFixed(1)} points (threshold: ${thresholds.aggregate})`);
    }
    return { baselineMean, currentMean, delta, perTask, breaches };
}
330
+ // ---------------------------------------------------------------------------
331
+ // Timing comparison
332
+ // ---------------------------------------------------------------------------
/**
 * Compare per-step durations from the "pipeline-context" artifact of the
 * "pipeline" step.
 *
 * Only steps that have a `durationMs` on both sides are compared. The ratio is
 * experiment / baseline; when the baseline duration is not positive it is
 * Infinity for a positive experiment duration and 1 otherwise. A breach is
 * recorded when the ratio exceeds `thresholds.perStep[name]`, falling back to
 * `thresholds.multiplier`.
 *
 * @returns `{ totalDeltaMs, perStep, breaches }`, or undefined when either
 *   capture has no readable pipeline context.
 */
function computeTimingDiff(baselineDir, experimentDir, baselineManifest, experimentManifest, thresholds) {
    const readContext = (dir, manifest) => findAndReadArtifact(dir, manifest, "pipeline", "pipeline-context");
    const baselineContext = readContext(baselineDir, baselineManifest);
    const experimentContext = readContext(experimentDir, experimentManifest);
    if (!(baselineContext && experimentContext)) {
        return undefined;
    }
    const timedSteps = (context) => (context.steps ?? []).filter((step) => step.durationMs !== undefined);
    const baselineSteps = timedSteps(baselineContext);
    const experimentSteps = timedSteps(experimentContext);
    const currentMsByStep = new Map(experimentSteps.map((step) => [step.name, step.durationMs]));
    const perStep = [];
    const breaches = [];
    let totalBaseline = 0;
    let totalExperiment = 0;
    for (const step of baselineSteps) {
        const currentMs = currentMsByStep.get(step.name);
        if (currentMs === undefined) {
            continue; // step not timed in the experiment
        }
        const baselineMs = step.durationMs;
        let ratio;
        if (baselineMs > 0) {
            ratio = currentMs / baselineMs;
        }
        else if (currentMs > 0) {
            ratio = Infinity;
        }
        else {
            ratio = 1;
        }
        perStep.push({ step: step.name, baselineMs, currentMs, ratio });
        const limit = thresholds.perStep?.[step.name] ?? thresholds.multiplier;
        if (ratio > limit) {
            breaches.push(`${step.name}: ${ratio.toFixed(1)}x slower (threshold: ${limit}x)`);
        }
        totalBaseline += baselineMs;
        totalExperiment += currentMs;
    }
    return {
        totalDeltaMs: totalExperiment - totalBaseline,
        perStep,
        breaches,
    };
}
370
+ // ---------------------------------------------------------------------------
371
+ // Metadata comparison
372
+ // ---------------------------------------------------------------------------
/**
 * Compare capture-level metadata.
 *
 * `modeMatch` / `variantMatch` compare `pipeline.mode` and `pipeline.variant`
 * from the two manifests. `configDiffs` is a structural (keys and types only)
 * diff of the `config` object in each capture's pipeline context, three levels
 * deep, after ephemeral fields are removed; it stays empty when either context
 * cannot be read.
 *
 * @returns `{ modeMatch, variantMatch, configDiffs }`.
 */
function computeMetadataDiff(baselineDir, experimentDir, baselineManifest, experimentManifest, ephemeral) {
    const baselinePipeline = baselineManifest.pipeline;
    const experimentPipeline = experimentManifest.pipeline;
    const modeMatch = baselinePipeline.mode === experimentPipeline.mode;
    const variantMatch = baselinePipeline.variant === experimentPipeline.variant;
    // Compare config from pipeline-context
    const readContext = (dir, manifest) => findAndReadArtifact(dir, manifest, "pipeline", "pipeline-context");
    const baselineContext = readContext(baselineDir, baselineManifest);
    const experimentContext = readContext(experimentDir, experimentManifest);
    if (!(baselineContext && experimentContext)) {
        return { modeMatch, variantMatch, configDiffs: [] };
    }
    const baselineConfig = stripEphemeral(baselineContext.config ?? {}, ephemeral);
    const experimentConfig = stripEphemeral(experimentContext.config ?? {}, ephemeral);
    const configDiffs = diffJsonStructural(baselineConfig, experimentConfig, "config", 3);
    return { modeMatch, variantMatch, configDiffs };
}
387
+ // ---------------------------------------------------------------------------
388
+ // Security scan
389
+ // ---------------------------------------------------------------------------
/** Patterns that indicate potential secret values (not just key names). */
const SECRET_VALUE_PATTERNS = [
    /^sk-[a-zA-Z0-9_-]{20,}/, // OpenAI-style keys
    /^sk[A-Z][a-zA-Z0-9]{20,}/, // Sanity-style tokens (e.g., skJ3rMwt…)
    /^xoxb-/, // Slack bot tokens
    /^ghp_/, // GitHub personal tokens
    /^ghs_/, // GitHub server-to-server tokens
    /^Bearer\s+\S{20,}/, // Authorization header values
];
/**
 * Free-text counterparts of SECRET_VALUE_PATTERNS.
 *
 * The value patterns are anchored with `^`, which is right for a single JSON
 * string value but would only ever match at the very first character of a
 * text/markdown file. Here the anchor is replaced by a "not preceded by a
 * letter or digit" lookbehind, so a token is found anywhere in the content
 * while ordinary words that merely contain the prefix (e.g. "task-…") are
 * ignored. `source` keeps the original pattern text for reporting.
 */
const SECRET_TEXT_PATTERNS = SECRET_VALUE_PATTERNS.map((pattern) => ({
    source: pattern.source,
    matcher: new RegExp(pattern.source.replace(/^\^/, "(?<![A-Za-z0-9])"), 
    // Drop stateful flags so repeated .test() calls are independent.
    pattern.flags.replace(/[gy]/g, "")),
}));
/**
 * Keys that should never appear with non-empty string values in captured artifacts.
 *
 * Uses case-insensitive matching without word boundaries to handle camelCase
 * (e.g., "apiToken", "secretKey"). Intentionally broader than the orchestrator's
 * sanitization pattern (/token|secret|key/i) — this also catches "password",
 * "credential", and "authorization" in artifacts that bypass orchestrator
 * sanitization (e.g., in-memory captures from PublishReportStep or CallbackStep).
 */
const SECRET_KEY_PATTERN = /(?:api[_-]?token|auth(?:orization|[_-]?token)|access[_-]?token)|secret|apiKey|api_key|password|credential|^set-cookie$|^cookie$/i;
/**
 * Scan every artifact listed in both manifests for leaked secrets.
 *
 * The experiment capture is scanned first, then the baseline. Artifacts whose
 * file does not exist are skipped. JSON artifacts are parsed and walked with
 * scanJsonForSecrets (unparseable JSON is skipped); every other format is
 * searched as free text, producing at most one violation per pattern per file.
 *
 * @returns `{ leaksFound, violations }` where each violation is
 *   `{ file, detail }` and `file` is the artifact's manifest-relative path.
 */
function scanForSecrets(baselineDir, experimentDir, baselineManifest, experimentManifest) {
    const violations = [];
    for (const [dir, manifest] of [
        [experimentDir, experimentManifest],
        [baselineDir, baselineManifest],
    ]) {
        for (const artifact of manifest.artifacts) {
            const filePath = join(dir, artifact.path);
            if (!existsSync(filePath))
                continue;
            const content = readFileSync(filePath, "utf-8");
            if (artifact.format === "json") {
                try {
                    const data = JSON.parse(content);
                    scanJsonForSecrets(data, artifact.path, "", violations);
                }
                catch {
                    // Non-parseable — skip
                }
            }
            else {
                // Text/markdown: scan for secret-looking strings anywhere in the file
                for (const { source, matcher } of SECRET_TEXT_PATTERNS) {
                    if (matcher.test(content)) {
                        violations.push({
                            file: artifact.path,
                            detail: `Content matches secret pattern: ${source}`,
                        });
                    }
                }
            }
        }
    }
    return { leaksFound: violations.length > 0, violations };
}
/**
 * Recursively walk a parsed JSON value and append secret findings to
 * `violations`.
 *
 * A string is flagged when it starts with one of SECRET_VALUE_PATTERNS. An
 * object property is flagged when its name matches SECRET_KEY_PATTERN and its
 * value is a non-empty string; the value is then still scanned, so one
 * property can yield two findings. Nesting deeper than 10 levels is not
 * inspected.
 *
 * @param data - Current JSON value.
 * @param file - Artifact path used in the reported violation.
 * @param path - Dotted/indexed path of `data` within the document.
 * @param violations - Output array, mutated in place.
 * @param depth - Current recursion depth (callers normally omit it).
 */
function scanJsonForSecrets(data, file, path, violations, depth = 0) {
    if (depth > 10)
        return;
    if (typeof data === "string") {
        for (const pattern of SECRET_VALUE_PATTERNS) {
            if (pattern.test(data)) {
                violations.push({
                    file,
                    detail: `Value at "${path}" matches secret pattern: ${pattern.source}`,
                });
            }
        }
    }
    else if (Array.isArray(data)) {
        for (let i = 0; i < data.length; i++) {
            scanJsonForSecrets(data[i], file, `${path}[${i}]`, violations, depth + 1);
        }
    }
    else if (typeof data === "object" && data !== null) {
        for (const [key, value] of Object.entries(data)) {
            // Check if a key name looks like it holds a secret AND has a string value
            if (SECRET_KEY_PATTERN.test(key) &&
                typeof value === "string" &&
                value.length > 0) {
                violations.push({
                    file,
                    detail: `Key "${path ? path + "." : ""}${key}" may contain a secret value`,
                });
            }
            scanJsonForSecrets(value, file, path ? `${path}.${key}` : key, violations, depth + 1);
        }
    }
}
477
+ // ---------------------------------------------------------------------------
478
+ // Helpers
479
+ // ---------------------------------------------------------------------------
/**
 * Locate the first manifest entry with the given step and type and return its
 * file parsed as JSON.
 *
 * @param dir - Capture directory the manifest paths are relative to.
 * @param manifest - Parsed manifest with an `artifacts` array.
 * @param step - Step name to match.
 * @param type - Artifact type to match.
 * @returns The parsed JSON, or undefined when there is no such entry, the file
 *   does not exist, or its content is not valid JSON.
 */
function findAndReadArtifact(dir, manifest, step, type) {
    const match = manifest.artifacts.find((artifact) => artifact.step === step && artifact.type === type);
    if (!match) {
        return undefined;
    }
    const file = join(dir, match.path);
    let parsed;
    if (existsSync(file)) {
        try {
            parsed = JSON.parse(readFileSync(file, "utf-8"));
        }
        catch {
            parsed = undefined; // unreadable JSON is treated as "not available"
        }
    }
    return parsed;
}
@@ -0,0 +1,42 @@
1
+ /**
2
+ * FilesystemArtifactCollector — writes captured artifacts to a local directory.
3
+ *
4
+ * Accumulates artifact entries in memory during pipeline execution.
5
+ * On flush(), creates a structured directory with one subdirectory per
6
+ * step, writes all artifacts, and generates a manifest.json.
7
+ *
8
+ * Design principles:
9
+ * - capture() and captureFile() are synchronous (no I/O during step execution)
10
+ * - flush() does all I/O at pipeline end
11
+ * - Failures in capture/captureFile are swallowed (P5: non-blocking)
12
+ */
13
+ import type { ArtifactCollector, CaptureFlushResult } from "../_vendor/ailf-core/index.d.ts";
/** Construction options for {@link FilesystemArtifactCollector}. */
export interface FilesystemCollectorOptions {
    /** Base directory for capture output (e.g., results/captures/). */
    captureDir: string;

    /** Pipeline mode (used for directory naming). */
    mode: string;

    /** Whether to compress on flush (Phase 5 — currently ignored). */
    compress: boolean;

    /** Whether mode-specific extras are enabled. */
    extras: boolean;

    /** Optional pipeline metadata recorded in the manifest. */
    pipeline?: {
        variant?: string;
        source?: string;
        areas?: string[];
    };
}
/**
 * ArtifactCollector implementation that writes captured artifacts to a local
 * directory.
 *
 * Entries are accumulated in memory while the pipeline runs; all filesystem
 * work is deferred to {@link FilesystemArtifactCollector.flush}.
 */
export declare class FilesystemArtifactCollector implements ArtifactCollector {
    /** Always `true` for this collector. */
    readonly enabled = true;
    /** Whether mode-specific extras are enabled (presumably mirrors `options.extras` — confirm in the implementation). */
    readonly extrasEnabled: boolean;

    private readonly entries;
    private readonly captureId;
    private readonly outputDir;
    private readonly startedAt;
    private readonly options;

    constructor(options: FilesystemCollectorOptions);

    /**
     * Record an in-memory value as an artifact of `step` with the given `type`.
     * Synchronous: no I/O happens during step execution, and failures are
     * swallowed so capturing never blocks the pipeline.
     */
    capture(step: string, type: string, data: unknown, meta?: Record<string, unknown>): void;

    /**
     * Record an existing file as an artifact of `step` with the given `type`.
     * Synchronous and non-blocking in the same way as `capture()`.
     */
    captureFile(step: string, type: string, filePath: string, meta?: Record<string, unknown>): void;

    /**
     * Perform all I/O at pipeline end: create the structured output directory
     * (one subdirectory per step), write every artifact, and generate
     * manifest.json.
     */
    flush(): Promise<CaptureFlushResult>;
}