@sanity/ailf 1.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (499) hide show
  1. package/README.md +0 -1
  2. package/canonical/grader-references/README.md +2 -2
  3. package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
  4. package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
  5. package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
  6. package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
  7. package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
  8. package/config/features.ts +1 -1
  9. package/config/models.ts +29 -12
  10. package/config/sources.ts +1 -1
  11. package/config/thresholds.ts +1 -1
  12. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
  13. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
  14. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
  15. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
  16. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
  17. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
  18. package/dist/_vendor/ailf-core/config-helpers.d.ts +20 -17
  19. package/dist/_vendor/ailf-core/config-helpers.js +51 -2
  20. package/dist/_vendor/ailf-core/examples/index.d.ts +166 -80
  21. package/dist/_vendor/ailf-core/examples/index.js +213 -94
  22. package/dist/_vendor/ailf-core/index.d.ts +3 -2
  23. package/dist/_vendor/ailf-core/index.js +2 -1
  24. package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
  25. package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
  26. package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
  27. package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
  28. package/dist/_vendor/ailf-core/ports/context.d.ts +22 -1
  29. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
  30. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
  31. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
  32. package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
  33. package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
  34. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
  35. package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
  36. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +7 -1
  37. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +16 -2
  38. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
  39. package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
  40. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
  41. package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
  42. package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
  43. package/dist/_vendor/ailf-core/services/index.js +1 -1
  44. package/dist/_vendor/ailf-core/services/scoring.js +9 -0
  45. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +25 -1
  46. package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
  47. package/dist/_vendor/ailf-core/types/index.d.ts +48 -7
  48. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +105 -23
  49. package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
  50. package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
  51. package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
  52. package/dist/adapters/api-client/remediation.js +2 -2
  53. package/dist/adapters/config-sources/file-config-adapter.js +7 -1
  54. package/dist/adapters/config-sources/ts-config-loader.js +21 -13
  55. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
  56. package/dist/adapters/index.d.ts +0 -1
  57. package/dist/adapters/index.js +0 -1
  58. package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
  59. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  60. package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
  61. package/dist/adapters/task-sources/content-lake-task-source.js +21 -26
  62. package/dist/adapters/task-sources/index.d.ts +3 -4
  63. package/dist/adapters/task-sources/index.js +3 -4
  64. package/dist/adapters/task-sources/repo-schemas.d.ts +219 -17
  65. package/dist/adapters/task-sources/repo-schemas.js +228 -20
  66. package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
  67. package/dist/adapters/task-sources/repo-task-source.js +81 -122
  68. package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
  69. package/dist/adapters/task-sources/repo-trigger.js +1 -1
  70. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  71. package/dist/adapters/task-sources/repo-validation.js +126 -5
  72. package/dist/adapters/task-sources/task-file-loader.d.ts +10 -7
  73. package/dist/adapters/task-sources/task-file-loader.js +21 -7
  74. package/dist/agent-observer/test-imports.d.ts +7 -0
  75. package/dist/agent-observer/test-imports.js +185 -0
  76. package/dist/artifact-capture/comparator.d.ts +22 -0
  77. package/dist/artifact-capture/comparator.js +493 -0
  78. package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
  79. package/dist/artifact-capture/filesystem-collector.js +237 -0
  80. package/dist/artifact-capture/redact-artifact.d.ts +20 -0
  81. package/dist/artifact-capture/redact-artifact.js +115 -0
  82. package/dist/assertions/source-isolation.d.ts +1 -1
  83. package/dist/assertions/source-isolation.js +1 -1
  84. package/dist/cli.js +4 -0
  85. package/dist/commands/calculate-scores.js +1 -0
  86. package/dist/commands/capture-compare.d.ts +15 -0
  87. package/dist/commands/capture-compare.js +253 -0
  88. package/dist/commands/capture-list.d.ts +12 -0
  89. package/dist/commands/capture-list.js +147 -0
  90. package/dist/commands/capture.d.ts +9 -0
  91. package/dist/commands/capture.js +16 -0
  92. package/dist/commands/chronic-failures.d.ts +8 -0
  93. package/dist/commands/chronic-failures.js +33 -0
  94. package/dist/commands/coverage-audit.js +3 -1
  95. package/dist/commands/explain-handler.d.ts +1 -1
  96. package/dist/commands/explain-handler.js +37 -8
  97. package/dist/commands/fetch-docs.js +1 -0
  98. package/dist/commands/generate-configs.d.ts +3 -3
  99. package/dist/commands/generate-configs.js +20 -8
  100. package/dist/commands/init.d.ts +5 -4
  101. package/dist/commands/init.js +190 -25
  102. package/dist/commands/pipeline-action.d.ts +7 -1
  103. package/dist/commands/pipeline-action.js +43 -19
  104. package/dist/commands/pipeline.d.ts +6 -1
  105. package/dist/commands/pipeline.js +7 -2
  106. package/dist/commands/pr-comment.js +1 -0
  107. package/dist/commands/publish.js +1 -0
  108. package/dist/commands/shared/help.js +2 -2
  109. package/dist/commands/update-quality-scores.d.ts +5 -0
  110. package/dist/commands/update-quality-scores.js +20 -0
  111. package/dist/commands/validate-tasks.d.ts +2 -2
  112. package/dist/commands/validate-tasks.js +26 -15
  113. package/dist/composition-root.d.ts +15 -4
  114. package/dist/composition-root.js +100 -55
  115. package/dist/config/features.ts +23 -0
  116. package/dist/config/models.ts +100 -0
  117. package/dist/config/prompts.ts +16 -0
  118. package/dist/config/rubrics.ts +225 -0
  119. package/dist/config/schedules.ts +47 -0
  120. package/dist/config/sinks.ts +37 -0
  121. package/dist/config/sources.ts +21 -0
  122. package/dist/config/thresholds.ts +61 -0
  123. package/dist/index.d.ts +41 -0
  124. package/dist/index.js +48 -0
  125. package/dist/lib/agent-behavior-report.d.ts +8 -0
  126. package/dist/lib/agent-behavior-report.js +185 -0
  127. package/dist/lib/baseline.d.ts +19 -0
  128. package/dist/lib/baseline.js +153 -0
  129. package/dist/lib/calculate-scores.d.ts +23 -0
  130. package/dist/lib/calculate-scores.js +42 -0
  131. package/dist/lib/compare.d.ts +18 -0
  132. package/dist/lib/compare.js +170 -0
  133. package/dist/lib/coverage-audit.d.ts +4 -0
  134. package/dist/lib/coverage-audit.js +42 -0
  135. package/dist/lib/discovery-report.d.ts +13 -0
  136. package/dist/lib/discovery-report.js +57 -0
  137. package/dist/lib/fetch-docs.d.ts +30 -0
  138. package/dist/lib/fetch-docs.js +171 -0
  139. package/dist/lib/generate-configs.d.ts +25 -0
  140. package/dist/lib/generate-configs.js +42 -0
  141. package/dist/lib/grader-api.d.ts +21 -0
  142. package/dist/lib/grader-api.js +34 -0
  143. package/dist/lib/grader-compare.d.ts +19 -0
  144. package/dist/lib/grader-compare.js +91 -0
  145. package/dist/lib/grader-consistency.d.ts +27 -0
  146. package/dist/lib/grader-consistency.js +79 -0
  147. package/dist/lib/grader-sensitivity.d.ts +19 -0
  148. package/dist/lib/grader-sensitivity.js +75 -0
  149. package/dist/lib/grader-validate.d.ts +19 -0
  150. package/dist/lib/grader-validate.js +78 -0
  151. package/dist/lib/measure-retrieval.d.ts +14 -0
  152. package/dist/lib/measure-retrieval.js +71 -0
  153. package/dist/lib/pr-comment.d.ts +16 -0
  154. package/dist/lib/pr-comment.js +28 -0
  155. package/dist/lib/readiness-report.d.ts +13 -0
  156. package/dist/lib/readiness-report.js +108 -0
  157. package/dist/lib/webhook-server.d.ts +11 -0
  158. package/dist/lib/webhook-server.js +24 -0
  159. package/dist/lib/weekly-digest.d.ts +24 -0
  160. package/dist/lib/weekly-digest.js +148 -0
  161. package/dist/orchestration/build-app-context.js +13 -0
  162. package/dist/orchestration/build-step-sequence.js +4 -2
  163. package/dist/orchestration/cache-context.d.ts +23 -0
  164. package/dist/orchestration/cache-context.js +43 -0
  165. package/dist/orchestration/env-bridge.d.ts +21 -0
  166. package/dist/orchestration/env-bridge.js +66 -0
  167. package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
  168. package/dist/orchestration/load-pipeline-tasks.js +52 -0
  169. package/dist/orchestration/pipeline-orchestrator.js +75 -5
  170. package/dist/orchestration/step-runner.js +5 -1
  171. package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
  172. package/dist/orchestration/steps/calculate-scores-step.js +13 -0
  173. package/dist/orchestration/steps/callback-step.js +10 -1
  174. package/dist/orchestration/steps/compare-step.js +6 -3
  175. package/dist/orchestration/steps/discovery-report-step.js +6 -2
  176. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  177. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  178. package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
  179. package/dist/orchestration/steps/fetch-docs-step.js +32 -19
  180. package/dist/orchestration/steps/gap-analysis-step.js +13 -2
  181. package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
  182. package/dist/orchestration/steps/generate-configs-step.js +77 -26
  183. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
  184. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  185. package/dist/orchestration/steps/publish-report-step.js +19 -0
  186. package/dist/orchestration/steps/readiness-step.js +8 -3
  187. package/dist/orchestration/steps/report-step.js +17 -4
  188. package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
  189. package/dist/orchestration/steps/run-eval-step.js +51 -31
  190. package/dist/pipeline/agent-behavior-report.js +6 -0
  191. package/dist/pipeline/attribution.d.ts +1 -1
  192. package/dist/pipeline/attribution.js +1 -1
  193. package/dist/pipeline/cache.js +29 -15
  194. package/dist/pipeline/calculate-scores.d.ts +2 -0
  195. package/dist/pipeline/calculate-scores.js +70 -33
  196. package/dist/pipeline/chronic-failures.d.ts +55 -0
  197. package/dist/pipeline/chronic-failures.js +110 -0
  198. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
  199. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
  200. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
  201. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +132 -62
  202. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
  203. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
  204. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
  205. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
  206. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
  207. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
  208. package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
  209. package/dist/pipeline/compiler/assertion-mapper.js +1 -1
  210. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
  211. package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
  212. package/dist/pipeline/compiler/config-loader.d.ts +14 -0
  213. package/dist/pipeline/compiler/config-loader.js +42 -2
  214. package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
  215. package/dist/pipeline/compiler/fixture-resolver.js +1 -1
  216. package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
  217. package/dist/pipeline/compiler/ignore-fields.js +1 -1
  218. package/dist/pipeline/compiler/index.d.ts +2 -5
  219. package/dist/pipeline/compiler/index.js +2 -5
  220. package/dist/pipeline/compiler/literacy-bridge.d.ts +2 -2
  221. package/dist/pipeline/compiler/literacy-bridge.js +2 -2
  222. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  223. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  224. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  225. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  226. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  227. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  228. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +23 -0
  229. package/dist/pipeline/compiler/mode-bases/literacy.js +132 -0
  230. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  231. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  232. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  233. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  234. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  235. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  236. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  237. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  238. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  239. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  240. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  241. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  242. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  243. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  244. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
  245. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  246. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  247. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  248. package/dist/pipeline/compiler/mode-handlers/index.d.ts +6 -7
  249. package/dist/pipeline/compiler/mode-handlers/index.js +6 -8
  250. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  251. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  252. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  253. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  254. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  255. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  256. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  257. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  258. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  259. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  260. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  261. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  262. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  263. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  264. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  265. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  266. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  267. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  268. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  269. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  270. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  271. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  272. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  273. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  274. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +63 -6
  275. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
  276. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
  277. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  278. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  279. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  280. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  281. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  282. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  283. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  284. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +108 -0
  285. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  286. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  287. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  288. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  289. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +3 -1
  290. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +65 -67
  291. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  292. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +191 -0
  293. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  294. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +101 -0
  295. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  296. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  297. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +19 -0
  298. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +323 -0
  299. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +103 -0
  300. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  301. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
  302. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
  303. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  304. package/dist/pipeline/compiler/preset-loader.js +99 -0
  305. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +7 -10
  306. package/dist/pipeline/compiler/presets/sanity-literacy.js +11 -157
  307. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
  308. package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
  309. package/dist/pipeline/compiler/provider-assembler.js +13 -7
  310. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
  311. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
  312. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
  313. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
  314. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
  315. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
  316. package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
  317. package/dist/pipeline/compiler/sandbox/index.js +1 -1
  318. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
  319. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
  320. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
  321. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
  322. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
  323. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
  324. package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
  325. package/dist/pipeline/compiler/scoring-bridge.js +1 -1
  326. package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
  327. package/dist/pipeline/compiler/task-bridge.js +92 -0
  328. package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
  329. package/dist/pipeline/compiler/task-graph-builder.js +1 -4
  330. package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
  331. package/dist/pipeline/compiler/telemetry/index.js +1 -1
  332. package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
  333. package/dist/pipeline/compiler/variable-resolver.js +1 -1
  334. package/dist/pipeline/coverage-audit.d.ts +1 -1
  335. package/dist/pipeline/coverage-audit.js +1 -1
  336. package/dist/pipeline/degradations.d.ts +1 -1
  337. package/dist/pipeline/degradations.js +1 -1
  338. package/dist/pipeline/expand-tasks.d.ts +2 -2
  339. package/dist/pipeline/expand-tasks.js +2 -2
  340. package/dist/pipeline/failure-modes.d.ts +1 -1
  341. package/dist/pipeline/failure-modes.js +13 -1
  342. package/dist/pipeline/gap-analysis.d.ts +1 -1
  343. package/dist/pipeline/gap-analysis.js +3 -1
  344. package/dist/pipeline/generate-configs.d.ts +2 -2
  345. package/dist/pipeline/generate-configs.js +16 -9
  346. package/dist/pipeline/grader-compare-runner.d.ts +1 -1
  347. package/dist/pipeline/grader-compare-runner.js +7 -1
  348. package/dist/pipeline/grader-comparison.d.ts +1 -1
  349. package/dist/pipeline/grader-comparison.js +1 -1
  350. package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
  351. package/dist/pipeline/grader-consistency-runner.js +7 -1
  352. package/dist/pipeline/grader-consistency.d.ts +1 -1
  353. package/dist/pipeline/grader-consistency.js +1 -1
  354. package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
  355. package/dist/pipeline/grader-sensitivity-runner.js +1 -1
  356. package/dist/pipeline/grader-sensitivity.d.ts +1 -1
  357. package/dist/pipeline/grader-sensitivity.js +1 -1
  358. package/dist/pipeline/grader-validate-runner.d.ts +1 -1
  359. package/dist/pipeline/grader-validate-runner.js +2 -2
  360. package/dist/pipeline/grader-validation.d.ts +1 -1
  361. package/dist/pipeline/grader-validation.js +1 -1
  362. package/dist/pipeline/map-request-to-config.js +16 -2
  363. package/dist/pipeline/mirror-repo-tasks.d.ts +8 -8
  364. package/dist/pipeline/mirror-repo-tasks.js +10 -10
  365. package/dist/pipeline/plan-format.d.ts +1 -1
  366. package/dist/pipeline/plan-format.js +1 -1
  367. package/dist/pipeline/plan.d.ts +1 -1
  368. package/dist/pipeline/plan.js +68 -30
  369. package/dist/pipeline/probe.d.ts +1 -1
  370. package/dist/pipeline/probe.js +1 -1
  371. package/dist/pipeline/readiness-report.d.ts +2 -2
  372. package/dist/pipeline/readiness-report.js +2 -2
  373. package/dist/pipeline/release-classification.d.ts +1 -1
  374. package/dist/pipeline/release-classification.js +1 -1
  375. package/dist/pipeline/release-report.d.ts +1 -1
  376. package/dist/pipeline/release-report.js +1 -1
  377. package/dist/pipeline/repo-eval-comment.d.ts +1 -1
  378. package/dist/pipeline/repo-eval-comment.js +1 -1
  379. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  380. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  381. package/dist/pipeline/resolve-mappings.d.ts +6 -6
  382. package/dist/pipeline/resolve-mappings.js +44 -44
  383. package/dist/pipeline/retrieval-metrics.d.ts +3 -3
  384. package/dist/pipeline/retrieval-metrics.js +28 -20
  385. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  386. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  387. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  388. package/dist/pipeline/steps/compare-step.js +90 -0
  389. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  390. package/dist/pipeline/steps/eval-step.js +347 -0
  391. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  392. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  393. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  394. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  395. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  396. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  397. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  398. package/dist/pipeline/steps/publish-report-step.js +243 -0
  399. package/dist/pipeline/steps/report-step.d.ts +13 -0
  400. package/dist/pipeline/steps/report-step.js +56 -0
  401. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  402. package/dist/pipeline/steps/update-scores-step.js +42 -0
  403. package/dist/pipeline/targeted-loo.d.ts +1 -1
  404. package/dist/pipeline/targeted-loo.js +1 -1
  405. package/dist/pipeline/thresholds.d.ts +1 -1
  406. package/dist/pipeline/thresholds.js +1 -1
  407. package/dist/pipeline/validate.js +13 -0
  408. package/dist/report-store.d.ts +17 -0
  409. package/dist/report-store.js +24 -0
  410. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  411. package/dist/scripts/agent-behavior-report.js +315 -0
  412. package/dist/scripts/baseline.d.ts +43 -0
  413. package/dist/scripts/baseline.js +267 -0
  414. package/dist/scripts/calculate-scores.d.ts +166 -0
  415. package/dist/scripts/calculate-scores.js +1296 -0
  416. package/dist/scripts/compare.d.ts +22 -0
  417. package/dist/scripts/compare.js +334 -0
  418. package/dist/scripts/coverage-audit.d.ts +44 -0
  419. package/dist/scripts/coverage-audit.js +209 -0
  420. package/dist/scripts/debug-eval.d.ts +19 -0
  421. package/dist/scripts/debug-eval.js +73 -0
  422. package/dist/scripts/discovery-report.d.ts +58 -0
  423. package/dist/scripts/discovery-report.js +250 -0
  424. package/dist/scripts/fetch-docs.d.ts +35 -0
  425. package/dist/scripts/fetch-docs.js +472 -0
  426. package/dist/scripts/generate-configs.d.ts +66 -0
  427. package/dist/scripts/generate-configs.js +459 -0
  428. package/dist/scripts/grader-api.d.ts +27 -0
  429. package/dist/scripts/grader-api.js +206 -0
  430. package/dist/scripts/grader-compare.d.ts +22 -0
  431. package/dist/scripts/grader-compare.js +368 -0
  432. package/dist/scripts/grader-consistency.d.ts +20 -0
  433. package/dist/scripts/grader-consistency.js +313 -0
  434. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  435. package/dist/scripts/grader-sensitivity.js +354 -0
  436. package/dist/scripts/grader-validate.d.ts +19 -0
  437. package/dist/scripts/grader-validate.js +267 -0
  438. package/dist/scripts/measure-retrieval.d.ts +10 -0
  439. package/dist/scripts/measure-retrieval.js +145 -0
  440. package/dist/scripts/migrate-task-mode.d.ts +1 -1
  441. package/dist/scripts/migrate-task-mode.js +1 -1
  442. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
  443. package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
  444. package/dist/scripts/pipeline.d.ts +76 -0
  445. package/dist/scripts/pipeline.js +1031 -0
  446. package/dist/scripts/pr-comment.d.ts +10 -0
  447. package/dist/scripts/pr-comment.js +510 -0
  448. package/dist/scripts/readiness-report.d.ts +88 -0
  449. package/dist/scripts/readiness-report.js +342 -0
  450. package/dist/scripts/update-quality-scores.d.ts +15 -0
  451. package/dist/scripts/update-quality-scores.js +184 -0
  452. package/dist/scripts/validate-task-sources.d.ts +1 -1
  453. package/dist/scripts/validate-task-sources.js +1 -1
  454. package/dist/scripts/validate.d.ts +13 -0
  455. package/dist/scripts/validate.js +79 -0
  456. package/dist/scripts/webhook-server.d.ts +26 -0
  457. package/dist/scripts/webhook-server.js +147 -0
  458. package/dist/scripts/weekly-digest.d.ts +24 -0
  459. package/dist/scripts/weekly-digest.js +144 -0
  460. package/dist/sinks/format-slack.d.ts +64 -0
  461. package/dist/sinks/format-slack.js +306 -0
  462. package/dist/sinks/slack-sink.d.ts +27 -0
  463. package/dist/sinks/slack-sink.js +78 -0
  464. package/dist/sinks/types.d.ts +1 -1
  465. package/dist/sinks/types.js +1 -1
  466. package/dist/sinks/webhook-sink.d.ts +19 -0
  467. package/dist/sinks/webhook-sink.js +50 -0
  468. package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
  469. package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
  470. package/dist/tasks/literacy/content-lake.task.ts +181 -0
  471. package/dist/tasks/literacy/frameworks.task.ts +129 -0
  472. package/dist/tasks/literacy/functions.task.ts +70 -0
  473. package/dist/tasks/literacy/groq.task.ts +259 -0
  474. package/dist/tasks/literacy/image-handling.task.ts +95 -0
  475. package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
  476. package/dist/tasks/literacy/portable-text.task.ts +169 -0
  477. package/dist/tasks/literacy/studio-setup.task.ts +134 -0
  478. package/dist/tasks/literacy/visual-editing.task.ts +147 -0
  479. package/package.json +32 -24
  480. package/tasks/.expanded.agentic.yaml +280 -0
  481. package/tasks/.expanded.yaml +565 -0
  482. package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
  483. package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
  484. package/tasks/literacy/content-lake.task.ts +181 -0
  485. package/tasks/literacy/frameworks.task.ts +1 -0
  486. package/tasks/literacy/functions.task.ts +1 -0
  487. package/tasks/literacy/groq.task.ts +1 -0
  488. package/tasks/literacy/image-handling.task.ts +95 -0
  489. package/tasks/literacy/nextjs-live.task.ts +2 -1
  490. package/tasks/literacy/portable-text.task.ts +169 -0
  491. package/tasks/literacy/studio-setup.task.ts +5 -2
  492. package/tasks/literacy/visual-editing.task.ts +1 -0
  493. package/LICENSE +0 -21
  494. package/tasks/frameworks.yaml +0 -98
  495. package/tasks/functions.yaml +0 -51
  496. package/tasks/groq.yaml +0 -216
  497. package/tasks/nextjs-live.yaml +0 -62
  498. package/tasks/studio-setup.yaml +0 -111
  499. package/tasks/visual-editing.yaml +0 -120
@@ -0,0 +1,147 @@
1
+ /**
2
+ * webhook-server.ts
3
+ *
4
+ * Local development server for testing the webhook handler.
5
+ *
6
+ * Starts an HTTP server that receives Sanity webhook payloads, processes
7
+ * them through the WebhookHandler, and logs results. Useful for local
8
+ * development and testing the full event-driven trigger flow.
9
+ *
10
+ * Usage:
11
+ * pnpm webhook-server # start on port 3333
12
+ * WEBHOOK_PORT=8080 pnpm webhook-server # custom port
13
+ *
14
+ * Test with curl:
15
+ * curl -X POST http://localhost:3333/webhook \
16
+ * -H "Content-Type: application/json" \
17
+ * -d '{"operation":"update","result":{"_id":"abc","_type":"article","slug":{"current":"groq-introduction"}}}'
18
+ *
19
+ * Endpoints:
20
+ * POST /webhook — handle a Sanity webhook payload
21
+ * GET /health — handler diagnostics (budget, pending, tracked slugs)
22
+ * GET /mappings — list all tracked document slugs and their areas
23
+ *
24
+ * @see docs/design-docs/report-store/visibility-workflows.md
25
+ */
26
+ import { createServer } from "http";
27
+ import { dirname, resolve } from "path";
28
+ import { fileURLToPath } from "url";
29
+ import { allTrackedSlugs, buildReverseMapping, } from "../pipeline/reverse-mapping.js";
30
+ import { WebhookHandler } from "../webhook/handler.js";
const __dirname = dirname(fileURLToPath(import.meta.url));
const ROOT = resolve(__dirname, "..", "..");
// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------
/**
 * Read a non-negative integer setting from the environment.
 *
 * An unset or empty variable yields the fallback silently. A value that does
 * not parse to a non-negative integer also yields the fallback, with a
 * warning, so that NaN never reaches `server.listen()` or the handler.
 *
 * @param {string} name - environment variable name
 * @param {number} fallback - value used when the variable is unset or invalid
 * @returns {number}
 */
function readIntEnv(name, fallback) {
  const raw = process.env[name];
  if (raw === undefined || raw === "") {
    return fallback;
  }
  const parsed = Number.parseInt(raw, 10);
  if (Number.isNaN(parsed) || parsed < 0) {
    console.warn(` Ignoring invalid ${name}=${JSON.stringify(raw)}; using ${fallback}`);
    return fallback;
  }
  return parsed;
}
const PORT = readIntEnv("WEBHOOK_PORT", 3333);
const GITHUB_TOKEN = process.env.GITHUB_TOKEN ?? "";
// No token means the server reports itself as a dry run.
const DRY_RUN = !GITHUB_TOKEN;
// ---------------------------------------------------------------------------
// Handler
// ---------------------------------------------------------------------------
const handler = new WebhookHandler({
  dailyBudget: readIntEnv("WEBHOOK_DAILY_BUDGET", 20),
  debounceMs: readIntEnv("WEBHOOK_DEBOUNCE_MS", 10000), // 10s for local dev
  githubToken: GITHUB_TOKEN,
  rootDir: ROOT,
});
48
+ // ---------------------------------------------------------------------------
49
+ // HTTP Server
50
+ // ---------------------------------------------------------------------------
const server = createServer((req, res) => {
  // handleRequest answers its own failures with a 500, so the returned
  // promise can be discarded without risking an unhandled rejection.
  void handleRequest(req, res);
});
/**
 * Route a single HTTP request to the matching endpoint.
 *
 *   OPTIONS (any path) — CORS preflight, answered with 200
 *   GET  /health       — handler diagnostics plus the dry-run flag
 *   GET  /mappings     — tracked slugs with their reverse-mapping entries
 *   POST /webhook      — parse the JSON body and pass it to the handler
 *
 * Any other request gets a 404 that lists the endpoints. A body that is not
 * valid JSON gets a 400. An exception raised while serving a request (for
 * example a malformed request URL, or a request stream error) is logged and
 * answered with a 500 instead of escaping as a rejected promise.
 *
 * @param {import("http").IncomingMessage} req
 * @param {import("http").ServerResponse} res
 * @returns {Promise<void>}
 */
async function handleRequest(req, res) {
  try {
    const url = new URL(req.url ?? "/", `http://localhost:${PORT}`);
    // CORS headers for local dev
    res.setHeader("Access-Control-Allow-Origin", "*");
    res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
    res.setHeader("Access-Control-Allow-Headers", "Content-Type");
    if (req.method === "OPTIONS") {
      res.writeHead(200);
      res.end();
      return;
    }
    // Health check
    if (url.pathname === "/health" && req.method === "GET") {
      const diagnostics = handler.diagnostics();
      res.writeHead(200, { "Content-Type": "application/json" });
      res.end(JSON.stringify({ dryRun: DRY_RUN, ...diagnostics }, null, 2));
      return;
    }
    // Mappings
    if (url.pathname === "/mappings" && req.method === "GET") {
      const reverseMapping = buildReverseMapping(ROOT);
      const slugs = allTrackedSlugs(reverseMapping);
      const mappings = Object.fromEntries(slugs.map((slug) => [slug, reverseMapping.get(slug)]));
      res.writeHead(200, { "Content-Type": "application/json" });
      res.end(JSON.stringify({ mappings, slugCount: slugs.length }, null, 2));
      return;
    }
    // Webhook handler
    if (url.pathname === "/webhook" && req.method === "POST") {
      const body = await readBody(req);
      let payload;
      try {
        payload = JSON.parse(body);
      } catch {
        res.writeHead(400, { "Content-Type": "application/json" });
        res.end(JSON.stringify({ error: "Invalid JSON" }));
        return;
      }
      if (DRY_RUN) {
        console.log("\n 🔶 DRY RUN — would dispatch (no GITHUB_TOKEN set)");
      }
      const result = handler.handle(payload);
      console.log(` → ${result.status}:`, JSON.stringify(result));
      res.writeHead(200, { "Content-Type": "application/json" });
      res.end(JSON.stringify(result, null, 2));
      return;
    }
    // 404
    res.writeHead(404, { "Content-Type": "application/json" });
    res.end(JSON.stringify({
      endpoints: ["POST /webhook", "GET /health", "GET /mappings"],
      error: "Not found",
    }));
  } catch (error) {
    console.error(" Request failed:", error);
    if (res.headersSent) {
      // The status line is already out; all that is left is to close the response.
      res.end();
    } else {
      res.writeHead(500, { "Content-Type": "application/json" });
      res.end(JSON.stringify({ error: "Internal server error" }));
    }
  }
}
109
+ // ---------------------------------------------------------------------------
110
+ // Start
111
+ // ---------------------------------------------------------------------------
// Start listening and print a startup banner once the port is bound.
server.listen(PORT, () => {
  const reverseMapping = buildReverseMapping(ROOT);
  const slugCount = allTrackedSlugs(reverseMapping).length;
  console.log();
  console.log("=== AILF Webhook Server ===");
  console.log();
  console.log(` Port: ${PORT}`);
  console.log(` Mode: ${DRY_RUN ? "DRY RUN (set GITHUB_TOKEN to dispatch)" : "LIVE"}`);
  console.log(` Tracked slugs: ${slugCount}`);
  // This value is diagnostics().pendingSlugs, so it is labelled as such
  // rather than as the debounce setting.
  console.log(` Pending slugs: ${handler.diagnostics().pendingSlugs}`);
  console.log();
  console.log(" Endpoints:");
  console.log(` POST http://localhost:${PORT}/webhook — handle webhook`);
  console.log(` GET http://localhost:${PORT}/health — diagnostics`);
  console.log(` GET http://localhost:${PORT}/mappings — slug → area map`);
  console.log();
});
// Graceful shutdown: wait for handler.shutdown() to settle, then stop the
// server and exit. A failed shutdown is logged and reported through a
// non-zero exit code instead of leaving the process running with an
// unhandled rejection.
process.on("SIGINT", () => {
  console.log("\n Shutting down — flushing debounce window...");
  void handler
    .shutdown()
    .then(
      () => 0,
      (error) => {
        console.error(" Shutdown failed:", error);
        return 1;
      },
    )
    .then((exitCode) => {
      server.close();
      process.exit(exitCode);
    });
});
137
+ // ---------------------------------------------------------------------------
138
+ // Helpers
139
+ // ---------------------------------------------------------------------------
/**
 * Collect an entire request body into one string.
 *
 * Buffers every "data" chunk and, on "end", resolves with the concatenated
 * bytes decoded by Buffer's default toString(). An "error" event rejects the
 * promise with that error.
 *
 * @param {import("http").IncomingMessage} req - readable request stream
 * @returns {Promise<string>} the full body text
 */
function readBody(req) {
  return new Promise((fulfil, fail) => {
    const parts = [];
    req.on("data", (part) => {
      parts.push(part);
    });
    req.on("end", () => {
      fulfil(Buffer.concat(parts).toString());
    });
    req.on("error", fail);
  });
}
@@ -0,0 +1,24 @@
1
+ /**
2
+ * weekly-digest.ts
3
+ *
4
+ * CLI script to generate and deliver a weekly evaluation digest.
5
+ *
6
+ * Queries the Sanity Content Lake for all reports within the configured
7
+ * lookback window, computes trend analysis, and delivers the digest
8
+ * via configured channels (Slack, stdout, or both).
9
+ *
10
+ * Usage:
11
+ * pnpm weekly-digest # send to configured Slack webhook
12
+ * pnpm weekly-digest --dry-run # print to stdout only
13
+ * pnpm weekly-digest --lookback 14 # 14-day lookback window
14
+ * pnpm weekly-digest --json # output raw JSON
15
+ *
16
+ * Environment variables:
17
+ * SLACK_WEBHOOK_URL — Slack incoming webhook URL
18
+ * SANITY_API_TOKEN — Sanity read token
19
+ * AILF_TRIGGER_TYPE — set to "scheduled" by the cron workflow
20
+ * AILF_SCHEDULE — the schedule name (e.g., "weekly-digest")
21
+ *
22
+ * @see docs/design-docs/report-store/implementation.md — Phase 5
23
+ */
24
+ export {};
@@ -0,0 +1,144 @@
1
+ /**
2
+ * weekly-digest.ts
3
+ *
4
+ * CLI script to generate and deliver a weekly evaluation digest.
5
+ *
6
+ * Queries the Sanity Content Lake for all reports within the configured
7
+ * lookback window, computes trend analysis, and delivers the digest
8
+ * via configured channels (Slack, stdout, or both).
9
+ *
10
+ * Usage:
11
+ * pnpm weekly-digest # send to configured Slack webhook
12
+ * pnpm weekly-digest --dry-run # print to stdout only
13
+ * pnpm weekly-digest --lookback 14 # 14-day lookback window
14
+ * pnpm weekly-digest --json # output raw JSON
15
+ *
16
+ * Environment variables:
17
+ * SLACK_WEBHOOK_URL — Slack incoming webhook URL
17
+ * SANITY_API_TOKEN — Sanity read token
18
+ * AILF_TRIGGER_TYPE — set to "scheduled" by the cron workflow
19
+ * AILF_SCHEDULE — the schedule name (e.g., "weekly-digest")
20
+ *
21
+ * @see docs/design-docs/report-store/implementation.md — Phase 5
23
+ */
24
+ import { config as dotenvConfig } from "dotenv";
25
+ import { existsSync } from "fs";
26
+ import { dirname, resolve } from "path";
27
+ import { fileURLToPath } from "url";
28
+ import { generateDigest } from "../schedules/digest.js";
29
+ import { getDigestConfig } from "../schedules/loader.js";
30
+ import { formatWeeklyDigest } from "../sinks/slack/format.js";
31
+ // Load root .env (same override behavior as pipeline.ts)
32
+ const __dirname = dirname(fileURLToPath(import.meta.url));
33
+ const envPath = resolve(__dirname, "..", "..", "..", "..", ".env");
34
+ if (existsSync(envPath)) {
35
+ dotenvConfig({ override: true, path: envPath });
36
+ }
37
+ // ---------------------------------------------------------------------------
38
+ // CLI argument parsing
39
+ // ---------------------------------------------------------------------------
40
+ const args = process.argv.slice(2);
/**
 * Look up the value of a `--name value` or `--name=value` CLI option.
 *
 * The first occurrence of the option decides the result. With the
 * space-separated form, the following token is the value unless it is
 * missing or is itself another `--flag`; in those cases the option is
 * treated as having no value.
 *
 * @param {string} name - option name without the leading dashes
 * @returns {string | undefined} the option value, or undefined when absent
 */
function getOption(name) {
  const flag = `--${name}`;
  const inlinePrefix = `${flag}=`;
  for (let i = 0; i < args.length; i += 1) {
    const token = args[i];
    if (token.startsWith(inlinePrefix)) {
      return token.slice(inlinePrefix.length);
    }
    if (token === flag) {
      const next = args[i + 1];
      // `--lookback --json` must not report "--json" as the lookback value.
      return next !== undefined && !next.startsWith("--") ? next : undefined;
    }
  }
  return undefined;
}
/**
 * Report whether the boolean switch `--name` appears among the CLI arguments.
 *
 * @param {string} name - flag name without the leading dashes
 * @returns {boolean}
 */
function hasFlag(name) {
  const wanted = `--${name}`;
  return args.some((token) => token === wanted);
}
48
+ const DRY_RUN = hasFlag("dry-run");
49
+ const JSON_OUTPUT = hasFlag("json");
50
+ const lookbackOverride = getOption("lookback");
51
+ // ---------------------------------------------------------------------------
52
+ // Main
53
+ // ---------------------------------------------------------------------------
/**
 * Generate the digest and deliver it.
 *
 * Flow:
 *  1. Resolve the lookback window: a `--lookback` override, else the digest
 *     config, else 7 days.
 *  2. Call generateDigest with the AILF_REPORT_* environment settings.
 *  3. Depending on flags, print raw JSON (`--json`), print a Slack message
 *     preview (`--dry-run`), or POST the message to the Slack webhook.
 *
 * With `--json`, progress lines go to stderr so that stdout carries only the
 * JSON document. A Slack delivery failure is logged as a warning and does
 * not throw.
 *
 * @returns {Promise<void>}
 * @throws {Error} when `--lookback` is not a positive integer
 */
async function main() {
  const log = JSON_OUTPUT ? console.error : console.log;
  log();
  log("=== AI Literacy Weekly Digest ===");
  log();
  // Load digest config
  const digestConfig = getDigestConfig();
  let lookbackDays = digestConfig?.lookbackDays ?? 7;
  if (lookbackOverride) {
    lookbackDays = Number.parseInt(lookbackOverride, 10);
    // Without this check a non-numeric override would pass NaN days on to generateDigest.
    if (Number.isNaN(lookbackDays) || lookbackDays <= 0) {
      throw new Error(`Invalid --lookback value "${lookbackOverride}": expected a positive number of days`);
    }
  }
  log(` Lookback: ${lookbackDays} days`);
  log(` Mode: ${DRY_RUN ? "dry run (stdout only)" : "live"}`);
  log();
  // Generate digest — uses AILF_REPORT_* env vars for report store access,
  // independent of SANITY_DATASET/SANITY_PROJECT_ID (which control doc evaluation)
  const digest = await generateDigest({
    dataset: process.env.AILF_REPORT_DATASET,
    lookbackDays,
    projectId: process.env.AILF_REPORT_PROJECT_ID,
    token: process.env.AILF_REPORT_SANITY_API_TOKEN ?? process.env.SANITY_API_TOKEN,
  });
  if (!digest) {
    log(" No reports found in the lookback window. Nothing to send.");
    process.exit(0);
  }
  // Output
  log(` Reports found: ${digest.reportCount}`);
  log(` Overall: ${Math.round(digest.overallLatest)} (${digest.overallTrend})`);
  log(` Improved: ${digest.improved.join(", ") || "none"}`);
  log(` Regressed: ${digest.regressed.join(", ") || "none"}`);
  log(` Stable: ${digest.stable.join(", ") || "none"}`);
  log();
  if (JSON_OUTPUT) {
    console.log(JSON.stringify(digest, null, 2));
    return;
  }
  // Format for Slack
  const message = formatWeeklyDigest(digest);
  if (DRY_RUN) {
    console.log(" --- Slack Message Preview ---");
    console.log(` Text: ${message.text}`);
    console.log();
    for (const block of message.blocks) {
      if (block.text) {
        console.log(` [${block.type}] ${block.text.text}`);
      }
      if (block.fields) {
        for (const field of block.fields) {
          console.log(` [field] ${field.text}`);
        }
      }
      if (block.elements) {
        for (const el of block.elements) {
          console.log(` [element] ${el.text}`);
        }
      }
    }
    console.log();
    return;
  }
  // Deliver via Slack
  const webhookUrl = digestConfig?.slackWebhookUrl ?? process.env.SLACK_WEBHOOK_URL;
  if (!webhookUrl) {
    console.warn(" ⚠️ No Slack webhook URL configured. Set SLACK_WEBHOOK_URL or configure in schedules.yaml");
    console.log(" Printing digest to stdout instead:");
    console.log();
    console.log(` ${message.text}`);
    return;
  }
  console.log(" Sending to Slack...");
  try {
    const response = await fetch(webhookUrl, {
      body: JSON.stringify(message),
      headers: { "Content-Type": "application/json" },
      method: "POST",
    });
    if (response.ok) {
      console.log(" ✅ Digest sent successfully");
    } else {
      const text = await response.text();
      console.warn(` ⚠️ Slack delivery failed: ${response.status} ${text}`);
    }
  } catch (error) {
    console.warn(` ⚠️ Slack delivery error: ${error instanceof Error ? error.message : String(error)}`);
  }
}
// Entry point: any error that escapes main() is fatal and exits non-zero.
main().catch((error) => {
  console.error("Fatal error:", error);
  process.exit(1);
});
@@ -0,0 +1,64 @@
1
+ /**
2
+ * sinks/format-slack.ts
3
+ *
4
+ * Formats evaluation report data into Slack Block Kit structures for the
5
+ * SlackSink. Provides two message formats:
6
+ *
7
+ * - `formatRegressionAlert` — detailed regression notification with
8
+ * per-area dimension breakdowns
9
+ * - `formatScoreSummary` — compact score overview for general reporting
10
+ *
11
+ * @see docs/design-docs/report-store/sink-architecture.md
12
+ */
13
+ import type { Report } from "../pipeline/types.js";
14
+ import type { DigestSummary } from "../schedules/digest.js";
/** A Slack text object: the text itself plus how Slack should interpret it. */
type SlackTextObject = {
    text: string;
    type: "mrkdwn" | "plain_text";
};
/** A complete Slack message: a list of blocks plus a top-level text string. */
export interface SlackMessage {
    blocks: SlackBlock[];
    text: string;
}
/**
 * One block of a Slack message. Only `type` is required; `elements`,
 * `fields` and `text` are each optional.
 */
interface SlackBlock {
    elements?: SlackTextObject[];
    fields?: SlackTextObject[];
    text?: SlackTextObject;
    type: "context" | "divider" | "header" | "section";
}
34
+ /**
35
+ * Format a regression alert for areas that have regressed.
36
+ *
37
+ * Produces a rich Slack message with:
38
+ * - Header with overall score change
39
+ * - Context metadata (mode, source, timestamp, promptfoo link)
40
+ * - Per-area regression details with dimension breakdowns
41
+ * - Brief mentions of improved and unchanged areas
42
+ */
43
+ export declare function formatRegressionAlert(report: Report): SlackMessage;
44
+ /**
45
+ * Format a general score summary for Slack reporting.
46
+ *
47
+ * Produces a compact overview with:
48
+ * - Overall score with grade emoji
49
+ * - Per-area score table
50
+ * - Cost summary (if available)
51
+ * - Promptfoo link (if available)
52
+ */
53
+ export declare function formatScoreSummary(report: Report): SlackMessage;
54
+ /**
55
+ * Format a weekly digest summary for Slack.
56
+ *
57
+ * Produces a summary message covering score trends over a time window:
58
+ * - Header with overall trend direction and score
59
+ * - Per-area trend table with arrows
60
+ * - Lists of improved, regressed, and stable areas
61
+ * - Report count and time window metadata
62
+ */
63
+ export declare function formatWeeklyDigest(digest: DigestSummary): SlackMessage;
64
+ export {};
@@ -0,0 +1,306 @@
1
+ /**
2
+ * sinks/format-slack.ts
3
+ *
4
+ * Formats evaluation report data into Slack Block Kit structures for the
5
+ * SlackSink. Provides two message formats:
6
+ *
7
+ * - `formatRegressionAlert` — detailed regression notification with
8
+ * per-area dimension breakdowns
9
+ * - `formatScoreSummary` — compact score overview for general reporting
10
+ *
11
+ * @see docs/design-docs/report-store/sink-architecture.md
12
+ */
13
+ // ---------------------------------------------------------------------------
14
+ // Helpers
15
+ // ---------------------------------------------------------------------------
/**
 * Format a regression alert for areas that have regressed.
 *
 * Produces a rich Slack message with:
 * - Header with overall score change
 * - Context metadata (mode, source, timestamp, promptfoo link)
 * - Per-area regression details with dimension breakdowns
 * - Brief mentions of improved and unchanged areas
 *
 * When the report has no `comparison`, a single-section placeholder message
 * is returned instead.
 *
 * Slack accepts at most 10 `fields` per section block, so regressed areas
 * are spread over as many sections as needed. Only the first of those
 * sections carries the "Regressed Areas (N)" heading.
 *
 * @param {object} report - evaluation report with `comparison`, `provenance`
 *   and `summary`
 * @returns {{ blocks: object[], text: string }} Slack message
 */
export function formatRegressionAlert(report) {
  const MAX_FIELDS_PER_SECTION = 10;
  const { comparison, provenance, summary } = report;
  if (!comparison) {
    return {
      blocks: [
        {
          text: { text: "⚠️ No comparison data available", type: "mrkdwn" },
          type: "section",
        },
      ],
      text: "No comparison data available",
    };
  }
  const baselineScore = Math.round(comparison.baseline.overall.avgScore);
  const experimentScore = Math.round(comparison.experiment.overall.avgScore);
  const delta = Math.round(comparison.deltas.overall);
  const blocks = [];
  // Header — emoji + title + overall score change
  blocks.push({
    text: {
      text: `📉 *AI Literacy Score Regression*\n` +
        `Overall: ${baselineScore} → ${experimentScore} (${formatDelta(delta)})`,
      type: "mrkdwn",
    },
    type: "section",
  });
  // Context — mode, source, timestamp, promptfoo link
  const contextElements = [
    { text: `*Mode:* ${provenance.mode}`, type: "mrkdwn" },
    { text: `*Source:* ${provenance.source.name}`, type: "mrkdwn" },
    { text: `*Date:* ${readableDate(summary.timestamp)}`, type: "mrkdwn" },
  ];
  if (provenance.promptfooUrl) {
    contextElements.push({
      text: `<${provenance.promptfooUrl}|View in Promptfoo>`,
      type: "mrkdwn",
    });
  }
  blocks.push({ elements: contextElements, type: "context" });
  // Divider
  blocks.push({ type: "divider" });
  // Regressed areas — detailed fields with dimension breakdowns
  const regressedAreas = comparison.areas.filter((a) => a.change === "regressed");
  if (regressedAreas.length > 0) {
    const fields = regressedAreas.map((area) => ({
      text: `*${area.area}:* ${Math.round(area.baseline)} → ` +
        `${Math.round(area.experiment)} (${formatDelta(Math.round(area.delta))})\n` +
        dimensionBreakdown(area.dimensions),
      type: "mrkdwn",
    }));
    for (let start = 0; start < fields.length; start += MAX_FIELDS_PER_SECTION) {
      const chunk = fields.slice(start, start + MAX_FIELDS_PER_SECTION);
      if (start === 0) {
        blocks.push({
          fields: chunk,
          text: {
            text: `*Regressed Areas (${regressedAreas.length})*`,
            type: "mrkdwn",
          },
          type: "section",
        });
      } else {
        // Overflow sections carry fields only.
        blocks.push({ fields: chunk, type: "section" });
      }
    }
  }
  // Improved areas — compact mention
  if (comparison.improved.length > 0) {
    blocks.push({
      text: {
        text: `📈 ${comparison.improved.length} area${comparison.improved.length === 1 ? "" : "s"} improved: ${comparison.improved.join(", ")}`,
        type: "mrkdwn",
      },
      type: "section",
    });
  }
  // Unchanged areas — brief mention
  if (comparison.unchanged.length > 0) {
    blocks.push({
      text: {
        text: `➡️ ${comparison.unchanged.length} area${comparison.unchanged.length === 1 ? "" : "s"} unchanged`,
        type: "mrkdwn",
      },
      type: "section",
    });
  }
  return {
    blocks,
    text: `📉 AI Literacy Score Regression: ${baselineScore} → ${experimentScore} (${formatDelta(delta)})`,
  };
}
/**
 * Format a general score summary for Slack reporting.
 *
 * Produces a compact overview with:
 * - Overall score with grade emoji
 * - Per-area score table
 * - Cost summary (if available)
 * - Promptfoo link (if available)
 *
 * Each row's grade emoji is chosen from the rounded score, so the emoji
 * always agrees with the number printed beside it. The score table is left
 * out when there are no scores, because a section with empty text is not a
 * valid block.
 *
 * @param {object} report - evaluation report with `provenance` and `summary`
 * @returns {{ blocks: object[], text: string }} Slack message
 */
export function formatScoreSummary(report) {
  const { provenance, summary } = report;
  const overall = Math.round(summary.overall.avgScore);
  const blocks = [];
  // Header — overall score with emoji
  blocks.push({
    text: {
      text: `${gradeEmoji(overall)} *AI Literacy Score: ${overall}*`,
      type: "mrkdwn",
    },
    type: "section",
  });
  // Context — mode, source, date
  const contextElements = [
    { text: `*Mode:* ${provenance.mode}`, type: "mrkdwn" },
    { text: `*Source:* ${provenance.source.name}`, type: "mrkdwn" },
    { text: `*Date:* ${readableDate(summary.timestamp)}`, type: "mrkdwn" },
  ];
  blocks.push({ elements: contextElements, type: "context" });
  // Divider
  blocks.push({ type: "divider" });
  // Per-area score table as markdown
  const rows = summary.scores
    .map((s) => {
      const total = Math.round(s.totalScore);
      return `${gradeEmoji(total)} *${s.feature}*: ${total} _(T:${Math.round(s.taskCompletion)} · C:${Math.round(s.codeCorrectness)} · D:${Math.round(s.docCoverage)})_`;
    })
    .join("\n");
  if (rows.length > 0) {
    blocks.push({
      text: { text: rows, type: "mrkdwn" },
      type: "section",
    });
  }
  // Cost summary
  if (summary.overall.cost) {
    blocks.push({
      text: {
        text: `💰 Total cost: $${summary.overall.cost.total.toFixed(2)} ($${summary.overall.cost.perTest.toFixed(3)}/test)`,
        type: "mrkdwn",
      },
      type: "section",
    });
  }
  // Promptfoo link
  if (provenance.promptfooUrl) {
    blocks.push({
      text: {
        text: `🔗 <${provenance.promptfooUrl}|View full results in Promptfoo>`,
        type: "mrkdwn",
      },
      type: "section",
    });
  }
  return {
    blocks,
    text: `${gradeEmoji(overall)} AI Literacy Score: ${overall}`,
  };
}
/**
 * Format a weekly digest summary for Slack.
 *
 * Produces a summary message covering score trends over a time window:
 * - Header with overall trend direction and score
 * - Per-area trend table with arrows
 * - Lists of improved, regressed, and stable areas
 * - Report count and time window metadata
 * - Total cost in dollars, when `digest.totalCost` is defined
 *
 * Each row's grade emoji is chosen from the rounded score so that it agrees
 * with the number shown next to it.
 *
 * @param {object} digest - digest summary (trend, deltas, per-area trends)
 * @returns {{ blocks: object[], text: string }} Slack message
 */
export function formatWeeklyDigest(digest) {
  const trendEmoji = digest.overallTrend === "improving"
    ? "📈"
    : digest.overallTrend === "regressing"
      ? "📉"
      : "➡️";
  const overallScore = Math.round(digest.overallLatest);
  const overallDelta = formatDelta(Math.round(digest.overallDelta));
  const blocks = [];
  // Header — overall trend
  blocks.push({
    text: {
      text: `${trendEmoji} *Weekly AI Literacy Digest*\n` +
        `Overall: ${overallScore} (${overallDelta} this week)`,
      type: "mrkdwn",
    },
    type: "section",
  });
  // Context — time window and report count
  blocks.push({
    elements: [
      {
        text: `*Period:* ${readableDate(digest.lookbackStart)} – ${readableDate(digest.lookbackEnd)}`,
        type: "mrkdwn",
      },
      {
        text: `*Reports:* ${digest.reportCount}`,
        type: "mrkdwn",
      },
    ],
    type: "context",
  });
  blocks.push({ type: "divider" });
  // Per-area trend table
  if (digest.areaTrends.length > 0) {
    const rows = digest.areaTrends
      .map((t) => {
        const arrow = t.trend === "improving" ? "↑" : t.trend === "regressing" ? "↓" : "→";
        const score = Math.round(t.lastScore);
        return `${gradeEmoji(score)} *${t.area}*: ${score} ${arrow} (${formatDelta(Math.round(t.scoreDelta))})`;
      })
      .join("\n");
    blocks.push({
      text: { text: rows, type: "mrkdwn" },
      type: "section",
    });
  }
  // Summary badges
  if (digest.improved.length > 0) {
    blocks.push({
      text: {
        text: `📈 *Improved:* ${digest.improved.join(", ")}`,
        type: "mrkdwn",
      },
      type: "section",
    });
  }
  if (digest.regressed.length > 0) {
    blocks.push({
      text: {
        text: `📉 *Regressed:* ${digest.regressed.join(", ")}`,
        type: "mrkdwn",
      },
      type: "section",
    });
  }
  if (digest.stable.length > 0) {
    blocks.push({
      text: {
        text: `➡️ *Stable:* ${digest.stable.join(", ")}`,
        type: "mrkdwn",
      },
      type: "section",
    });
  }
  // Cost summary if available; the "$" matches the cost line in formatScoreSummary.
  if (digest.totalCost !== undefined) {
    blocks.push({
      text: {
        text: `💰 Total evaluation cost this week: $${digest.totalCost.toFixed(2)}`,
        type: "mrkdwn",
      },
      type: "section",
    });
  }
  return {
    blocks,
    text: `${trendEmoji} Weekly AI Literacy Digest: ${overallScore} (${overallDelta})`,
  };
}
/**
 * Build a one-line dimension breakdown for an area delta, e.g.
 * "Task: -3 · Code: 0 · Docs: +6".
 *
 * Each delta is rounded to a whole number before formatting, as every other
 * formatDelta call site in this module does; without that a fractional
 * delta would print with all of its decimals.
 *
 * @param {object} dimensions - object whose `taskCompletion`,
 *   `codeCorrectness` and `docCoverage` entries each carry a numeric `delta`
 * @returns {string}
 */
function dimensionBreakdown(dimensions) {
  const parts = [
    ["Task", dimensions.taskCompletion],
    ["Code", dimensions.codeCorrectness],
    ["Docs", dimensions.docCoverage],
  ];
  return parts
    .map(([label, dimension]) => `${label}: ${formatDelta(Math.round(dimension.delta))}`)
    .join(" · ");
}
/**
 * Render a numeric delta with an explicit sign.
 *
 * Positive values gain a leading "+", negative values print as they are,
 * and anything else (zero included) prints as "0".
 *
 * @param {number} n
 * @returns {string} e.g. "+4", "-2" or "0"
 */
function formatDelta(n) {
  const isPositive = n > 0;
  const isNegative = n < 0;
  return isPositive ? `+${n}` : isNegative ? String(n) : "0";
}
/**
 * Score-tier emoji: ✅ (≥80), 🟡 (≥70), 🟠 (≥50), 🔴 (<50).
 *
 * A value that meets no threshold, NaN included, falls through to 🔴.
 *
 * @param {number} score
 * @returns {string} one emoji
 */
function gradeEmoji(score) {
  // Ordered from the highest threshold down; the first match wins.
  const tiers = [
    [80, "✅"],
    [70, "🟡"],
    [50, "🟠"],
  ];
  for (const [minimum, emoji] of tiers) {
    if (score >= minimum) {
      return emoji;
    }
  }
  return "🔴";
}
/**
 * Turn an ISO timestamp into a short en-US date such as "Mar 15, 2024".
 *
 * The date is rendered by toLocaleDateString without an explicit time zone.
 *
 * @param {string} iso - timestamp accepted by the Date constructor
 * @returns {string}
 */
function readableDate(iso) {
  const dateParts = { day: "numeric", month: "short", year: "numeric" };
  return new Date(iso).toLocaleDateString("en-US", dateParts);
}