@sanity/ailf 1.0.0 → 2.0.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (499) hide show
  1. package/README.md +0 -1
  2. package/canonical/grader-references/README.md +2 -2
  3. package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
  4. package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
  5. package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
  6. package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
  7. package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
  8. package/config/features.ts +1 -1
  9. package/config/models.ts +29 -12
  10. package/config/sources.ts +1 -1
  11. package/config/thresholds.ts +1 -1
  12. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
  13. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
  14. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
  15. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
  16. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
  17. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
  18. package/dist/_vendor/ailf-core/config-helpers.d.ts +20 -17
  19. package/dist/_vendor/ailf-core/config-helpers.js +51 -2
  20. package/dist/_vendor/ailf-core/examples/index.d.ts +166 -80
  21. package/dist/_vendor/ailf-core/examples/index.js +213 -94
  22. package/dist/_vendor/ailf-core/index.d.ts +3 -2
  23. package/dist/_vendor/ailf-core/index.js +2 -1
  24. package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
  25. package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
  26. package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
  27. package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
  28. package/dist/_vendor/ailf-core/ports/context.d.ts +22 -1
  29. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
  30. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
  31. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
  32. package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
  33. package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
  34. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
  35. package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
  36. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +7 -1
  37. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +16 -2
  38. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
  39. package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
  40. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
  41. package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
  42. package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
  43. package/dist/_vendor/ailf-core/services/index.js +1 -1
  44. package/dist/_vendor/ailf-core/services/scoring.js +9 -0
  45. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +25 -1
  46. package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
  47. package/dist/_vendor/ailf-core/types/index.d.ts +48 -7
  48. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +105 -23
  49. package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
  50. package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
  51. package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
  52. package/dist/adapters/api-client/remediation.js +2 -2
  53. package/dist/adapters/config-sources/file-config-adapter.js +7 -1
  54. package/dist/adapters/config-sources/ts-config-loader.js +21 -13
  55. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
  56. package/dist/adapters/index.d.ts +0 -1
  57. package/dist/adapters/index.js +0 -1
  58. package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
  59. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  60. package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
  61. package/dist/adapters/task-sources/content-lake-task-source.js +21 -26
  62. package/dist/adapters/task-sources/index.d.ts +3 -4
  63. package/dist/adapters/task-sources/index.js +3 -4
  64. package/dist/adapters/task-sources/repo-schemas.d.ts +219 -17
  65. package/dist/adapters/task-sources/repo-schemas.js +228 -20
  66. package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
  67. package/dist/adapters/task-sources/repo-task-source.js +81 -122
  68. package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
  69. package/dist/adapters/task-sources/repo-trigger.js +1 -1
  70. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  71. package/dist/adapters/task-sources/repo-validation.js +126 -5
  72. package/dist/adapters/task-sources/task-file-loader.d.ts +10 -7
  73. package/dist/adapters/task-sources/task-file-loader.js +21 -7
  74. package/dist/agent-observer/test-imports.d.ts +7 -0
  75. package/dist/agent-observer/test-imports.js +185 -0
  76. package/dist/artifact-capture/comparator.d.ts +22 -0
  77. package/dist/artifact-capture/comparator.js +493 -0
  78. package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
  79. package/dist/artifact-capture/filesystem-collector.js +237 -0
  80. package/dist/artifact-capture/redact-artifact.d.ts +20 -0
  81. package/dist/artifact-capture/redact-artifact.js +115 -0
  82. package/dist/assertions/source-isolation.d.ts +1 -1
  83. package/dist/assertions/source-isolation.js +1 -1
  84. package/dist/cli.js +4 -0
  85. package/dist/commands/calculate-scores.js +1 -0
  86. package/dist/commands/capture-compare.d.ts +15 -0
  87. package/dist/commands/capture-compare.js +253 -0
  88. package/dist/commands/capture-list.d.ts +12 -0
  89. package/dist/commands/capture-list.js +147 -0
  90. package/dist/commands/capture.d.ts +9 -0
  91. package/dist/commands/capture.js +16 -0
  92. package/dist/commands/chronic-failures.d.ts +8 -0
  93. package/dist/commands/chronic-failures.js +33 -0
  94. package/dist/commands/coverage-audit.js +3 -1
  95. package/dist/commands/explain-handler.d.ts +1 -1
  96. package/dist/commands/explain-handler.js +37 -8
  97. package/dist/commands/fetch-docs.js +1 -0
  98. package/dist/commands/generate-configs.d.ts +3 -3
  99. package/dist/commands/generate-configs.js +20 -8
  100. package/dist/commands/init.d.ts +5 -4
  101. package/dist/commands/init.js +190 -25
  102. package/dist/commands/pipeline-action.d.ts +7 -1
  103. package/dist/commands/pipeline-action.js +43 -19
  104. package/dist/commands/pipeline.d.ts +6 -1
  105. package/dist/commands/pipeline.js +7 -2
  106. package/dist/commands/pr-comment.js +1 -0
  107. package/dist/commands/publish.js +1 -0
  108. package/dist/commands/shared/help.js +2 -2
  109. package/dist/commands/update-quality-scores.d.ts +5 -0
  110. package/dist/commands/update-quality-scores.js +20 -0
  111. package/dist/commands/validate-tasks.d.ts +2 -2
  112. package/dist/commands/validate-tasks.js +26 -15
  113. package/dist/composition-root.d.ts +15 -4
  114. package/dist/composition-root.js +100 -55
  115. package/dist/config/features.ts +23 -0
  116. package/dist/config/models.ts +100 -0
  117. package/dist/config/prompts.ts +16 -0
  118. package/dist/config/rubrics.ts +225 -0
  119. package/dist/config/schedules.ts +47 -0
  120. package/dist/config/sinks.ts +37 -0
  121. package/dist/config/sources.ts +21 -0
  122. package/dist/config/thresholds.ts +61 -0
  123. package/dist/index.d.ts +41 -0
  124. package/dist/index.js +48 -0
  125. package/dist/lib/agent-behavior-report.d.ts +8 -0
  126. package/dist/lib/agent-behavior-report.js +185 -0
  127. package/dist/lib/baseline.d.ts +19 -0
  128. package/dist/lib/baseline.js +153 -0
  129. package/dist/lib/calculate-scores.d.ts +23 -0
  130. package/dist/lib/calculate-scores.js +42 -0
  131. package/dist/lib/compare.d.ts +18 -0
  132. package/dist/lib/compare.js +170 -0
  133. package/dist/lib/coverage-audit.d.ts +4 -0
  134. package/dist/lib/coverage-audit.js +42 -0
  135. package/dist/lib/discovery-report.d.ts +13 -0
  136. package/dist/lib/discovery-report.js +57 -0
  137. package/dist/lib/fetch-docs.d.ts +30 -0
  138. package/dist/lib/fetch-docs.js +171 -0
  139. package/dist/lib/generate-configs.d.ts +25 -0
  140. package/dist/lib/generate-configs.js +42 -0
  141. package/dist/lib/grader-api.d.ts +21 -0
  142. package/dist/lib/grader-api.js +34 -0
  143. package/dist/lib/grader-compare.d.ts +19 -0
  144. package/dist/lib/grader-compare.js +91 -0
  145. package/dist/lib/grader-consistency.d.ts +27 -0
  146. package/dist/lib/grader-consistency.js +79 -0
  147. package/dist/lib/grader-sensitivity.d.ts +19 -0
  148. package/dist/lib/grader-sensitivity.js +75 -0
  149. package/dist/lib/grader-validate.d.ts +19 -0
  150. package/dist/lib/grader-validate.js +78 -0
  151. package/dist/lib/measure-retrieval.d.ts +14 -0
  152. package/dist/lib/measure-retrieval.js +71 -0
  153. package/dist/lib/pr-comment.d.ts +16 -0
  154. package/dist/lib/pr-comment.js +28 -0
  155. package/dist/lib/readiness-report.d.ts +13 -0
  156. package/dist/lib/readiness-report.js +108 -0
  157. package/dist/lib/webhook-server.d.ts +11 -0
  158. package/dist/lib/webhook-server.js +24 -0
  159. package/dist/lib/weekly-digest.d.ts +24 -0
  160. package/dist/lib/weekly-digest.js +148 -0
  161. package/dist/orchestration/build-app-context.js +13 -0
  162. package/dist/orchestration/build-step-sequence.js +4 -2
  163. package/dist/orchestration/cache-context.d.ts +23 -0
  164. package/dist/orchestration/cache-context.js +43 -0
  165. package/dist/orchestration/env-bridge.d.ts +21 -0
  166. package/dist/orchestration/env-bridge.js +66 -0
  167. package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
  168. package/dist/orchestration/load-pipeline-tasks.js +52 -0
  169. package/dist/orchestration/pipeline-orchestrator.js +75 -5
  170. package/dist/orchestration/step-runner.js +5 -1
  171. package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
  172. package/dist/orchestration/steps/calculate-scores-step.js +13 -0
  173. package/dist/orchestration/steps/callback-step.js +10 -1
  174. package/dist/orchestration/steps/compare-step.js +6 -3
  175. package/dist/orchestration/steps/discovery-report-step.js +6 -2
  176. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  177. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  178. package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
  179. package/dist/orchestration/steps/fetch-docs-step.js +32 -19
  180. package/dist/orchestration/steps/gap-analysis-step.js +13 -2
  181. package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
  182. package/dist/orchestration/steps/generate-configs-step.js +77 -26
  183. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
  184. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  185. package/dist/orchestration/steps/publish-report-step.js +19 -0
  186. package/dist/orchestration/steps/readiness-step.js +8 -3
  187. package/dist/orchestration/steps/report-step.js +17 -4
  188. package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
  189. package/dist/orchestration/steps/run-eval-step.js +51 -31
  190. package/dist/pipeline/agent-behavior-report.js +6 -0
  191. package/dist/pipeline/attribution.d.ts +1 -1
  192. package/dist/pipeline/attribution.js +1 -1
  193. package/dist/pipeline/cache.js +29 -15
  194. package/dist/pipeline/calculate-scores.d.ts +2 -0
  195. package/dist/pipeline/calculate-scores.js +70 -33
  196. package/dist/pipeline/chronic-failures.d.ts +55 -0
  197. package/dist/pipeline/chronic-failures.js +110 -0
  198. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
  199. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
  200. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
  201. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +132 -62
  202. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
  203. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
  204. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
  205. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
  206. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
  207. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
  208. package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
  209. package/dist/pipeline/compiler/assertion-mapper.js +1 -1
  210. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
  211. package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
  212. package/dist/pipeline/compiler/config-loader.d.ts +14 -0
  213. package/dist/pipeline/compiler/config-loader.js +42 -2
  214. package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
  215. package/dist/pipeline/compiler/fixture-resolver.js +1 -1
  216. package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
  217. package/dist/pipeline/compiler/ignore-fields.js +1 -1
  218. package/dist/pipeline/compiler/index.d.ts +2 -5
  219. package/dist/pipeline/compiler/index.js +2 -5
  220. package/dist/pipeline/compiler/literacy-bridge.d.ts +2 -2
  221. package/dist/pipeline/compiler/literacy-bridge.js +2 -2
  222. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  223. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  224. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  225. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  226. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  227. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  228. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +23 -0
  229. package/dist/pipeline/compiler/mode-bases/literacy.js +132 -0
  230. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  231. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  232. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  233. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  234. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  235. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  236. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  237. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  238. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  239. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  240. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  241. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  242. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  243. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  244. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
  245. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  246. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  247. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  248. package/dist/pipeline/compiler/mode-handlers/index.d.ts +6 -7
  249. package/dist/pipeline/compiler/mode-handlers/index.js +6 -8
  250. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  251. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  252. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  253. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  254. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  255. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  256. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  257. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  258. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  259. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  260. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  261. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  262. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  263. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  264. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  265. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  266. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  267. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  268. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  269. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  270. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  271. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  272. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  273. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  274. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +63 -6
  275. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
  276. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
  277. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  278. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  279. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  280. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  281. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  282. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  283. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  284. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +108 -0
  285. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  286. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  287. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  288. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  289. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +3 -1
  290. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +65 -67
  291. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  292. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +191 -0
  293. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  294. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +101 -0
  295. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  296. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  297. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +19 -0
  298. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +323 -0
  299. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +103 -0
  300. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  301. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
  302. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
  303. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  304. package/dist/pipeline/compiler/preset-loader.js +99 -0
  305. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +7 -10
  306. package/dist/pipeline/compiler/presets/sanity-literacy.js +11 -157
  307. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
  308. package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
  309. package/dist/pipeline/compiler/provider-assembler.js +13 -7
  310. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
  311. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
  312. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
  313. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
  314. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
  315. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
  316. package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
  317. package/dist/pipeline/compiler/sandbox/index.js +1 -1
  318. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
  319. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
  320. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
  321. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
  322. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
  323. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
  324. package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
  325. package/dist/pipeline/compiler/scoring-bridge.js +1 -1
  326. package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
  327. package/dist/pipeline/compiler/task-bridge.js +92 -0
  328. package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
  329. package/dist/pipeline/compiler/task-graph-builder.js +1 -4
  330. package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
  331. package/dist/pipeline/compiler/telemetry/index.js +1 -1
  332. package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
  333. package/dist/pipeline/compiler/variable-resolver.js +1 -1
  334. package/dist/pipeline/coverage-audit.d.ts +1 -1
  335. package/dist/pipeline/coverage-audit.js +1 -1
  336. package/dist/pipeline/degradations.d.ts +1 -1
  337. package/dist/pipeline/degradations.js +1 -1
  338. package/dist/pipeline/expand-tasks.d.ts +2 -2
  339. package/dist/pipeline/expand-tasks.js +2 -2
  340. package/dist/pipeline/failure-modes.d.ts +1 -1
  341. package/dist/pipeline/failure-modes.js +13 -1
  342. package/dist/pipeline/gap-analysis.d.ts +1 -1
  343. package/dist/pipeline/gap-analysis.js +3 -1
  344. package/dist/pipeline/generate-configs.d.ts +2 -2
  345. package/dist/pipeline/generate-configs.js +16 -9
  346. package/dist/pipeline/grader-compare-runner.d.ts +1 -1
  347. package/dist/pipeline/grader-compare-runner.js +7 -1
  348. package/dist/pipeline/grader-comparison.d.ts +1 -1
  349. package/dist/pipeline/grader-comparison.js +1 -1
  350. package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
  351. package/dist/pipeline/grader-consistency-runner.js +7 -1
  352. package/dist/pipeline/grader-consistency.d.ts +1 -1
  353. package/dist/pipeline/grader-consistency.js +1 -1
  354. package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
  355. package/dist/pipeline/grader-sensitivity-runner.js +1 -1
  356. package/dist/pipeline/grader-sensitivity.d.ts +1 -1
  357. package/dist/pipeline/grader-sensitivity.js +1 -1
  358. package/dist/pipeline/grader-validate-runner.d.ts +1 -1
  359. package/dist/pipeline/grader-validate-runner.js +2 -2
  360. package/dist/pipeline/grader-validation.d.ts +1 -1
  361. package/dist/pipeline/grader-validation.js +1 -1
  362. package/dist/pipeline/map-request-to-config.js +16 -2
  363. package/dist/pipeline/mirror-repo-tasks.d.ts +8 -8
  364. package/dist/pipeline/mirror-repo-tasks.js +10 -10
  365. package/dist/pipeline/plan-format.d.ts +1 -1
  366. package/dist/pipeline/plan-format.js +1 -1
  367. package/dist/pipeline/plan.d.ts +1 -1
  368. package/dist/pipeline/plan.js +68 -30
  369. package/dist/pipeline/probe.d.ts +1 -1
  370. package/dist/pipeline/probe.js +1 -1
  371. package/dist/pipeline/readiness-report.d.ts +2 -2
  372. package/dist/pipeline/readiness-report.js +2 -2
  373. package/dist/pipeline/release-classification.d.ts +1 -1
  374. package/dist/pipeline/release-classification.js +1 -1
  375. package/dist/pipeline/release-report.d.ts +1 -1
  376. package/dist/pipeline/release-report.js +1 -1
  377. package/dist/pipeline/repo-eval-comment.d.ts +1 -1
  378. package/dist/pipeline/repo-eval-comment.js +1 -1
  379. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  380. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  381. package/dist/pipeline/resolve-mappings.d.ts +6 -6
  382. package/dist/pipeline/resolve-mappings.js +44 -44
  383. package/dist/pipeline/retrieval-metrics.d.ts +3 -3
  384. package/dist/pipeline/retrieval-metrics.js +28 -20
  385. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  386. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  387. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  388. package/dist/pipeline/steps/compare-step.js +90 -0
  389. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  390. package/dist/pipeline/steps/eval-step.js +347 -0
  391. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  392. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  393. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  394. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  395. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  396. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  397. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  398. package/dist/pipeline/steps/publish-report-step.js +243 -0
  399. package/dist/pipeline/steps/report-step.d.ts +13 -0
  400. package/dist/pipeline/steps/report-step.js +56 -0
  401. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  402. package/dist/pipeline/steps/update-scores-step.js +42 -0
  403. package/dist/pipeline/targeted-loo.d.ts +1 -1
  404. package/dist/pipeline/targeted-loo.js +1 -1
  405. package/dist/pipeline/thresholds.d.ts +1 -1
  406. package/dist/pipeline/thresholds.js +1 -1
  407. package/dist/pipeline/validate.js +13 -0
  408. package/dist/report-store.d.ts +17 -0
  409. package/dist/report-store.js +24 -0
  410. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  411. package/dist/scripts/agent-behavior-report.js +315 -0
  412. package/dist/scripts/baseline.d.ts +43 -0
  413. package/dist/scripts/baseline.js +267 -0
  414. package/dist/scripts/calculate-scores.d.ts +166 -0
  415. package/dist/scripts/calculate-scores.js +1296 -0
  416. package/dist/scripts/compare.d.ts +22 -0
  417. package/dist/scripts/compare.js +334 -0
  418. package/dist/scripts/coverage-audit.d.ts +44 -0
  419. package/dist/scripts/coverage-audit.js +209 -0
  420. package/dist/scripts/debug-eval.d.ts +19 -0
  421. package/dist/scripts/debug-eval.js +73 -0
  422. package/dist/scripts/discovery-report.d.ts +58 -0
  423. package/dist/scripts/discovery-report.js +250 -0
  424. package/dist/scripts/fetch-docs.d.ts +35 -0
  425. package/dist/scripts/fetch-docs.js +472 -0
  426. package/dist/scripts/generate-configs.d.ts +66 -0
  427. package/dist/scripts/generate-configs.js +459 -0
  428. package/dist/scripts/grader-api.d.ts +27 -0
  429. package/dist/scripts/grader-api.js +206 -0
  430. package/dist/scripts/grader-compare.d.ts +22 -0
  431. package/dist/scripts/grader-compare.js +368 -0
  432. package/dist/scripts/grader-consistency.d.ts +20 -0
  433. package/dist/scripts/grader-consistency.js +313 -0
  434. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  435. package/dist/scripts/grader-sensitivity.js +354 -0
  436. package/dist/scripts/grader-validate.d.ts +19 -0
  437. package/dist/scripts/grader-validate.js +267 -0
  438. package/dist/scripts/measure-retrieval.d.ts +10 -0
  439. package/dist/scripts/measure-retrieval.js +145 -0
  440. package/dist/scripts/migrate-task-mode.d.ts +1 -1
  441. package/dist/scripts/migrate-task-mode.js +1 -1
  442. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
  443. package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
  444. package/dist/scripts/pipeline.d.ts +76 -0
  445. package/dist/scripts/pipeline.js +1031 -0
  446. package/dist/scripts/pr-comment.d.ts +10 -0
  447. package/dist/scripts/pr-comment.js +510 -0
  448. package/dist/scripts/readiness-report.d.ts +88 -0
  449. package/dist/scripts/readiness-report.js +342 -0
  450. package/dist/scripts/update-quality-scores.d.ts +15 -0
  451. package/dist/scripts/update-quality-scores.js +184 -0
  452. package/dist/scripts/validate-task-sources.d.ts +1 -1
  453. package/dist/scripts/validate-task-sources.js +1 -1
  454. package/dist/scripts/validate.d.ts +13 -0
  455. package/dist/scripts/validate.js +79 -0
  456. package/dist/scripts/webhook-server.d.ts +26 -0
  457. package/dist/scripts/webhook-server.js +147 -0
  458. package/dist/scripts/weekly-digest.d.ts +24 -0
  459. package/dist/scripts/weekly-digest.js +144 -0
  460. package/dist/sinks/format-slack.d.ts +64 -0
  461. package/dist/sinks/format-slack.js +306 -0
  462. package/dist/sinks/slack-sink.d.ts +27 -0
  463. package/dist/sinks/slack-sink.js +78 -0
  464. package/dist/sinks/types.d.ts +1 -1
  465. package/dist/sinks/types.js +1 -1
  466. package/dist/sinks/webhook-sink.d.ts +19 -0
  467. package/dist/sinks/webhook-sink.js +50 -0
  468. package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
  469. package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
  470. package/dist/tasks/literacy/content-lake.task.ts +181 -0
  471. package/dist/tasks/literacy/frameworks.task.ts +129 -0
  472. package/dist/tasks/literacy/functions.task.ts +70 -0
  473. package/dist/tasks/literacy/groq.task.ts +259 -0
  474. package/dist/tasks/literacy/image-handling.task.ts +95 -0
  475. package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
  476. package/dist/tasks/literacy/portable-text.task.ts +169 -0
  477. package/dist/tasks/literacy/studio-setup.task.ts +134 -0
  478. package/dist/tasks/literacy/visual-editing.task.ts +147 -0
  479. package/package.json +32 -24
  480. package/tasks/.expanded.agentic.yaml +280 -0
  481. package/tasks/.expanded.yaml +565 -0
  482. package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
  483. package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
  484. package/tasks/literacy/content-lake.task.ts +181 -0
  485. package/tasks/literacy/frameworks.task.ts +1 -0
  486. package/tasks/literacy/functions.task.ts +1 -0
  487. package/tasks/literacy/groq.task.ts +1 -0
  488. package/tasks/literacy/image-handling.task.ts +95 -0
  489. package/tasks/literacy/nextjs-live.task.ts +2 -1
  490. package/tasks/literacy/portable-text.task.ts +169 -0
  491. package/tasks/literacy/studio-setup.task.ts +5 -2
  492. package/tasks/literacy/visual-editing.task.ts +1 -0
  493. package/LICENSE +0 -21
  494. package/tasks/frameworks.yaml +0 -98
  495. package/tasks/functions.yaml +0 -51
  496. package/tasks/groq.yaml +0 -216
  497. package/tasks/nextjs-live.yaml +0 -62
  498. package/tasks/studio-setup.yaml +0 -111
  499. package/tasks/visual-editing.yaml +0 -120
@@ -0,0 +1,342 @@
1
+ /**
2
+ * readiness-report.ts
3
+ *
4
+ * Launch readiness report generator — Phase 5b of the Scenario Matrix
5
+ * implementation. Combines threshold evaluation, ceiling decomposition,
6
+ * and gap analysis into a single actionable readiness checklist for a
7
+ * given feature area.
8
+ *
9
+ * Usage:
10
+ * pnpm readiness-report --area visual-editing
11
+ * pnpm readiness-report --area groq --history
12
+ * pnpm readiness-report --area groq --output readiness.md
13
+ *
14
+ * Exports pure functions for unit testing:
15
+ * - generateReadinessReport() — builds the structured report
16
+ * - formatReadinessMarkdown() — renders the report as markdown
17
+ *
18
+ * @see docs/exec-plans/completed/scenario-matrix-implementation/phase-5-readiness-thresholds.md
19
+ */
20
+ import { existsSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
21
+ import { dirname, join, resolve } from "node:path";
22
+ import { fileURLToPath } from "node:url";
23
+ import { load } from "js-yaml";
24
+ import { ThresholdConfigSchema, } from "../pipeline/schemas.js";
25
+ import { evaluateThresholds } from "../pipeline/thresholds.js";
26
+ const __dirname = dirname(fileURLToPath(import.meta.url));
27
+ const ROOT = resolve(__dirname, "..", "..");
28
+ const SCORE_SUMMARY_PATH = join(ROOT, "results", "latest", "score-summary.json");
29
+ const GAP_ANALYSIS_PATH = join(ROOT, "results", "latest", "gap-analysis.json");
30
+ const THRESHOLDS_PATH = join(ROOT, "config", "thresholds.yaml");
31
+ const BASELINES_DIR = join(ROOT, "results", "baselines");
32
+ // ---------------------------------------------------------------------------
33
+ // Pure functions (exported for testing)
34
+ // ---------------------------------------------------------------------------
35
/**
 * Render a structured readiness report as a markdown document.
 *
 * Sections appear in a fixed order: header, dimension checklist and
 * ceiling analysis (always), followed by failing criteria with a
 * recommendation, gap analysis, and historical progress (each only when
 * the report carries entries for it). No file I/O is performed; the only
 * ambient input is today's date, used to label the "*current*" row of
 * the history table.
 *
 * @param report - Report object as returned by generateReadinessReport().
 * @returns The markdown text, lines joined with "\n".
 */
export function formatReadinessMarkdown(report) {
    const { gaps, history, violations } = report;
    const out = [];

    // Header
    const verdict = report.pass ? "✅ READY" : "❌ NOT READY";
    out.push(
        `## 🚀 Launch Readiness: ${formatAreaLabel(report.area)}`,
        "",
        `**Overall:** ${verdict} (${fmt(report.score)}/100, threshold: ${report.threshold})`,
        "",
    );

    // Dimension Checklist
    out.push(
        "### Dimension Checklist",
        "",
        "| Dimension | Score | Threshold | Status |",
        "|---|---|---|---|",
    );
    for (const { dimension, pass, score, threshold } of report.dimensions) {
        const status = pass ? "✅ Meets threshold" : "❌ Below threshold";
        out.push(`| ${dimension} | ${fmt(score)} | ${fmt(threshold)} | ${status} |`);
    }
    out.push("");

    // Ceiling Analysis
    const { ceilingScore, docLift, docQualityGap, floorScore } = report.ceiling;
    out.push("### Ceiling Analysis", "", "| Metric | Value | Assessment |", "|---|---|---|");

    let ceilingAssessment = "⚠️ Below 60 — docs need improvement";
    if (ceilingScore >= 60) {
        ceilingAssessment = "✅ Docs enable reasonable performance";
    }
    out.push(`| Ceiling Score | ${fmt(ceilingScore)} | ${ceilingAssessment} |`);

    let floorAssessment = "Model has limited baseline knowledge";
    if (floorScore >= 30) {
        floorAssessment = "Model has moderate baseline knowledge";
    }
    out.push(`| Floor Score | ${fmt(floorScore)} | ${floorAssessment} |`);

    // Non-negative lifts get an explicit "+"; negatives already carry "-".
    const liftSign = docLift >= 0 ? "+" : "";
    let liftAssessment;
    if (docLift < 0) {
        liftAssessment = "❌ Docs are hurting performance";
    }
    else if (docLift >= 10) {
        liftAssessment = "✅ Docs add significant value";
    }
    else {
        liftAssessment = "⚠️ Docs add minimal value";
    }
    out.push(`| Doc Lift | ${liftSign}${fmt(docLift)} | ${liftAssessment} |`);

    const gapAssessment = docQualityGap > 30
        ? "Room for improvement via documentation"
        : "✅ Docs are high quality";
    out.push(`| Doc Quality Gap | ${fmt(docQualityGap)} | ${gapAssessment} |`, "");

    // Failing Criteria (only shown when there are violations)
    if (violations.length > 0) {
        out.push("### Failing Criteria");
        for (const [index, violation] of violations.entries()) {
            out.push(`${index + 1}. **${violation.description}**`);
        }
        const count = violations.length;
        const itemWord = count === 1 ? "item" : "items";
        out.push("", "### Recommendation", `Fix the ${count} ${itemWord} above and re-evaluate.`, "");
    }

    // Gap Analysis (if available)
    if (gaps.length > 0) {
        // Anything other than "high" / "medium" is shown with the red marker.
        const confidenceIcons = new Map([
            ["high", "🟢"],
            ["medium", "🟡"],
        ]);
        out.push(
            "### Gap Analysis",
            "",
            "| Failure Mode | Est. Lift | Confidence | Remediation |",
            "|---|---|---|---|",
        );
        for (const gap of gaps) {
            const confIcon = confidenceIcons.get(gap.confidence) ?? "🔴";
            out.push(`| ${gap.failureMode} | +${gap.estimatedLift.toFixed(1)} | ${confIcon} ${gap.confidence} | ${gap.remediation} |`);
        }
        out.push("");
    }

    // Historical Progress (if available)
    if (history.length > 0) {
        out.push("### Historical Progress", "", "| Date | Score | Tag |", "|---|---|---|");
        for (const { score, tag, timestamp } of history) {
            // Only the first 10 characters of the timestamp are displayed.
            out.push(`| ${timestamp.slice(0, 10)} | ${fmt(score)} | ${tag ?? "—"} |`);
        }
        // Show current score as the last row
        const today = new Date().toISOString().slice(0, 10);
        out.push(`| ${today} | ${fmt(report.score)} | *current* |`, "");
    }

    return out.join("\n");
}
129
/**
 * Assemble the structured readiness report for one feature area.
 *
 * All data arrives through `opts`; this function performs no I/O itself.
 *
 * @param opts.area - Feature area name to look up in the score summary.
 * @param opts.gapAnalysis - Optional gap analysis; its `gaps` are filtered to the area.
 * @param opts.history - Optional history entries, passed through (defaults to []).
 * @param opts.scoreSummary - Score summary whose `scores` contains the area.
 * @param opts.thresholdConfig - Threshold config with `defaults` and optional per-area overrides.
 * @returns Report with area, ceiling, dimensions, gaps, history, pass,
 *   score, threshold, thresholdEvaluation and violations.
 * @throws Error when the area is absent from `scoreSummary.scores`.
 */
export function generateReadinessReport(opts) {
    const { area, gapAnalysis, history = [], scoreSummary, thresholdConfig } = opts;

    // Locate the area's score entry; fail with the list of known areas.
    const areaScore = scoreSummary.scores.find((entry) => entry.feature === area);
    if (!areaScore) {
        const available = scoreSummary.scores.map((entry) => entry.feature).join(", ");
        throw new Error(`Area "${area}" not found in score summary. Available areas: ${available}`);
    }

    // Thresholds are evaluated over the whole summary, then narrowed to this area.
    const thresholdEvaluation = evaluateThresholds(scoreSummary, thresholdConfig);
    const violations = thresholdEvaluation.violations.filter((violation) => violation.area === area);

    // Per-area overrides win over defaults; a missing dimension threshold is 0.
    const areaOverrides = thresholdConfig.areas?.[area];
    const compositeThreshold = areaOverrides?.composite ?? thresholdConfig.defaults.composite;
    const dimDefaults = thresholdConfig.defaults.dimensions ?? {};
    const dimOverrides = areaOverrides?.dimensions ?? {};
    const checkDimension = (dimension, key, score) => {
        const threshold = dimOverrides[key] ?? dimDefaults[key] ?? 0;
        return { dimension, pass: score >= threshold, score, threshold };
    };
    const dimensions = [
        checkDimension("Task Completion", "task-completion", areaScore.taskCompletion),
        checkDimension("Code Correctness", "code-correctness", areaScore.codeCorrectness),
        checkDimension("Doc Coverage", "doc-coverage", areaScore.docCoverage),
    ];

    // Gap analysis is optional; without it the report carries no gaps.
    const gaps = gapAnalysis?.gaps.filter((gap) => gap.area === area) ?? [];

    // Ceiling decomposition is copied straight from the area's score entry.
    const { ceilingScore, docLift, docQualityGap, floorScore } = areaScore;

    return {
        area,
        ceiling: { ceilingScore, docLift, docQualityGap, floorScore },
        dimensions,
        gaps,
        history,
        // The area is ready exactly when it has no threshold violations.
        pass: violations.length === 0,
        score: areaScore.totalScore,
        threshold: compositeThreshold,
        thresholdEvaluation,
        violations,
    };
}
204
+ // ---------------------------------------------------------------------------
205
+ // Formatting helpers (private)
206
+ // ---------------------------------------------------------------------------
207
/** Render a numeric score for display, rounded to the nearest integer. */
function fmt(n) {
    const rounded = Math.round(n);
    return `${rounded}`;
}
211
/** Turn a kebab-case area name into space-separated, capitalized words. */
function formatAreaLabel(area) {
    const words = [];
    for (const word of area.split("-")) {
        // Only the first character is upper-cased; the rest is kept as-is.
        words.push(`${word.charAt(0).toUpperCase()}${word.slice(1)}`);
    }
    return words.join(" ");
}
218
+ // ---------------------------------------------------------------------------
219
+ // I/O helpers (used by CLI, not exported for testing)
220
+ // ---------------------------------------------------------------------------
221
/**
 * Read and parse the gap-analysis JSON file.
 *
 * @param path - Location of the gap-analysis file.
 * @returns The parsed JSON, or undefined when the file does not exist.
 */
function loadGapAnalysis(path) {
    return existsSync(path)
        ? JSON.parse(readFileSync(path, "utf-8"))
        : undefined;
}
226
/**
 * Collect one history entry per baseline file that has a score for `area`.
 *
 * Baseline files are the `.json` files in `baselinesDir`, visited in
 * sorted filename order. Files that cannot be read or parsed, and files
 * without a score for the area, are skipped.
 *
 * @param area - Feature area whose scores are collected.
 * @param baselinesDir - Directory holding baseline score summaries.
 * @returns Entries of `{ score, tag, timestamp }`; empty when the directory is missing.
 */
function loadHistory(area, baselinesDir) {
    if (!existsSync(baselinesDir)) {
        return [];
    }
    const baselineFiles = readdirSync(baselinesDir).filter((name) => name.endsWith(".json"));
    baselineFiles.sort();
    return baselineFiles.flatMap((fileName) => {
        try {
            const snapshot = JSON.parse(readFileSync(join(baselinesDir, fileName), "utf-8"));
            const match = snapshot.scores?.find((entry) => entry.feature === area);
            if (!match) {
                return [];
            }
            // Filenames look like "20260304_16_34_45_pre-groq.json": the first
            // four "_"-separated parts are the timestamp, the remainder is the tag.
            const stem = fileName.replace(/\.json$/, "");
            const [, , , , ...tagParts] = stem.split("_");
            const tag = tagParts.length > 0 ? tagParts.join("_") : undefined;
            return [{
                    score: match.totalScore,
                    tag,
                    timestamp: snapshot.timestamp ?? stem,
                }];
        }
        catch {
            // Skip malformed baseline files
            return [];
        }
    });
}
257
/**
 * Read and parse the score summary JSON file.
 *
 * @param path - Location of score-summary.json.
 * @returns The parsed JSON content.
 * @throws Error when the file does not exist.
 */
function loadScoreSummary(path) {
    if (existsSync(path)) {
        return JSON.parse(readFileSync(path, "utf-8"));
    }
    throw new Error(`Score summary not found at ${path}. Run \`pnpm pipeline\` first.`);
}
263
/**
 * Load the YAML threshold configuration and validate it against
 * ThresholdConfigSchema.
 *
 * @param path - Location of the thresholds YAML file.
 * @returns The schema-validated configuration data.
 * @throws Error when the file is missing, or when validation fails (the
 *   message lists one "path: message" line per schema issue).
 */
function loadThresholdConfig(path) {
    if (!existsSync(path)) {
        throw new Error(`Threshold config not found at ${path}.`);
    }
    const validation = ThresholdConfigSchema.safeParse(load(readFileSync(path, "utf-8")));
    if (validation.success) {
        return validation.data;
    }
    const details = [];
    for (const issue of validation.error.issues) {
        details.push(` ${issue.path.join(".")}: ${issue.message}`);
    }
    throw new Error(`Invalid thresholds.yaml:\n${details.join("\n")}`);
}
278
+ // ---------------------------------------------------------------------------
279
+ // CLI
280
+ // ---------------------------------------------------------------------------
281
/**
 * CLI entry point: load the inputs, build the readiness report for the
 * requested area, emit it as markdown, and exit with status 1 when the
 * area is not ready.
 */
function main() {
    const cli = parseArgs(process.argv);

    // Load data (history is read only when --history was passed)
    const scoreSummary = loadScoreSummary(SCORE_SUMMARY_PATH);
    const thresholdConfig = loadThresholdConfig(THRESHOLDS_PATH);
    const gapAnalysis = loadGapAnalysis(GAP_ANALYSIS_PATH);
    let history = [];
    if (cli.history) {
        history = loadHistory(cli.area, BASELINES_DIR);
    }

    // Generate report
    const report = generateReadinessReport({
        area: cli.area,
        gapAnalysis,
        history,
        scoreSummary,
        thresholdConfig,
    });

    // Format and output: to the --output file when given, otherwise stdout.
    const markdown = formatReadinessMarkdown(report);
    if (cli.output) {
        writeFileSync(cli.output, markdown, "utf-8");
        // The confirmation goes to stderr.
        console.error(`✅ Readiness report written to ${cli.output}`);
    }
    else {
        console.log(markdown);
    }

    // Exit with non-zero if not ready
    if (!report.pass) {
        process.exit(1);
    }
}
310
/**
 * Parse the readiness-report command line.
 *
 * Recognizes `--area <area>`, `--history` and `--output <file>`. A value
 * flag that is the last argument (no value follows) is ignored, as is
 * any unrecognized argument. When no area was supplied, usage help is
 * printed to stderr and the process exits with status 1.
 *
 * @param argv - Full argument vector; the first two entries are skipped.
 * @returns `{ area, history, output }`.
 */
function parseArgs(argv) {
    const args = argv.slice(2);
    let area;
    let history = false;
    let output;
    let index = 0;
    while (index < args.length) {
        const flag = args[index];
        const hasValue = index + 1 < args.length;
        if (hasValue && flag === "--area") {
            index += 1;
            area = args[index];
        }
        else if (flag === "--history") {
            history = true;
        }
        else if (hasValue && flag === "--output") {
            index += 1;
            output = args[index];
        }
        index += 1;
    }
    if (!area) {
        const usage = [
            "Usage: readiness-report --area <area> [--history] [--output <file>]",
            "",
            "Options:",
            " --area <area> Feature area to check (required)",
            " --history Include historical progress from baselines",
            " --output <file> Write markdown to file instead of stdout",
        ];
        for (const line of usage) {
            console.error(line);
        }
        process.exit(1);
    }
    return { area, history, output };
}
338
// Run the CLI only when this file is the invoked script (path ends in
// readiness-report.ts or readiness-report.js), not when it is imported.
if (/readiness-report\.(?:ts|js)$/.test(process.argv[1] ?? "")) {
    main();
}
@@ -0,0 +1,15 @@
1
+ /**
2
+ * update-quality-scores.ts
3
+ *
4
+ * Reads score-summary.json and updates the feature area quality grades
5
+ * table in docs/QUALITY_SCORE.md. Designed to run automatically after
6
+ * each evaluation as the final pipeline step.
7
+ *
8
+ * Usage:
9
+ * pnpm update-quality-scores
10
+ * tsx src/scripts/update-quality-scores.ts
11
+ */
12
+ export declare function updateQualityScores(): {
13
+ success: boolean;
14
+ message: string;
15
+ };
@@ -0,0 +1,184 @@
1
+ /**
2
+ * update-quality-scores.ts
3
+ *
4
+ * Reads score-summary.json and updates the feature area quality grades
5
+ * table in docs/QUALITY_SCORE.md. Designed to run automatically after
6
+ * each evaluation as the final pipeline step.
7
+ *
8
+ * Usage:
9
+ * pnpm update-quality-scores
10
+ * tsx src/scripts/update-quality-scores.ts
11
+ */
12
+ import { execSync } from "child_process";
13
+ import { existsSync, readFileSync, writeFileSync } from "fs";
14
+ import { dirname, join, resolve } from "path";
15
+ import { fileURLToPath } from "url";
16
+ const __dirname = dirname(fileURLToPath(import.meta.url));
17
+ const ROOT = resolve(__dirname, "..", "..");
18
+ const REPO_ROOT = resolve(ROOT, "..", "..");
19
+ const QUALITY_SCORE_PATH = join(REPO_ROOT, "docs", "QUALITY_SCORE.md");
20
+ const SCORE_SUMMARY_PATH = join(ROOT, "results", "latest", "score-summary.json");
21
+ // ---------------------------------------------------------------------------
22
+ // File update
23
+ // ---------------------------------------------------------------------------
24
/**
 * Regenerate the feature-area quality grades table in QUALITY_SCORE.md
 * from the latest score summary.
 *
 * Failures are reported through the return value rather than thrown:
 * a missing or unparseable score summary, a summary without scores, a
 * missing QUALITY_SCORE.md, or a QUALITY_SCORE.md in which the table
 * cannot be located.
 *
 * @returns `{ message, success }` describing the outcome.
 */
export function updateQualityScores() {
    // Read score summary
    if (!existsSync(SCORE_SUMMARY_PATH)) {
        return {
            message: `Score summary not found at ${SCORE_SUMMARY_PATH}. Run 'pnpm calculate-scores' first.`,
            success: false,
        };
    }
    let summary;
    try {
        const parsed = JSON.parse(readFileSync(SCORE_SUMMARY_PATH, "utf-8"));
        // A missing or non-array `scores` is treated as "no scores" so the
        // caller gets that message instead of an opaque TypeError text.
        const rawScores = Array.isArray(parsed?.scores) ? parsed.scores : [];
        // Normalize legacy field names (liftFromDocs → docLift)
        summary = {
            ...parsed,
            scores: rawScores.map((s) => ({
                ...s,
                docLift: s.docLift ?? s.liftFromDocs ?? 0,
            })),
        };
    }
    catch (err) {
        return {
            message: `Failed to parse score summary: ${err instanceof Error ? err.message : String(err)}`,
            success: false,
        };
    }
    if (summary.scores.length === 0) {
        return { message: "Score summary contains no scores.", success: false };
    }
    // Read QUALITY_SCORE.md
    if (!existsSync(QUALITY_SCORE_PATH)) {
        return {
            message: `QUALITY_SCORE.md not found at ${QUALITY_SCORE_PATH}.`,
            success: false,
        };
    }
    let markdown = readFileSync(QUALITY_SCORE_PATH, "utf-8");
    // Replace the feature area table: header row, separator row, then every
    // following newline-terminated row that starts and ends with "|".
    const newTable = generateTable(summary.scores);
    const tablePattern = /\| Feature Area\s+\| Score\s+\| Grade\s+\| Doc Lift\s+\| Key gap[^|]*\|\n\| [-\s|]+\|\n(\|[^\n]+\|\n)*/;
    const match = tablePattern.exec(markdown);
    if (!match) {
        return {
            message: "Could not find the feature area quality grades table in QUALITY_SCORE.md.",
            success: false,
        };
    }
    markdown =
        markdown.slice(0, match.index) +
            newTable +
            "\n" +
            markdown.slice(match.index + match[0].length);
    // Write back
    writeFileSync(QUALITY_SCORE_PATH, markdown);
    // Format with Prettier to ensure consistent table formatting
    // (emoji widths differ between padEnd and Prettier's table formatter).
    // The path is quoted so a checkout location containing spaces still works.
    try {
        execSync(`npx prettier --write "${QUALITY_SCORE_PATH}"`, {
            cwd: REPO_ROOT,
            stdio: "pipe",
        });
    }
    catch {
        // Non-fatal — formatting is nice-to-have
    }
    // The averages are appended only when the summary provides them; the
    // file is already written, so missing aggregates must not throw here.
    const avgScore = summary.overall?.avgScore;
    const avgDocLift = summary.overall?.avgDocLift;
    let stats = "";
    if (Number.isFinite(avgScore) && Number.isFinite(avgDocLift)) {
        const roundedLift = Math.round(avgDocLift);
        // Negative lifts already carry their sign; avoid rendering "+-N".
        const liftText = roundedLift < 0 ? String(roundedLift) : `+${roundedLift}`;
        stats = ` (avg: ${Math.round(avgScore)}, lift: ${liftText})`;
    }
    return {
        message: `Updated ${summary.scores.length} feature area scores in QUALITY_SCORE.md${stats}`,
        success: true,
    };
}
97
// ---------------------------------------------------------------------------
// Table generation
// ---------------------------------------------------------------------------
/**
 * Build the markdown quality-grades table for the given area scores.
 *
 * Rows are ordered by total score, highest first. Columns are padded with
 * `padEnd`, so emoji cells may not line up visually in a monospace view.
 *
 * @param scores - Area score entries (feature, totalScore, docLift and the
 *   three dimension scores are read).
 * @returns Header row, separator row and one row per area, joined by "\n".
 */
function generateTable(scores) {
    // Sort a copy so the caller's array order is left untouched.
    const sorted = [...scores].sort((a, b) => b.totalScore - a.totalScore);
    // Build rows with data
    const rows = sorted.map((s) => ({
        feature: s.feature,
        gap: keyGap(s, scores),
        grade: grade(s.totalScore),
        lift: formatLift(s.docLift),
        score: String(s.totalScore),
    }));
    // Calculate column widths from data (minimum widths from headers)
    const cols = {
        feature: Math.max(14, ...rows.map((r) => r.feature.length)),
        gap: Math.max(7, ...rows.map((r) => r.gap.length)),
        grade: 5,
        lift: 8,
        score: 5,
    };
    const fmtRow = (r) => `| ${r.feature.padEnd(cols.feature)} | ${r.score.padEnd(cols.score)} | ${r.grade.padEnd(cols.grade)} | ${r.lift.padEnd(cols.lift)} | ${r.gap.padEnd(cols.gap)} |`;
    const header = fmtRow({
        feature: "Feature Area",
        gap: "Key gap",
        grade: "Grade",
        lift: "Doc Lift",
        score: "Score",
    });
    const sep = `| ${"-".repeat(cols.feature)} | ${"-".repeat(cols.score)} | ${"-".repeat(cols.grade)} | ${"-".repeat(cols.lift)} | ${"-".repeat(cols.gap)} |`;
    return [header, sep, ...rows.map(fmtRow)].join("\n");
}
/**
 * Format a doc-lift value with an explicit sign: non-negative values get
 * a leading "+", negative values keep their own "-" (never "+-5").
 */
function formatLift(lift) {
    return lift < 0 ? String(lift) : `+${lift}`;
}
// ---------------------------------------------------------------------------
// Grading
// ---------------------------------------------------------------------------
/**
 * Map a total score to its grade label: A from 80, B from 60, C from 40,
 * D below that.
 */
function grade(score) {
    if (score >= 80)
        return "✅ A";
    if (score >= 60)
        return "🟡 B";
    if (score >= 40)
        return "🟠 C";
    return "🔴 D";
}
// ---------------------------------------------------------------------------
// Key gap summary
// ---------------------------------------------------------------------------
/**
 * Produce the short "Key gap" note for one area.
 *
 * @param s - The area's score entry.
 * @param allScores - Every area's score entry, used to detect the highest
 *   total score and the highest / lowest doc lift.
 * @returns A one-line description naming the area's weakest dimension or
 *   its notable strength.
 */
function keyGap(s, allScores) {
    // Below critical threshold
    if (s.totalScore < 40) {
        return "⚠️ Below critical — all dimensions underperform";
    }
    // Find the weakest dimension relative to max possible (all 0–100)
    const dims = [
        { max: 100, name: "task completion", score: s.taskCompletion },
        { max: 100, name: "code correctness", score: s.codeCorrectness },
        { max: 100, name: "doc coverage", score: s.docCoverage },
    ];
    // Sort by ratio (lowest first)
    dims.sort((a, b) => a.score / a.max - b.score / b.max);
    const weakest = dims[0];
    // Check for notable strengths
    const lifts = allScores.map((sc) => sc.docLift);
    const maxLift = Math.max(...lifts);
    const minLift = Math.min(...lifts);
    const maxScore = Math.max(...allScores.map((sc) => sc.totalScore));
    if (s.totalScore === maxScore) {
        return `Strong — highest score; ${weakest.name} (${weakest.score}/${weakest.max})`;
    }
    if (s.docLift === maxLift) {
        return `Highest doc lift; ${weakest.name} (${weakest.score}/${weakest.max})`;
    }
    if (weakest.score === 0) {
        // Only claim "lowest doc lift" when this area really has it.
        return s.docLift === minLift
            ? `Zero ${weakest.name} score; lowest doc lift`
            : `Zero ${weakest.name} score`;
    }
    return `${weakest.name[0].toUpperCase() + weakest.name.slice(1)} (${weakest.score}/${weakest.max}) holds back total score`;
}
170
+ // ---------------------------------------------------------------------------
171
+ // Main (when run directly)
172
+ // ---------------------------------------------------------------------------
173
// Run the update only when this file is the invoked script (path ends in
// update-quality-scores.ts or update-quality-scores.js), not on import.
if (/update-quality-scores\.(?:ts|js)$/.test(process.argv[1] ?? "")) {
    console.log("=== Updating QUALITY_SCORE.md from score-summary.json ===\n");
    const { message, success } = updateQualityScores();
    if (!success) {
        // Failure goes to stderr and yields a non-zero exit status.
        console.error(` ❌ ${message}`);
        process.exit(1);
    }
    else {
        console.log(` ✅ ${message}`);
    }
}
@@ -16,6 +16,6 @@
16
16
  * - Migration script has been run (ailf.task documents exist in CL)
17
17
  * - SANITY_API_TOKEN configured for Content Lake reads
18
18
  *
19
- * @see docs/exec-plans/tasks-as-content/phase-3-migration.md
19
+ * @see docs/archive/exec-plans/tasks-as-content/phase-3-migration.md
20
20
  */
21
21
  export {};
@@ -16,7 +16,7 @@
16
16
  * - Migration script has been run (ailf.task documents exist in CL)
17
17
  * - SANITY_API_TOKEN configured for Content Lake reads
18
18
  *
19
- * @see docs/exec-plans/tasks-as-content/phase-3-migration.md
19
+ * @see docs/archive/exec-plans/tasks-as-content/phase-3-migration.md
20
20
  */
21
21
  import { config as dotenvConfig } from "dotenv";
22
22
  import { existsSync } from "fs";
@@ -0,0 +1,13 @@
1
+ /**
2
+ * validate.ts
3
+ *
4
+ * CLI script that validates pipeline configuration.
5
+ * Checks that all YAML files are consistent, all task-to-mapping
6
+ * cross-references are valid, and reference solutions exist.
7
+ *
8
+ * Usage:
9
+ * pnpm validate # validate everything
10
+ * pnpm validate --strict # treat warnings as errors
11
+ * pnpm validate --contexts # also check that context files exist
12
+ */
13
+ export {};
@@ -0,0 +1,79 @@
1
+ /**
2
+ * validate.ts
3
+ *
4
+ * CLI script that validates pipeline configuration.
5
+ * Checks that all YAML files are consistent, all task-to-mapping
6
+ * cross-references are valid, and reference solutions exist.
7
+ *
8
+ * Usage:
9
+ * pnpm validate # validate everything
10
+ * pnpm validate --strict # treat warnings as errors
11
+ * pnpm validate --contexts # also check that context files exist
12
+ */
13
+ import { dirname, resolve } from "path";
14
+ import { fileURLToPath } from "url";
15
+ import { checkContextsExist, checkEnvironment } from "../pipeline/checks.js";
16
+ import { validateConfiguration } from "../pipeline/validate.js";
17
const __dirname = dirname(fileURLToPath(import.meta.url));
const ROOT = resolve(__dirname, "..", "..");
// ---------------------------------------------------------------------------
// CLI argument parsing
// ---------------------------------------------------------------------------
const args = process.argv.slice(2);
const strict = args.some((arg) => arg === "--strict");
const checkCtx = args.some((arg) => arg === "--contexts");
// ---------------------------------------------------------------------------
// Run validation
// ---------------------------------------------------------------------------
console.log("=== ai-literacy-framework — Configuration Validator ===\n");
const result = validateConfiguration(ROOT);
// Context-file checks run only on request (--contexts).
if (checkCtx) {
    // The feature-area list is imported lazily, only when it is needed.
    const { ALL_FEATURE_AREAS } = await import("../sanity/queries.js");
    const contextIssues = checkContextsExist(ROOT, ALL_FEATURE_AREAS);
    result.issues.push(...contextIssues);
    const hasContextError = contextIssues.some((issue) => issue.severity === "error");
    result.valid = result.valid && !hasContextError;
}
// Environment issues are always appended to the report.
result.issues.push(...checkEnvironment(ROOT));
// ---------------------------------------------------------------------------
// Report results
// ---------------------------------------------------------------------------
const errors = result.issues.filter((issue) => issue.severity === "error");
const warnings = result.issues.filter((issue) => issue.severity === "warning");
/** Print one group of issues under a heading; prints nothing for an empty group. */
const printIssues = (heading, tag, issues) => {
    if (issues.length === 0) {
        return;
    }
    console.log(heading);
    for (const issue of issues) {
        console.log(` ${tag} [${issue.source}] ${issue.message}`);
        if (issue.path) {
            console.log(` at ${issue.path}`);
        }
    }
    console.log();
};
printIssues(`❌ ${errors.length} error(s):\n`, "ERROR", errors);
printIssues(`⚠️ ${warnings.length} warning(s):\n`, "WARN", warnings);
if (errors.length === 0) {
    if (warnings.length === 0) {
        console.log("✅ All checks passed — configuration is valid.\n");
    }
    else {
        console.log(`✅ Configuration is valid (${warnings.length} warning(s)).\n`);
    }
}
// In strict mode any reported issue fails the run; otherwise only errors do.
const failed = strict ? result.issues.length > 0 : errors.length > 0;
process.exit(failed ? 1 : 0);
@@ -0,0 +1,26 @@
1
+ /**
2
+ * webhook-server.ts
3
+ *
4
+ * Local development server for testing the webhook handler.
5
+ *
6
+ * Starts an HTTP server that receives Sanity webhook payloads, processes
7
+ * them through the WebhookHandler, and logs results. Useful for local
8
+ * development and testing the full event-driven trigger flow.
9
+ *
10
+ * Usage:
11
+ * pnpm webhook-server # start on port 3333
12
+ * WEBHOOK_PORT=8080 pnpm webhook-server # custom port
13
+ *
14
+ * Test with curl:
15
+ * curl -X POST http://localhost:3333/webhook \
16
+ * -H "Content-Type: application/json" \
17
+ * -d '{"operation":"update","result":{"_id":"abc","_type":"article","slug":{"current":"groq-introduction"}}}'
18
+ *
19
+ * Endpoints:
20
+ * POST /webhook — handle a Sanity webhook payload
21
+ * GET /health — handler diagnostics (budget, pending, tracked slugs)
22
+ * GET /mappings — list all tracked document slugs and their areas
23
+ *
24
+ * @see docs/design-docs/report-store/visibility-workflows.md
25
+ */
26
+ export {};