@sanity/ailf 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (444) hide show
  1. package/canonical/grader-references/README.md +2 -2
  2. package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
  3. package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
  4. package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
  5. package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
  6. package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
  7. package/config/features.ts +1 -1
  8. package/config/models.ts +28 -23
  9. package/config/sources.ts +1 -1
  10. package/config/thresholds.ts +1 -1
  11. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
  12. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
  13. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
  14. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
  15. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
  16. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
  17. package/dist/_vendor/ailf-core/config-helpers.d.ts +6 -0
  18. package/dist/_vendor/ailf-core/config-helpers.js +29 -0
  19. package/dist/_vendor/ailf-core/examples/index.d.ts +164 -94
  20. package/dist/_vendor/ailf-core/examples/index.js +208 -114
  21. package/dist/_vendor/ailf-core/index.d.ts +1 -0
  22. package/dist/_vendor/ailf-core/index.js +1 -0
  23. package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
  24. package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
  25. package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
  26. package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
  27. package/dist/_vendor/ailf-core/ports/context.d.ts +20 -1
  28. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
  29. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
  30. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
  31. package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
  32. package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
  33. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +6 -1
  34. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +14 -2
  35. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
  36. package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
  37. package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
  38. package/dist/_vendor/ailf-core/services/index.js +1 -1
  39. package/dist/_vendor/ailf-core/services/scoring.js +9 -0
  40. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +12 -1
  41. package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
  42. package/dist/_vendor/ailf-core/types/index.d.ts +47 -4
  43. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +27 -0
  44. package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
  45. package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
  46. package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
  47. package/dist/_vendor/ailf-tasks/cli.js +61 -0
  48. package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
  49. package/dist/_vendor/ailf-tasks/index.js +16 -0
  50. package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
  51. package/dist/_vendor/ailf-tasks/parser.js +73 -0
  52. package/dist/_vendor/ailf-tasks/schemas.d.ts +198 -0
  53. package/dist/_vendor/ailf-tasks/schemas.js +180 -0
  54. package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
  55. package/dist/_vendor/ailf-tasks/validation.js +162 -0
  56. package/dist/adapters/api-client/remediation.js +2 -2
  57. package/dist/adapters/config-sources/file-config-adapter.js +6 -1
  58. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
  59. package/dist/adapters/index.d.ts +0 -1
  60. package/dist/adapters/index.js +0 -1
  61. package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
  62. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  63. package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
  64. package/dist/adapters/task-sources/content-lake-task-source.js +4 -6
  65. package/dist/adapters/task-sources/index.d.ts +1 -2
  66. package/dist/adapters/task-sources/index.js +1 -2
  67. package/dist/adapters/task-sources/repo-schemas.d.ts +1 -1
  68. package/dist/adapters/task-sources/repo-schemas.js +2 -2
  69. package/dist/adapters/task-sources/repo-task-source.js +1 -1
  70. package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
  71. package/dist/adapters/task-sources/repo-trigger.js +1 -1
  72. package/dist/adapters/task-sources/task-file-loader.d.ts +9 -6
  73. package/dist/adapters/task-sources/task-file-loader.js +20 -6
  74. package/dist/agent-observer/test-imports.d.ts +7 -0
  75. package/dist/agent-observer/test-imports.js +185 -0
  76. package/dist/artifact-capture/comparator.d.ts +22 -0
  77. package/dist/artifact-capture/comparator.js +493 -0
  78. package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
  79. package/dist/artifact-capture/filesystem-collector.js +237 -0
  80. package/dist/artifact-capture/redact-artifact.d.ts +20 -0
  81. package/dist/artifact-capture/redact-artifact.js +115 -0
  82. package/dist/assertions/source-isolation.d.ts +1 -1
  83. package/dist/assertions/source-isolation.js +1 -1
  84. package/dist/cli.js +4 -0
  85. package/dist/commands/calculate-scores.js +1 -0
  86. package/dist/commands/capture-compare.d.ts +15 -0
  87. package/dist/commands/capture-compare.js +253 -0
  88. package/dist/commands/capture-list.d.ts +12 -0
  89. package/dist/commands/capture-list.js +147 -0
  90. package/dist/commands/capture.d.ts +9 -0
  91. package/dist/commands/capture.js +16 -0
  92. package/dist/commands/chronic-failures.d.ts +8 -0
  93. package/dist/commands/chronic-failures.js +33 -0
  94. package/dist/commands/explain-handler.d.ts +1 -1
  95. package/dist/commands/explain-handler.js +37 -8
  96. package/dist/commands/fetch-docs.js +1 -0
  97. package/dist/commands/generate-configs.d.ts +3 -3
  98. package/dist/commands/generate-configs.js +20 -8
  99. package/dist/commands/init.d.ts +2 -3
  100. package/dist/commands/init.js +56 -170
  101. package/dist/commands/pipeline-action.d.ts +7 -1
  102. package/dist/commands/pipeline-action.js +43 -19
  103. package/dist/commands/pipeline.d.ts +6 -1
  104. package/dist/commands/pipeline.js +7 -2
  105. package/dist/commands/pr-comment.js +1 -0
  106. package/dist/commands/publish.js +1 -0
  107. package/dist/commands/shared/help.js +2 -2
  108. package/dist/commands/update-quality-scores.d.ts +5 -0
  109. package/dist/commands/update-quality-scores.js +20 -0
  110. package/dist/composition-root.d.ts +2 -3
  111. package/dist/composition-root.js +27 -14
  112. package/dist/config/features.ts +23 -0
  113. package/dist/config/models.ts +100 -0
  114. package/dist/config/prompts.ts +16 -0
  115. package/dist/config/rubrics.ts +225 -0
  116. package/dist/config/schedules.ts +47 -0
  117. package/dist/config/sinks.ts +37 -0
  118. package/dist/config/sources.ts +21 -0
  119. package/dist/config/thresholds.ts +61 -0
  120. package/dist/lib/agent-behavior-report.d.ts +8 -0
  121. package/dist/lib/agent-behavior-report.js +185 -0
  122. package/dist/lib/baseline.d.ts +19 -0
  123. package/dist/lib/baseline.js +153 -0
  124. package/dist/lib/calculate-scores.d.ts +23 -0
  125. package/dist/lib/calculate-scores.js +42 -0
  126. package/dist/lib/compare.d.ts +18 -0
  127. package/dist/lib/compare.js +170 -0
  128. package/dist/lib/coverage-audit.d.ts +4 -0
  129. package/dist/lib/coverage-audit.js +42 -0
  130. package/dist/lib/discovery-report.d.ts +13 -0
  131. package/dist/lib/discovery-report.js +57 -0
  132. package/dist/lib/fetch-docs.d.ts +30 -0
  133. package/dist/lib/fetch-docs.js +171 -0
  134. package/dist/lib/generate-configs.d.ts +25 -0
  135. package/dist/lib/generate-configs.js +42 -0
  136. package/dist/lib/grader-api.d.ts +21 -0
  137. package/dist/lib/grader-api.js +34 -0
  138. package/dist/lib/grader-compare.d.ts +19 -0
  139. package/dist/lib/grader-compare.js +91 -0
  140. package/dist/lib/grader-consistency.d.ts +27 -0
  141. package/dist/lib/grader-consistency.js +79 -0
  142. package/dist/lib/grader-sensitivity.d.ts +19 -0
  143. package/dist/lib/grader-sensitivity.js +75 -0
  144. package/dist/lib/grader-validate.d.ts +19 -0
  145. package/dist/lib/grader-validate.js +78 -0
  146. package/dist/lib/measure-retrieval.d.ts +14 -0
  147. package/dist/lib/measure-retrieval.js +71 -0
  148. package/dist/lib/pr-comment.d.ts +16 -0
  149. package/dist/lib/pr-comment.js +28 -0
  150. package/dist/lib/readiness-report.d.ts +13 -0
  151. package/dist/lib/readiness-report.js +108 -0
  152. package/dist/lib/webhook-server.d.ts +11 -0
  153. package/dist/lib/webhook-server.js +24 -0
  154. package/dist/lib/weekly-digest.d.ts +24 -0
  155. package/dist/lib/weekly-digest.js +148 -0
  156. package/dist/orchestration/build-app-context.js +13 -0
  157. package/dist/orchestration/cache-context.d.ts +23 -0
  158. package/dist/orchestration/cache-context.js +43 -0
  159. package/dist/orchestration/env-bridge.d.ts +21 -0
  160. package/dist/orchestration/env-bridge.js +66 -0
  161. package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
  162. package/dist/orchestration/load-pipeline-tasks.js +52 -0
  163. package/dist/orchestration/pipeline-orchestrator.js +75 -5
  164. package/dist/orchestration/step-runner.js +5 -1
  165. package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
  166. package/dist/orchestration/steps/calculate-scores-step.js +13 -0
  167. package/dist/orchestration/steps/callback-step.js +10 -1
  168. package/dist/orchestration/steps/compare-step.js +6 -3
  169. package/dist/orchestration/steps/discovery-report-step.js +6 -2
  170. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  171. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  172. package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
  173. package/dist/orchestration/steps/fetch-docs-step.js +30 -16
  174. package/dist/orchestration/steps/gap-analysis-step.js +13 -2
  175. package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
  176. package/dist/orchestration/steps/generate-configs-step.js +50 -15
  177. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
  178. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  179. package/dist/orchestration/steps/publish-report-step.js +19 -0
  180. package/dist/orchestration/steps/readiness-step.js +8 -3
  181. package/dist/orchestration/steps/report-step.js +17 -4
  182. package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
  183. package/dist/orchestration/steps/run-eval-step.js +52 -32
  184. package/dist/pipeline/agent-behavior-report.js +6 -0
  185. package/dist/pipeline/attribution.d.ts +1 -1
  186. package/dist/pipeline/attribution.js +1 -1
  187. package/dist/pipeline/cache.js +29 -15
  188. package/dist/pipeline/calculate-scores.d.ts +2 -0
  189. package/dist/pipeline/calculate-scores.js +70 -33
  190. package/dist/pipeline/checks.d.ts +8 -3
  191. package/dist/pipeline/checks.js +23 -3
  192. package/dist/pipeline/chronic-failures.d.ts +55 -0
  193. package/dist/pipeline/chronic-failures.js +110 -0
  194. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +33 -0
  195. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
  196. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
  197. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
  198. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
  199. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
  200. package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
  201. package/dist/pipeline/compiler/assertion-mapper.js +1 -1
  202. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
  203. package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
  204. package/dist/pipeline/compiler/config-loader.d.ts +14 -0
  205. package/dist/pipeline/compiler/config-loader.js +42 -2
  206. package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
  207. package/dist/pipeline/compiler/fixture-resolver.js +1 -1
  208. package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
  209. package/dist/pipeline/compiler/ignore-fields.js +1 -1
  210. package/dist/pipeline/compiler/index.d.ts +2 -5
  211. package/dist/pipeline/compiler/index.js +2 -5
  212. package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
  213. package/dist/pipeline/compiler/literacy-bridge.js +1 -1
  214. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +1 -1
  215. package/dist/pipeline/compiler/mode-bases/agent-harness.js +1 -1
  216. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +1 -1
  217. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +1 -1
  218. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +13 -2
  219. package/dist/pipeline/compiler/mode-bases/literacy.js +55 -1
  220. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +1 -1
  221. package/dist/pipeline/compiler/mode-bases/mcp-server.js +1 -1
  222. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +1 -1
  223. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -1
  224. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
  225. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
  226. package/dist/pipeline/compiler/mode-handlers/index.d.ts +2 -2
  227. package/dist/pipeline/compiler/mode-handlers/index.js +2 -2
  228. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +1 -1
  229. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +1 -1
  230. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
  231. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
  232. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
  233. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +1 -1
  234. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
  235. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
  236. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
  237. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +334 -0
  238. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +1 -1
  239. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +1 -1
  240. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +1 -1
  241. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +1 -1
  242. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +4 -0
  243. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +69 -0
  244. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +307 -0
  245. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +22 -5
  246. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +6 -0
  247. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +10 -5
  248. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +314 -7
  249. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +10 -0
  250. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
  251. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
  252. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +1 -1
  253. package/dist/pipeline/compiler/presets/sanity-literacy.js +1 -1
  254. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
  255. package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
  256. package/dist/pipeline/compiler/provider-assembler.js +13 -7
  257. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
  258. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
  259. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
  260. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
  261. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
  262. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
  263. package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
  264. package/dist/pipeline/compiler/sandbox/index.js +1 -1
  265. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
  266. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
  267. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
  268. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
  269. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
  270. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
  271. package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
  272. package/dist/pipeline/compiler/scoring-bridge.js +1 -1
  273. package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
  274. package/dist/pipeline/compiler/task-bridge.js +92 -0
  275. package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
  276. package/dist/pipeline/compiler/task-graph-builder.js +1 -4
  277. package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
  278. package/dist/pipeline/compiler/telemetry/index.js +1 -1
  279. package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
  280. package/dist/pipeline/compiler/variable-resolver.js +1 -1
  281. package/dist/pipeline/coverage-audit.d.ts +1 -1
  282. package/dist/pipeline/coverage-audit.js +1 -1
  283. package/dist/pipeline/degradations.d.ts +1 -1
  284. package/dist/pipeline/degradations.js +1 -1
  285. package/dist/pipeline/failure-modes.d.ts +1 -1
  286. package/dist/pipeline/failure-modes.js +13 -1
  287. package/dist/pipeline/gap-analysis.d.ts +1 -1
  288. package/dist/pipeline/gap-analysis.js +3 -1
  289. package/dist/pipeline/generate-configs.d.ts +2 -2
  290. package/dist/pipeline/generate-configs.js +15 -8
  291. package/dist/pipeline/grader-compare-runner.d.ts +1 -1
  292. package/dist/pipeline/grader-compare-runner.js +7 -1
  293. package/dist/pipeline/grader-comparison.d.ts +1 -1
  294. package/dist/pipeline/grader-comparison.js +1 -1
  295. package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
  296. package/dist/pipeline/grader-consistency-runner.js +7 -1
  297. package/dist/pipeline/grader-consistency.d.ts +1 -1
  298. package/dist/pipeline/grader-consistency.js +1 -1
  299. package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
  300. package/dist/pipeline/grader-sensitivity-runner.js +1 -1
  301. package/dist/pipeline/grader-sensitivity.d.ts +1 -1
  302. package/dist/pipeline/grader-sensitivity.js +1 -1
  303. package/dist/pipeline/grader-validate-runner.d.ts +1 -1
  304. package/dist/pipeline/grader-validate-runner.js +2 -2
  305. package/dist/pipeline/grader-validation.d.ts +1 -1
  306. package/dist/pipeline/grader-validation.js +1 -1
  307. package/dist/pipeline/map-request-to-config.js +15 -2
  308. package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
  309. package/dist/pipeline/mirror-repo-tasks.js +1 -1
  310. package/dist/pipeline/plan-format.d.ts +1 -1
  311. package/dist/pipeline/plan-format.js +1 -1
  312. package/dist/pipeline/plan.d.ts +1 -1
  313. package/dist/pipeline/plan.js +67 -29
  314. package/dist/pipeline/probe.d.ts +1 -1
  315. package/dist/pipeline/probe.js +1 -1
  316. package/dist/pipeline/readiness-report.d.ts +2 -2
  317. package/dist/pipeline/readiness-report.js +2 -2
  318. package/dist/pipeline/release-classification.d.ts +1 -1
  319. package/dist/pipeline/release-classification.js +1 -1
  320. package/dist/pipeline/release-report.d.ts +1 -1
  321. package/dist/pipeline/release-report.js +1 -1
  322. package/dist/pipeline/repo-eval-comment.d.ts +1 -1
  323. package/dist/pipeline/repo-eval-comment.js +1 -1
  324. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  325. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  326. package/dist/pipeline/resolve-mappings.d.ts +6 -6
  327. package/dist/pipeline/resolve-mappings.js +44 -44
  328. package/dist/pipeline/retrieval-metrics.d.ts +3 -3
  329. package/dist/pipeline/retrieval-metrics.js +28 -20
  330. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  331. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  332. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  333. package/dist/pipeline/steps/compare-step.js +90 -0
  334. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  335. package/dist/pipeline/steps/eval-step.js +347 -0
  336. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  337. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  338. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  339. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  340. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  341. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  342. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  343. package/dist/pipeline/steps/publish-report-step.js +243 -0
  344. package/dist/pipeline/steps/report-step.d.ts +13 -0
  345. package/dist/pipeline/steps/report-step.js +56 -0
  346. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  347. package/dist/pipeline/steps/update-scores-step.js +42 -0
  348. package/dist/pipeline/targeted-loo.d.ts +1 -1
  349. package/dist/pipeline/targeted-loo.js +1 -1
  350. package/dist/pipeline/thresholds.d.ts +1 -1
  351. package/dist/pipeline/thresholds.js +1 -1
  352. package/dist/pipeline/validate.js +13 -0
  353. package/dist/report-store.d.ts +17 -0
  354. package/dist/report-store.js +24 -0
  355. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  356. package/dist/scripts/agent-behavior-report.js +315 -0
  357. package/dist/scripts/baseline.d.ts +43 -0
  358. package/dist/scripts/baseline.js +267 -0
  359. package/dist/scripts/calculate-scores.d.ts +166 -0
  360. package/dist/scripts/calculate-scores.js +1296 -0
  361. package/dist/scripts/compare.d.ts +22 -0
  362. package/dist/scripts/compare.js +334 -0
  363. package/dist/scripts/coverage-audit.d.ts +44 -0
  364. package/dist/scripts/coverage-audit.js +209 -0
  365. package/dist/scripts/debug-eval.d.ts +19 -0
  366. package/dist/scripts/debug-eval.js +73 -0
  367. package/dist/scripts/discovery-report.d.ts +58 -0
  368. package/dist/scripts/discovery-report.js +250 -0
  369. package/dist/scripts/fetch-docs.d.ts +35 -0
  370. package/dist/scripts/fetch-docs.js +472 -0
  371. package/dist/scripts/generate-configs.d.ts +66 -0
  372. package/dist/scripts/generate-configs.js +459 -0
  373. package/dist/scripts/grader-api.d.ts +27 -0
  374. package/dist/scripts/grader-api.js +206 -0
  375. package/dist/scripts/grader-compare.d.ts +22 -0
  376. package/dist/scripts/grader-compare.js +368 -0
  377. package/dist/scripts/grader-consistency.d.ts +20 -0
  378. package/dist/scripts/grader-consistency.js +313 -0
  379. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  380. package/dist/scripts/grader-sensitivity.js +354 -0
  381. package/dist/scripts/grader-validate.d.ts +19 -0
  382. package/dist/scripts/grader-validate.js +267 -0
  383. package/dist/scripts/measure-retrieval.d.ts +10 -0
  384. package/dist/scripts/measure-retrieval.js +145 -0
  385. package/dist/scripts/migrate-task-mode.d.ts +1 -1
  386. package/dist/scripts/migrate-task-mode.js +1 -1
  387. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
  388. package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
  389. package/dist/scripts/pipeline.d.ts +76 -0
  390. package/dist/scripts/pipeline.js +1031 -0
  391. package/dist/scripts/pr-comment.d.ts +10 -0
  392. package/dist/scripts/pr-comment.js +510 -0
  393. package/dist/scripts/readiness-report.d.ts +88 -0
  394. package/dist/scripts/readiness-report.js +342 -0
  395. package/dist/scripts/update-quality-scores.d.ts +15 -0
  396. package/dist/scripts/update-quality-scores.js +184 -0
  397. package/dist/scripts/validate-task-sources.d.ts +1 -1
  398. package/dist/scripts/validate-task-sources.js +1 -1
  399. package/dist/scripts/validate.d.ts +13 -0
  400. package/dist/scripts/validate.js +79 -0
  401. package/dist/scripts/webhook-server.d.ts +26 -0
  402. package/dist/scripts/webhook-server.js +147 -0
  403. package/dist/scripts/weekly-digest.d.ts +24 -0
  404. package/dist/scripts/weekly-digest.js +144 -0
  405. package/dist/sinks/format-slack.d.ts +64 -0
  406. package/dist/sinks/format-slack.js +306 -0
  407. package/dist/sinks/slack-sink.d.ts +27 -0
  408. package/dist/sinks/slack-sink.js +78 -0
  409. package/dist/sinks/types.d.ts +1 -1
  410. package/dist/sinks/types.js +1 -1
  411. package/dist/sinks/webhook-sink.d.ts +19 -0
  412. package/dist/sinks/webhook-sink.js +50 -0
  413. package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
  414. package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
  415. package/dist/tasks/literacy/content-lake.task.ts +181 -0
  416. package/dist/tasks/literacy/frameworks.task.ts +129 -0
  417. package/dist/tasks/literacy/functions.task.ts +70 -0
  418. package/dist/tasks/literacy/groq.task.ts +259 -0
  419. package/dist/tasks/literacy/image-handling.task.ts +95 -0
  420. package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
  421. package/dist/tasks/literacy/portable-text.task.ts +169 -0
  422. package/dist/tasks/literacy/studio-setup.task.ts +134 -0
  423. package/dist/tasks/literacy/visual-editing.task.ts +147 -0
  424. package/package.json +25 -25
  425. package/tasks/.expanded.agentic.yaml +280 -0
  426. package/tasks/.expanded.yaml +565 -0
  427. package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
  428. package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
  429. package/tasks/literacy/content-lake.task.ts +181 -0
  430. package/tasks/literacy/frameworks.task.ts +1 -0
  431. package/tasks/literacy/functions.task.ts +1 -0
  432. package/tasks/literacy/groq.task.ts +1 -0
  433. package/tasks/literacy/image-handling.task.ts +95 -0
  434. package/tasks/literacy/nextjs-live.task.ts +2 -1
  435. package/tasks/literacy/portable-text.task.ts +169 -0
  436. package/tasks/literacy/studio-setup.task.ts +5 -2
  437. package/tasks/literacy/visual-editing.task.ts +1 -0
  438. package/LICENSE +0 -21
  439. package/tasks/frameworks.yaml +0 -98
  440. package/tasks/functions.yaml +0 -51
  441. package/tasks/groq.yaml +0 -216
  442. package/tasks/nextjs-live.yaml +0 -62
  443. package/tasks/studio-setup.yaml +0 -111
  444. package/tasks/visual-editing.yaml +0 -120
@@ -18,7 +18,7 @@
18
18
  *
19
19
  * @see packages/core/src/services/scoring-engine.ts — the 4-tier engine
20
20
  * @see packages/eval/src/pipeline/calculate-scores.ts — the consumer
21
- * @see docs/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
21
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
22
22
  */
23
23
  import { aggregateDimensions, computeTaskScore, normalizeScore, } from "../../_vendor/ailf-core/index.js";
24
24
  import { classifyRubric, parseRubricScore } from "../../_vendor/ailf-core/index.js";
@@ -0,0 +1,41 @@
1
+ /**
2
+ * task-bridge.ts — Bidirectional bridge between old TaskDefinition and new LiteracyTaskDefinition.
3
+ *
4
+ * Enables incremental migration: consumers can convert between the two types
5
+ * without changing their internal logic. Once all consumers use
6
+ * GeneralizedTaskDefinition, this module is deleted (Wave 3 task 6).
7
+ *
8
+ * Field mapping (TaskDefinition ↔ LiteracyTaskDefinition):
9
+ * id ↔ id
10
+ * description ↔ title
11
+ * featureArea ↔ area
12
+ * taskPrompt ↔ prompt.text (fallback: prompt.template)
13
+ * canonicalDocs ↔ context.docs
14
+ * referenceSolution ↔ referenceSolution
15
+ * docCoverage ↔ docCoverage
16
+ * assertions ↔ assertions (structurally identical)
17
+ * baseline ↔ baseline (structurally identical)
18
+ * tags ↔ tags
19
+ * status ↔ status
20
+ * extraVars ↔ prompt.vars
21
+ *
22
+ * The assertion and doc-ref sub-types are structurally identical between
23
+ * the old and new type systems, so no field-level remapping is needed
24
+ * for those — only a TypeScript-level cast.
25
+ */
26
+ import type { LiteracyTaskDefinition, TaskDefinition } from "../../_vendor/ailf-core/index.d.ts";
27
+ /**
28
+ * Convert an old-style TaskDefinition to the new LiteracyTaskDefinition.
29
+ *
30
+ * Every field of TaskDefinition has a corresponding field in LiteracyTaskDefinition,
31
+ * so this conversion is lossless.
32
+ */
33
+ export declare function toGeneralized(task: TaskDefinition): LiteracyTaskDefinition;
34
+ /**
35
+ * Convert a new LiteracyTaskDefinition to the old TaskDefinition shape.
36
+ *
37
+ * Fields that only exist on LiteracyTaskDefinition (description, difficulty,
38
+ * metadata, rubric, providers, options, context.fixtures, prompt.systemMessage)
39
+ * are dropped — the old type has no place for them.
40
+ */
41
+ export declare function toLiteracyTask(task: LiteracyTaskDefinition): TaskDefinition;
@@ -0,0 +1,92 @@
1
+ /**
2
+ * task-bridge.ts — Bidirectional bridge between old TaskDefinition and new LiteracyTaskDefinition.
3
+ *
4
+ * Enables incremental migration: consumers can convert between the two types
5
+ * without changing their internal logic. Once all consumers use
6
+ * GeneralizedTaskDefinition, this module is deleted (Wave 3 task 6).
7
+ *
8
+ * Field mapping (TaskDefinition ↔ LiteracyTaskDefinition):
9
+ * id ↔ id
10
+ * description ↔ title
11
+ * featureArea ↔ area
12
+ * taskPrompt ↔ prompt.text (fallback: prompt.template)
13
+ * canonicalDocs ↔ context.docs
14
+ * referenceSolution ↔ referenceSolution
15
+ * docCoverage ↔ docCoverage
16
+ * assertions ↔ assertions (structurally identical)
17
+ * baseline ↔ baseline (structurally identical)
18
+ * tags ↔ tags
19
+ * status ↔ status
20
+ * extraVars ↔ prompt.vars
21
+ *
22
+ * The assertion and doc-ref sub-types are structurally identical between
23
+ * the old and new type systems, so no field-level remapping is needed
24
+ * for those — only a TypeScript-level cast.
25
+ */
26
+ // ---------------------------------------------------------------------------
27
+ // toGeneralized — old TaskDefinition → LiteracyTaskDefinition
28
+ // ---------------------------------------------------------------------------
29
+ /**
30
+ * Convert an old-style TaskDefinition to the new LiteracyTaskDefinition.
31
+ *
32
+ * Every field of TaskDefinition has a corresponding field in LiteracyTaskDefinition,
33
+ * so this conversion is lossless.
34
+ */
35
+ export function toGeneralized(task) {
36
+ const result = {
37
+ mode: "literacy",
38
+ id: task.id,
39
+ title: task.description,
40
+ area: task.featureArea,
41
+ prompt: {
42
+ text: task.taskPrompt,
43
+ ...(task.extraVars != null ? { vars: task.extraVars } : {}),
44
+ },
45
+ context: {
46
+ docs: task.canonicalDocs,
47
+ },
48
+ referenceSolution: task.referenceSolution,
49
+ docCoverage: task.docCoverage,
50
+ assertions: task.assertions,
51
+ };
52
+ // Only set optional fields when present to preserve round-trip identity
53
+ if (task.baseline != null)
54
+ result.baseline = task.baseline;
55
+ if (task.tags != null)
56
+ result.tags = task.tags;
57
+ if (task.status != null)
58
+ result.status = task.status;
59
+ return result;
60
+ }
61
+ // ---------------------------------------------------------------------------
62
+ // toLiteracyTask — LiteracyTaskDefinition → old TaskDefinition
63
+ // ---------------------------------------------------------------------------
64
+ /**
65
+ * Convert a new LiteracyTaskDefinition to the old TaskDefinition shape.
66
+ *
67
+ * Fields that only exist on LiteracyTaskDefinition (description, difficulty,
68
+ * metadata, rubric, providers, options, context.fixtures, prompt.systemMessage)
69
+ * are dropped — the old type has no place for them.
70
+ */
71
+ export function toLiteracyTask(task) {
72
+ const result = {
73
+ id: task.id,
74
+ description: task.title,
75
+ featureArea: task.area ?? "",
76
+ taskPrompt: task.prompt?.text ?? task.prompt?.template ?? "",
77
+ canonicalDocs: (task.context?.docs ?? []),
78
+ referenceSolution: task.referenceSolution ?? "",
79
+ docCoverage: task.docCoverage ?? false,
80
+ assertions: (task.assertions ?? []),
81
+ };
82
+ // Only set optional fields when present to preserve round-trip identity
83
+ if (task.baseline != null)
84
+ result.baseline = task.baseline;
85
+ if (task.tags != null)
86
+ result.tags = task.tags;
87
+ if (task.status != null)
88
+ result.status = task.status;
89
+ if (task.prompt?.vars != null)
90
+ result.extraVars = task.prompt.vars;
91
+ return result;
92
+ }
@@ -11,11 +11,8 @@
11
11
  * - Validate the graph is a DAG (reject cycles)
12
12
  * - Assign execution priority via topological sort
13
13
  *
14
- * This module exists alongside `generate-configs.ts` — it does NOT replace
15
- * the existing codegen path. Phase 7 will swap callers over to the compiler.
16
- *
17
14
  * @see packages/core/src/types/task-graph.ts — TaskGraph types
18
- * @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
15
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-2-config-compiler.md
19
16
  */
20
17
  import type { DependencyEdge, FilterOptions, GeneralizedTaskDefinition, TaskGraph, TaskNode } from "../../_vendor/ailf-core/index.d.ts";
21
18
  /** Options for building a task graph */
@@ -11,11 +11,8 @@
11
11
  * - Validate the graph is a DAG (reject cycles)
12
12
  * - Assign execution priority via topological sort
13
13
  *
14
- * This module exists alongside `generate-configs.ts` — it does NOT replace
15
- * the existing codegen path. Phase 7 will swap callers over to the compiler.
16
- *
17
14
  * @see packages/core/src/types/task-graph.ts — TaskGraph types
18
- * @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
15
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-2-config-compiler.md
19
16
  */
20
17
  // ---------------------------------------------------------------------------
21
18
  // Public API
@@ -4,7 +4,7 @@
4
4
  * Captures tool calls, token usage, cost, and timing for every evaluation.
5
5
  * Full traces go to blob storage; sanitized summaries to Content Lake.
6
6
  *
7
- * @see docs/exec-plans/architecture-overhaul/phase-6-observability.md
7
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-6-observability.md
8
8
  * @see docs/design-docs/architecture-overhaul/observability-telemetry.md
9
9
  */
10
10
  export { collectTrace, mergeTraces, type ProviderResponse, type RawToolCall, type TraceCollectorOptions, } from "./trace-collector.js";
@@ -4,7 +4,7 @@
4
4
  * Captures tool calls, token usage, cost, and timing for every evaluation.
5
5
  * Full traces go to blob storage; sanitized summaries to Content Lake.
6
6
  *
7
- * @see docs/exec-plans/architecture-overhaul/phase-6-observability.md
7
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-6-observability.md
8
8
  * @see docs/design-docs/architecture-overhaul/observability-telemetry.md
9
9
  */
10
10
  // Trace collection
@@ -12,7 +12,7 @@
12
12
  * recorded in provenance for reproducibility tracking.
13
13
  *
14
14
  * @see docs/design-docs/architecture-overhaul/domain-model.md (VariableEnvelope)
15
- * @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
15
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-2-config-compiler.md
16
16
  */
17
17
  import type { VariableDeclaration, VariableEnvelope } from "../../_vendor/ailf-core/index.d.ts";
18
18
  /** Options for variable resolution */
@@ -12,7 +12,7 @@
12
12
  * recorded in provenance for reproducibility tracking.
13
13
  *
14
14
  * @see docs/design-docs/architecture-overhaul/domain-model.md (VariableEnvelope)
15
- * @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
15
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-2-config-compiler.md
16
16
  */
17
17
  import { simpleHash } from "./hash.js";
18
18
  // ---------------------------------------------------------------------------
@@ -7,7 +7,7 @@
7
7
  *
8
8
  * Phase 3c of the Scenario Matrix implementation.
9
9
  *
10
- * @see docs/exec-plans/scenario-matrix-implementation/phase-3-gap-analysis.md
10
+ * @see docs/archive/exec-plans/scenario-matrix-implementation/phase-3-gap-analysis.md
11
11
  */
12
12
  import type { Logger, PluginRegistry } from "../_vendor/ailf-core/index.d.ts";
13
13
  import type { CoverageAuditReport, ProductFeature } from "./types.js";
@@ -7,7 +7,7 @@
7
7
  *
8
8
  * Phase 3c of the Scenario Matrix implementation.
9
9
  *
10
- * @see docs/exec-plans/scenario-matrix-implementation/phase-3-gap-analysis.md
10
+ * @see docs/archive/exec-plans/scenario-matrix-implementation/phase-3-gap-analysis.md
11
11
  */
12
12
  import { ConsoleLogger } from "../adapters/loggers/index.js";
13
13
  import { tryLoadConfigFile } from "./compiler/config-loader.js";
@@ -12,7 +12,7 @@
12
12
  *
13
13
  * These are deterministic, pure functions — no randomness, no side effects.
14
14
  *
15
- * @see docs/exec-plans/grader-reliability.md — Phase 4
15
+ * @see docs/archive/exec-plans/grader-reliability.md — Phase 4
16
16
  */
17
17
  /** A degradation targeting a specific scoring dimension */
18
18
  export interface Degradation {
@@ -12,7 +12,7 @@
12
12
  *
13
13
  * These are deterministic, pure functions — no randomness, no side effects.
14
14
  *
15
- * @see docs/exec-plans/grader-reliability.md — Phase 4
15
+ * @see docs/archive/exec-plans/grader-reliability.md — Phase 4
16
16
  */
17
17
  // ---------------------------------------------------------------------------
18
18
  // Task Completion degradations
@@ -13,7 +13,7 @@
13
13
  * When both sources agree, confidence is boosted. When only ceiling
14
14
  * signals are available, they serve as a fallback for unclassified cases.
15
15
  *
16
- * @see docs/exec-plans/scenario-matrix-implementation/phase-3-gap-analysis.md
16
+ * @see docs/archive/exec-plans/scenario-matrix-implementation/phase-3-gap-analysis.md
17
17
  */
18
18
  import type { FailureMode, FailureModeReport, FeatureScore, GraderJudgment } from "./types.js";
19
19
  /**
@@ -13,7 +13,7 @@
13
13
  * When both sources agree, confidence is boosted. When only ceiling
14
14
  * signals are available, they serve as a fallback for unclassified cases.
15
15
  *
16
- * @see docs/exec-plans/scenario-matrix-implementation/phase-3-gap-analysis.md
16
+ * @see docs/archive/exec-plans/scenario-matrix-implementation/phase-3-gap-analysis.md
17
17
  */
18
18
  import { detectFeatureArea } from "../_vendor/ailf-core/index.js";
19
19
  // ---------------------------------------------------------------------------
@@ -23,6 +23,7 @@ import { detectFeatureArea } from "../_vendor/ailf-core/index.js";
23
23
  const CLASSIFICATION_THRESHOLD = 60;
24
24
  /** All failure mode types for initializing empty counts */
25
25
  const ALL_MODES = [
26
+ "api-error",
26
27
  "incorrect-docs",
27
28
  "missing-docs",
28
29
  "model-limitation",
@@ -33,6 +34,9 @@ const ALL_MODES = [
33
34
  // ---------------------------------------------------------------------------
34
35
  // Keyword patterns
35
36
  // ---------------------------------------------------------------------------
37
/**
 * API error pattern — checked FIRST to prevent timeout errors containing
 * "deprecated" from being misclassified as outdated-docs.
 *
 * The HTTP status codes 429 and 503 only match when they are not embedded in
 * a longer run of digits, so text such as "14290 tokens" or "port 5030" is
 * not treated as an API error.
 */
const API_ERROR_PATTERN = /\[api-error\]|timeout|timed out|rate limit|(?<!\d)(?:429|503)(?!\d)|ECONNRESET|ETIMEDOUT|socket hang up|fetch failed/i;
36
40
  const OUTDATED_PATTERN = /deprecated|old api|v[0-9]+ syntax|no longer supported|legacy|previous version|outdated|superseded|replaced by/i;
37
41
  const MISSING_PATTERN = /no documentation|not covered|had to guess|not found|missing.*doc|no.*information|undocumented|couldn't find|without.*documentation/i;
38
42
  const INCORRECT_PATTERN = /contradicts|incorrect.*doc|doc.*incorrect|wrong.*doc|doc.*wrong|documentation says.*but|factual error|inaccurate|misleading.*doc/i;
@@ -226,6 +230,11 @@ function classifyByCeiling(score, ceilingScore, floorScore) {
226
230
  }
227
231
  /** Classify by keyword matching on the reason text */
228
232
  function classifyByKeyword(reason) {
233
+ // API errors checked first — prevents timeout messages containing
234
+ // "deprecated" from being misclassified as outdated-docs.
235
+ if (API_ERROR_PATTERN.test(reason)) {
236
+ return { confidence: "high", mode: "api-error", source: "keyword" };
237
+ }
229
238
  if (OUTDATED_PATTERN.test(reason)) {
230
239
  return { confidence: "high", mode: "outdated-docs", source: "keyword" };
231
240
  }
@@ -321,6 +330,7 @@ function findTopMode(modes) {
321
330
  /** Initialize mode counts to zero */
322
331
  function initModeCounts() {
323
332
  return {
333
+ "api-error": 0,
324
334
  "incorrect-docs": 0,
325
335
  "missing-docs": 0,
326
336
  "model-limitation": 0,
@@ -332,6 +342,8 @@ function initModeCounts() {
332
342
  /** Get icon for a failure mode */
333
343
  function modeIcon(mode) {
334
344
  switch (mode) {
345
+ case "api-error":
346
+ return "⚡";
335
347
  case "incorrect-docs":
336
348
  return "❌";
337
349
  case "missing-docs":
@@ -13,7 +13,7 @@
13
13
  * bottleneck dimension to the median of non-bottlenecked dimensions (not 100).
14
14
  * This produces realistic estimates rather than theoretical maximums.
15
15
  *
16
- * @see docs/exec-plans/scenario-matrix-implementation/phase-3-gap-analysis.md
16
+ * @see docs/archive/exec-plans/scenario-matrix-implementation/phase-3-gap-analysis.md
17
17
  */
18
18
  import type { FailureModeReport, FeatureScore, GapAnalysisReport, GapEstimate } from "./types.js";
19
19
  /**
@@ -13,7 +13,7 @@
13
13
  * bottleneck dimension to the median of non-bottlenecked dimensions (not 100).
14
14
  * This produces realistic estimates rather than theoretical maximums.
15
15
  *
16
- * @see docs/exec-plans/scenario-matrix-implementation/phase-3-gap-analysis.md
16
+ * @see docs/archive/exec-plans/scenario-matrix-implementation/phase-3-gap-analysis.md
17
17
  */
18
18
  // ---------------------------------------------------------------------------
19
19
  // Constants
@@ -26,6 +26,7 @@ const DEFAULT_WEIGHTS = {
26
26
  };
27
27
  /** Map failure modes to the dimensions they typically bottleneck */
28
28
  const MODE_BOTTLENECKS = {
29
+ "api-error": [], // Infrastructure issue, not a docs problem
29
30
  "incorrect-docs": ["code-correctness", "task-completion"],
30
31
  "missing-docs": ["task-completion", "doc-coverage"],
31
32
  "model-limitation": [], // Not a docs problem
@@ -35,6 +36,7 @@ const MODE_BOTTLENECKS = {
35
36
  };
36
37
  /** Remediation descriptions by failure mode */
37
38
  const REMEDIATION_MAP = {
39
+ "api-error": "Check model provider config (timeoutMs, maxRetries), API quotas, and task complexity",
38
40
  "incorrect-docs": "Fix factual errors in existing documentation",
39
41
  "missing-docs": "Write new documentation for uncovered functionality",
40
42
  "model-limitation": "Not a documentation problem — track for model improvement",
@@ -12,7 +12,7 @@
12
12
  * the new compiler has been validated in production.
13
13
  *
14
14
  * @see packages/eval/src/pipeline/compiler/ — the new compiler pipeline
15
- * @see docs/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
15
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
16
16
  *
17
17
  * ---
18
18
  *
@@ -31,7 +31,7 @@
31
31
  * No process.argv parsing. No env var fallbacks. Callers provide typed options.
32
32
  *
33
33
  * @see config/models.yaml — the central model registry
34
- * @see docs/exec-plans/eliminate-lib-layer.md
34
+ * @see docs/archive/exec-plans/eliminate-lib-layer.md
35
35
  */
36
36
  import { type LiteracyTaskDefinition, type Logger } from "../_vendor/ailf-core/index.d.ts";
37
37
  import type { FilterOptions } from "./types.js";
@@ -12,7 +12,7 @@
12
12
  * the new compiler has been validated in production.
13
13
  *
14
14
  * @see packages/eval/src/pipeline/compiler/ — the new compiler pipeline
15
- * @see docs/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
15
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
16
16
  *
17
17
  * ---
18
18
  *
@@ -31,14 +31,15 @@
31
31
  * No process.argv parsing. No env var fallbacks. Callers provide typed options.
32
32
  *
33
33
  * @see config/models.yaml — the central model registry
34
- * @see docs/exec-plans/eliminate-lib-layer.md
34
+ * @see docs/archive/exec-plans/eliminate-lib-layer.md
35
35
  */
36
- import { extractModelName, extractProvider, mergeConfig, modelMatchesMode, } from "../_vendor/ailf-core/index.js";
36
+ import { extractModelName, extractProvider, mergeConfig, } from "../_vendor/ailf-core/index.js";
37
37
  import { existsSync, readdirSync, writeFileSync } from "fs";
38
38
  import { resolve } from "path";
39
39
  import { dump } from "js-yaml";
40
40
  import { ConsoleLogger } from "../adapters/loggers/index.js";
41
41
  import { loadConfigFile } from "./compiler/config-loader.js";
42
+ import { modelMatchesLiteracyVariant } from "./compiler/mode-bases/literacy.js";
42
43
  import { LITERACY_PROMPT_TEMPLATES } from "./compiler/mode-handlers/literacy/index.js";
43
44
  import { expandTaskDefinitions, loadAndExpandTasks } from "./expand-tasks.js";
44
45
  import { validateModelsYaml } from "./validate.js";
@@ -135,8 +136,8 @@ const SOURCE_ISOLATION_ASSERT = {
135
136
  // Config generators
136
137
  // ---------------------------------------------------------------------------
137
138
  function generateAgenticConfig(models, tests, prompts, source, searchMode, allowedOrigins) {
138
- const naiveModels = models.models.filter((m) => modelMatchesMode(m, "agentic-naive"));
139
- const optimizedModels = models.models.filter((m) => modelMatchesMode(m, "agentic-optimized"));
139
+ const naiveModels = models.models.filter((m) => modelMatchesLiteracyVariant(m, "agentic-naive"));
140
+ const optimizedModels = models.models.filter((m) => modelMatchesLiteracyVariant(m, "agentic-optimized"));
140
141
  const providers = [];
141
142
  // Build doc source config to inject into providers
142
143
  const resolvedSearchMode = searchMode ?? "open";
@@ -170,6 +171,7 @@ function generateAgenticConfig(models, tests, prompts, source, searchMode, allow
170
171
  model: modelName,
171
172
  provider,
172
173
  }),
174
+ ...(model.timeoutMs ? { timeoutMs: model.timeoutMs } : {}),
173
175
  ...sourceConfig,
174
176
  observe: true,
175
177
  observerOptions: models.defaults.observerOptions ?? {},
@@ -189,6 +191,7 @@ function generateAgenticConfig(models, tests, prompts, source, searchMode, allow
189
191
  model: modelName,
190
192
  provider,
191
193
  }),
194
+ ...(model.timeoutMs ? { timeoutMs: model.timeoutMs } : {}),
192
195
  ...sourceConfig,
193
196
  observe: true,
194
197
  observerOptions: models.defaults.observerOptions ?? {},
@@ -220,9 +223,12 @@ function generateAgenticConfig(models, tests, prompts, source, searchMode, allow
220
223
  };
221
224
  }
222
225
  function generateBaselineConfig(models, tests, prompts) {
223
- const baselineModels = models.models.filter((m) => modelMatchesMode(m, LiteracyVariant.STANDARD));
226
+ const baselineModels = models.models.filter((m) => modelMatchesLiteracyVariant(m, "baseline"));
224
227
  const providers = baselineModels.map((model) => ({
225
- config: mergeConfig(models.defaults, model.config),
228
+ config: {
229
+ ...mergeConfig(models.defaults, model.config),
230
+ ...(model.timeoutMs ? { timeoutMs: model.timeoutMs } : {}),
231
+ },
226
232
  id: model.id,
227
233
  label: model.label,
228
234
  }));
@@ -246,12 +252,13 @@ function generateBaselineConfig(models, tests, prompts) {
246
252
  };
247
253
  }
248
254
  function generateObservedConfig(models, tests, prompts) {
249
- const observedModels = models.models.filter((m) => modelMatchesMode(m, LiteracyVariant.OBSERVED));
255
+ const observedModels = models.models.filter((m) => modelMatchesLiteracyVariant(m, "observed"));
250
256
  const providers = observedModels.map((model) => {
251
257
  const modelName = extractModelName(model.id);
252
258
  return {
253
259
  config: {
254
260
  ...mergeConfig(models.defaults, model.config),
261
+ ...(model.timeoutMs ? { timeoutMs: model.timeoutMs } : {}),
255
262
  modelName,
256
263
  observe: true,
257
264
  recordOptions: models.defaults.observerOptions ?? {},
@@ -10,7 +10,7 @@
10
10
  * Migrated from lib/grader-compare.ts — no process.argv, no process.exit(),
11
11
  * no module-level constants. Accepts rootDir as parameter.
12
12
  *
13
- * @see docs/exec-plans/grader-reliability.md — Phase 3
13
+ * @see docs/archive/exec-plans/grader-reliability.md — Phase 3
14
14
  */
15
15
  import type { Logger } from "../_vendor/ailf-core/index.d.ts";
16
16
  import { type GraderComparison } from "./grader-comparison.js";
@@ -10,7 +10,7 @@
10
10
  * Migrated from lib/grader-compare.ts — no process.argv, no process.exit(),
11
11
  * no module-level constants. Accepts rootDir as parameter.
12
12
  *
13
- * @see docs/exec-plans/grader-reliability.md — Phase 3
13
+ * @see docs/archive/exec-plans/grader-reliability.md — Phase 3
14
14
  */
15
15
  import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
16
16
  import { join } from "path";
@@ -41,6 +41,12 @@ function classifyDimension(component) {
41
41
  }
42
42
  function detectFeatureArea(description) {
43
43
  const desc = description.toLowerCase();
44
+ if (desc.includes("portable text"))
45
+ return "portable-text";
46
+ if (desc.includes("content lake"))
47
+ return "content-lake";
48
+ if (desc.includes("image handling") || desc.includes("image asset"))
49
+ return "image-handling";
44
50
  if (desc.includes("studio"))
45
51
  return "studio-setup";
46
52
  if (desc.includes("visual") ||
@@ -10,7 +10,7 @@
10
10
  * This module has NO side effects — no file I/O, no API calls.
11
11
  * It operates on pre-collected data only.
12
12
  *
13
- * @see docs/exec-plans/grader-reliability.md — Phase 3
13
+ * @see docs/archive/exec-plans/grader-reliability.md — Phase 3
14
14
  */
15
15
  import type { ComparisonReport, ScoreSummary } from "./types.js";
16
16
  /** Per-dimension comparison between two graders */
@@ -10,7 +10,7 @@
10
10
  * This module has NO side effects — no file I/O, no API calls.
11
11
  * It operates on pre-collected data only.
12
12
  *
13
- * @see docs/exec-plans/grader-reliability.md — Phase 3
13
+ * @see docs/archive/exec-plans/grader-reliability.md — Phase 3
14
14
  */
15
15
  import { compare } from "./compare.js";
16
16
  import { pearsonCorrelation } from "./grader-validation.js";
@@ -12,7 +12,7 @@
12
12
  * Migrated from lib/grader-consistency.ts — no process.argv, no process.exit(),
13
13
  * no module-level constants.
14
14
  *
15
- * @see docs/exec-plans/grader-reliability.md — Phase 1
15
+ * @see docs/archive/exec-plans/grader-reliability.md — Phase 1
16
16
  */
17
17
  import { type Logger } from "../_vendor/ailf-core/index.d.ts";
18
18
  import type { RawPromptfooFile } from "./calculate-scores.js";
@@ -12,7 +12,7 @@
12
12
  * Migrated from lib/grader-consistency.ts — no process.argv, no process.exit(),
13
13
  * no module-level constants.
14
14
  *
15
- * @see docs/exec-plans/grader-reliability.md — Phase 1
15
+ * @see docs/archive/exec-plans/grader-reliability.md — Phase 1
16
16
  */
17
17
  import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
18
18
  import { join } from "path";
@@ -44,6 +44,12 @@ function classifyDimension(component) {
44
44
  // ---------------------------------------------------------------------------
45
45
  function detectFeatureArea(description) {
46
46
  const desc = description.toLowerCase();
47
+ if (desc.includes("portable text"))
48
+ return "portable-text";
49
+ if (desc.includes("content lake"))
50
+ return "content-lake";
51
+ if (desc.includes("image handling") || desc.includes("image asset"))
52
+ return "image-handling";
47
53
  if (desc.includes("studio"))
48
54
  return "studio-setup";
49
55
  if (desc.includes("visual") ||
@@ -10,7 +10,7 @@
10
10
  * This module has NO side effects — no file I/O, no API calls.
11
11
  * It operates on pre-collected data only.
12
12
  *
13
- * @see docs/exec-plans/grader-reliability.md — Phase 1
13
+ * @see docs/archive/exec-plans/grader-reliability.md — Phase 1
14
14
  */
15
15
  /** Per-dimension consistency aggregates */
16
16
  export interface DimensionConsistency {
@@ -10,7 +10,7 @@
10
10
  * This module has NO side effects — no file I/O, no API calls.
11
11
  * It operates on pre-collected data only.
12
12
  *
13
- * @see docs/exec-plans/grader-reliability.md — Phase 1
13
+ * @see docs/archive/exec-plans/grader-reliability.md — Phase 1
14
14
  */
15
15
  // ---------------------------------------------------------------------------
16
16
  // Pure computation
@@ -11,7 +11,7 @@
11
11
  * Migrated from lib/grader-sensitivity.ts — no process.argv, no process.exit(),
12
12
  * no module-level constants. Accepts rootDir as parameter.
13
13
  *
14
- * @see docs/exec-plans/grader-reliability.md — Phase 4
14
+ * @see docs/archive/exec-plans/grader-reliability.md — Phase 4
15
15
  */
16
16
  import type { Logger } from "../_vendor/ailf-core/index.d.ts";
17
17
  import { type GraderSensitivityResult } from "./grader-sensitivity.js";
@@ -11,7 +11,7 @@
11
11
  * Migrated from lib/grader-sensitivity.ts — no process.argv, no process.exit(),
12
12
  * no module-level constants. Accepts rootDir as parameter.
13
13
  *
14
- * @see docs/exec-plans/grader-reliability.md — Phase 4
14
+ * @see docs/archive/exec-plans/grader-reliability.md — Phase 4
15
15
  */
16
16
  import { existsSync, mkdirSync, readdirSync, readFileSync, writeFileSync, } from "fs";
17
17
  import { basename, join } from "path";
@@ -11,7 +11,7 @@
11
11
  *
12
12
  * This module has NO side effects — no file I/O, no API calls.
13
13
  *
14
- * @see docs/exec-plans/grader-reliability.md — Phase 4
14
+ * @see docs/archive/exec-plans/grader-reliability.md — Phase 4
15
15
  */
16
16
  /** Sensitivity broken down by degradation type */
17
17
  export interface DegradationSensitivity {
@@ -11,7 +11,7 @@
11
11
  *
12
12
  * This module has NO side effects — no file I/O, no API calls.
13
13
  *
14
- * @see docs/exec-plans/grader-reliability.md — Phase 4
14
+ * @see docs/archive/exec-plans/grader-reliability.md — Phase 4
15
15
  */
16
16
  // ---------------------------------------------------------------------------
17
17
  // Pure computation
@@ -11,7 +11,7 @@
11
11
  * Migrated from lib/grader-validate.ts — no process.argv, no process.exit(),
12
12
  * no module-level constants. Accepts rootDir as parameter.
13
13
  *
14
- * @see docs/exec-plans/grader-reliability.md — Phase 2
14
+ * @see docs/archive/exec-plans/grader-reliability.md — Phase 2
15
15
  */
16
16
  import type { Logger } from "../_vendor/ailf-core/index.d.ts";
17
17
  import { type GraderValidation } from "./grader-validation.js";
@@ -11,7 +11,7 @@
11
11
  * Migrated from lib/grader-validate.ts — no process.argv, no process.exit(),
12
12
  * no module-level constants. Accepts rootDir as parameter.
13
13
  *
14
- * @see docs/exec-plans/grader-reliability.md — Phase 2
14
+ * @see docs/archive/exec-plans/grader-reliability.md — Phase 2
15
15
  */
16
16
  import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync, } from "fs";
17
17
  import { join } from "path";
@@ -43,7 +43,7 @@ function loadReferenceGrades(rootDir) {
43
43
  if (!existsSync(refsDir)) {
44
44
  throw new Error(`Reference grades directory not found: ${refsDir}. ` +
45
45
  "Create canonical/grader-references/ with YAML reference files. " +
46
- "See docs/exec-plans/grader-reliability.md — Phase 2.");
46
+ "See docs/archive/exec-plans/grader-reliability.md — Phase 2.");
47
47
  }
48
48
  const files = readdirSync(refsDir)
49
49
  .filter((f) => f.endsWith(".yaml") || f.endsWith(".yml"))
@@ -11,7 +11,7 @@
11
11
  *
12
12
  * This module has NO side effects — no file I/O, no API calls.
13
13
  *
14
- * @see docs/exec-plans/grader-reliability.md — Phase 2
14
+ * @see docs/archive/exec-plans/grader-reliability.md — Phase 2
15
15
  */
16
16
  /** Quality label for a correlation value */
17
17
  export type CorrelationQuality = "excellent" | "good" | "moderate" | "poor" | "very-poor";