@sanity/ailf 1.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (499) hide show
  1. package/README.md +0 -1
  2. package/canonical/grader-references/README.md +2 -2
  3. package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
  4. package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
  5. package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
  6. package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
  7. package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
  8. package/config/features.ts +1 -1
  9. package/config/models.ts +29 -12
  10. package/config/sources.ts +1 -1
  11. package/config/thresholds.ts +1 -1
  12. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
  13. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
  14. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
  15. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
  16. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
  17. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
  18. package/dist/_vendor/ailf-core/config-helpers.d.ts +20 -17
  19. package/dist/_vendor/ailf-core/config-helpers.js +51 -2
  20. package/dist/_vendor/ailf-core/examples/index.d.ts +166 -80
  21. package/dist/_vendor/ailf-core/examples/index.js +213 -94
  22. package/dist/_vendor/ailf-core/index.d.ts +3 -2
  23. package/dist/_vendor/ailf-core/index.js +2 -1
  24. package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
  25. package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
  26. package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
  27. package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
  28. package/dist/_vendor/ailf-core/ports/context.d.ts +22 -1
  29. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
  30. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
  31. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
  32. package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
  33. package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
  34. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
  35. package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
  36. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +7 -1
  37. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +16 -2
  38. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
  39. package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
  40. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
  41. package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
  42. package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
  43. package/dist/_vendor/ailf-core/services/index.js +1 -1
  44. package/dist/_vendor/ailf-core/services/scoring.js +9 -0
  45. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +25 -1
  46. package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
  47. package/dist/_vendor/ailf-core/types/index.d.ts +48 -7
  48. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +105 -23
  49. package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
  50. package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
  51. package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
  52. package/dist/adapters/api-client/remediation.js +2 -2
  53. package/dist/adapters/config-sources/file-config-adapter.js +7 -1
  54. package/dist/adapters/config-sources/ts-config-loader.js +21 -13
  55. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
  56. package/dist/adapters/index.d.ts +0 -1
  57. package/dist/adapters/index.js +0 -1
  58. package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
  59. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  60. package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
  61. package/dist/adapters/task-sources/content-lake-task-source.js +21 -26
  62. package/dist/adapters/task-sources/index.d.ts +3 -4
  63. package/dist/adapters/task-sources/index.js +3 -4
  64. package/dist/adapters/task-sources/repo-schemas.d.ts +219 -17
  65. package/dist/adapters/task-sources/repo-schemas.js +228 -20
  66. package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
  67. package/dist/adapters/task-sources/repo-task-source.js +81 -122
  68. package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
  69. package/dist/adapters/task-sources/repo-trigger.js +1 -1
  70. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  71. package/dist/adapters/task-sources/repo-validation.js +126 -5
  72. package/dist/adapters/task-sources/task-file-loader.d.ts +10 -7
  73. package/dist/adapters/task-sources/task-file-loader.js +21 -7
  74. package/dist/agent-observer/test-imports.d.ts +7 -0
  75. package/dist/agent-observer/test-imports.js +185 -0
  76. package/dist/artifact-capture/comparator.d.ts +22 -0
  77. package/dist/artifact-capture/comparator.js +493 -0
  78. package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
  79. package/dist/artifact-capture/filesystem-collector.js +237 -0
  80. package/dist/artifact-capture/redact-artifact.d.ts +20 -0
  81. package/dist/artifact-capture/redact-artifact.js +115 -0
  82. package/dist/assertions/source-isolation.d.ts +1 -1
  83. package/dist/assertions/source-isolation.js +1 -1
  84. package/dist/cli.js +4 -0
  85. package/dist/commands/calculate-scores.js +1 -0
  86. package/dist/commands/capture-compare.d.ts +15 -0
  87. package/dist/commands/capture-compare.js +253 -0
  88. package/dist/commands/capture-list.d.ts +12 -0
  89. package/dist/commands/capture-list.js +147 -0
  90. package/dist/commands/capture.d.ts +9 -0
  91. package/dist/commands/capture.js +16 -0
  92. package/dist/commands/chronic-failures.d.ts +8 -0
  93. package/dist/commands/chronic-failures.js +33 -0
  94. package/dist/commands/coverage-audit.js +3 -1
  95. package/dist/commands/explain-handler.d.ts +1 -1
  96. package/dist/commands/explain-handler.js +37 -8
  97. package/dist/commands/fetch-docs.js +1 -0
  98. package/dist/commands/generate-configs.d.ts +3 -3
  99. package/dist/commands/generate-configs.js +20 -8
  100. package/dist/commands/init.d.ts +5 -4
  101. package/dist/commands/init.js +190 -25
  102. package/dist/commands/pipeline-action.d.ts +7 -1
  103. package/dist/commands/pipeline-action.js +43 -19
  104. package/dist/commands/pipeline.d.ts +6 -1
  105. package/dist/commands/pipeline.js +7 -2
  106. package/dist/commands/pr-comment.js +1 -0
  107. package/dist/commands/publish.js +1 -0
  108. package/dist/commands/shared/help.js +2 -2
  109. package/dist/commands/update-quality-scores.d.ts +5 -0
  110. package/dist/commands/update-quality-scores.js +20 -0
  111. package/dist/commands/validate-tasks.d.ts +2 -2
  112. package/dist/commands/validate-tasks.js +26 -15
  113. package/dist/composition-root.d.ts +15 -4
  114. package/dist/composition-root.js +100 -55
  115. package/dist/config/features.ts +23 -0
  116. package/dist/config/models.ts +100 -0
  117. package/dist/config/prompts.ts +16 -0
  118. package/dist/config/rubrics.ts +225 -0
  119. package/dist/config/schedules.ts +47 -0
  120. package/dist/config/sinks.ts +37 -0
  121. package/dist/config/sources.ts +21 -0
  122. package/dist/config/thresholds.ts +61 -0
  123. package/dist/index.d.ts +41 -0
  124. package/dist/index.js +48 -0
  125. package/dist/lib/agent-behavior-report.d.ts +8 -0
  126. package/dist/lib/agent-behavior-report.js +185 -0
  127. package/dist/lib/baseline.d.ts +19 -0
  128. package/dist/lib/baseline.js +153 -0
  129. package/dist/lib/calculate-scores.d.ts +23 -0
  130. package/dist/lib/calculate-scores.js +42 -0
  131. package/dist/lib/compare.d.ts +18 -0
  132. package/dist/lib/compare.js +170 -0
  133. package/dist/lib/coverage-audit.d.ts +4 -0
  134. package/dist/lib/coverage-audit.js +42 -0
  135. package/dist/lib/discovery-report.d.ts +13 -0
  136. package/dist/lib/discovery-report.js +57 -0
  137. package/dist/lib/fetch-docs.d.ts +30 -0
  138. package/dist/lib/fetch-docs.js +171 -0
  139. package/dist/lib/generate-configs.d.ts +25 -0
  140. package/dist/lib/generate-configs.js +42 -0
  141. package/dist/lib/grader-api.d.ts +21 -0
  142. package/dist/lib/grader-api.js +34 -0
  143. package/dist/lib/grader-compare.d.ts +19 -0
  144. package/dist/lib/grader-compare.js +91 -0
  145. package/dist/lib/grader-consistency.d.ts +27 -0
  146. package/dist/lib/grader-consistency.js +79 -0
  147. package/dist/lib/grader-sensitivity.d.ts +19 -0
  148. package/dist/lib/grader-sensitivity.js +75 -0
  149. package/dist/lib/grader-validate.d.ts +19 -0
  150. package/dist/lib/grader-validate.js +78 -0
  151. package/dist/lib/measure-retrieval.d.ts +14 -0
  152. package/dist/lib/measure-retrieval.js +71 -0
  153. package/dist/lib/pr-comment.d.ts +16 -0
  154. package/dist/lib/pr-comment.js +28 -0
  155. package/dist/lib/readiness-report.d.ts +13 -0
  156. package/dist/lib/readiness-report.js +108 -0
  157. package/dist/lib/webhook-server.d.ts +11 -0
  158. package/dist/lib/webhook-server.js +24 -0
  159. package/dist/lib/weekly-digest.d.ts +24 -0
  160. package/dist/lib/weekly-digest.js +148 -0
  161. package/dist/orchestration/build-app-context.js +13 -0
  162. package/dist/orchestration/build-step-sequence.js +4 -2
  163. package/dist/orchestration/cache-context.d.ts +23 -0
  164. package/dist/orchestration/cache-context.js +43 -0
  165. package/dist/orchestration/env-bridge.d.ts +21 -0
  166. package/dist/orchestration/env-bridge.js +66 -0
  167. package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
  168. package/dist/orchestration/load-pipeline-tasks.js +52 -0
  169. package/dist/orchestration/pipeline-orchestrator.js +75 -5
  170. package/dist/orchestration/step-runner.js +5 -1
  171. package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
  172. package/dist/orchestration/steps/calculate-scores-step.js +13 -0
  173. package/dist/orchestration/steps/callback-step.js +10 -1
  174. package/dist/orchestration/steps/compare-step.js +6 -3
  175. package/dist/orchestration/steps/discovery-report-step.js +6 -2
  176. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  177. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  178. package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
  179. package/dist/orchestration/steps/fetch-docs-step.js +32 -19
  180. package/dist/orchestration/steps/gap-analysis-step.js +13 -2
  181. package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
  182. package/dist/orchestration/steps/generate-configs-step.js +77 -26
  183. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
  184. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  185. package/dist/orchestration/steps/publish-report-step.js +19 -0
  186. package/dist/orchestration/steps/readiness-step.js +8 -3
  187. package/dist/orchestration/steps/report-step.js +17 -4
  188. package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
  189. package/dist/orchestration/steps/run-eval-step.js +51 -31
  190. package/dist/pipeline/agent-behavior-report.js +6 -0
  191. package/dist/pipeline/attribution.d.ts +1 -1
  192. package/dist/pipeline/attribution.js +1 -1
  193. package/dist/pipeline/cache.js +29 -15
  194. package/dist/pipeline/calculate-scores.d.ts +2 -0
  195. package/dist/pipeline/calculate-scores.js +70 -33
  196. package/dist/pipeline/chronic-failures.d.ts +55 -0
  197. package/dist/pipeline/chronic-failures.js +110 -0
  198. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
  199. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
  200. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
  201. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +132 -62
  202. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
  203. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
  204. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
  205. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
  206. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
  207. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
  208. package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
  209. package/dist/pipeline/compiler/assertion-mapper.js +1 -1
  210. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
  211. package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
  212. package/dist/pipeline/compiler/config-loader.d.ts +14 -0
  213. package/dist/pipeline/compiler/config-loader.js +42 -2
  214. package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
  215. package/dist/pipeline/compiler/fixture-resolver.js +1 -1
  216. package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
  217. package/dist/pipeline/compiler/ignore-fields.js +1 -1
  218. package/dist/pipeline/compiler/index.d.ts +2 -5
  219. package/dist/pipeline/compiler/index.js +2 -5
  220. package/dist/pipeline/compiler/literacy-bridge.d.ts +2 -2
  221. package/dist/pipeline/compiler/literacy-bridge.js +2 -2
  222. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  223. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  224. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  225. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  226. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  227. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  228. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +23 -0
  229. package/dist/pipeline/compiler/mode-bases/literacy.js +132 -0
  230. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  231. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  232. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  233. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  234. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  235. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  236. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  237. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  238. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  239. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  240. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  241. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  242. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  243. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  244. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
  245. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  246. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  247. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  248. package/dist/pipeline/compiler/mode-handlers/index.d.ts +6 -7
  249. package/dist/pipeline/compiler/mode-handlers/index.js +6 -8
  250. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  251. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  252. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  253. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  254. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  255. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  256. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  257. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  258. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  259. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  260. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  261. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  262. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  263. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  264. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  265. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  266. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  267. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  268. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  269. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  270. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  271. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  272. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  273. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  274. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +63 -6
  275. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
  276. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
  277. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  278. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  279. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  280. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  281. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  282. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  283. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  284. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +108 -0
  285. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  286. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  287. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  288. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  289. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +3 -1
  290. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +65 -67
  291. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  292. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +191 -0
  293. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  294. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +101 -0
  295. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  296. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  297. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +19 -0
  298. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +323 -0
  299. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +103 -0
  300. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  301. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
  302. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
  303. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  304. package/dist/pipeline/compiler/preset-loader.js +99 -0
  305. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +7 -10
  306. package/dist/pipeline/compiler/presets/sanity-literacy.js +11 -157
  307. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
  308. package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
  309. package/dist/pipeline/compiler/provider-assembler.js +13 -7
  310. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
  311. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
  312. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
  313. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
  314. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
  315. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
  316. package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
  317. package/dist/pipeline/compiler/sandbox/index.js +1 -1
  318. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
  319. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
  320. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
  321. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
  322. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
  323. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
  324. package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
  325. package/dist/pipeline/compiler/scoring-bridge.js +1 -1
  326. package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
  327. package/dist/pipeline/compiler/task-bridge.js +92 -0
  328. package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
  329. package/dist/pipeline/compiler/task-graph-builder.js +1 -4
  330. package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
  331. package/dist/pipeline/compiler/telemetry/index.js +1 -1
  332. package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
  333. package/dist/pipeline/compiler/variable-resolver.js +1 -1
  334. package/dist/pipeline/coverage-audit.d.ts +1 -1
  335. package/dist/pipeline/coverage-audit.js +1 -1
  336. package/dist/pipeline/degradations.d.ts +1 -1
  337. package/dist/pipeline/degradations.js +1 -1
  338. package/dist/pipeline/expand-tasks.d.ts +2 -2
  339. package/dist/pipeline/expand-tasks.js +2 -2
  340. package/dist/pipeline/failure-modes.d.ts +1 -1
  341. package/dist/pipeline/failure-modes.js +13 -1
  342. package/dist/pipeline/gap-analysis.d.ts +1 -1
  343. package/dist/pipeline/gap-analysis.js +3 -1
  344. package/dist/pipeline/generate-configs.d.ts +2 -2
  345. package/dist/pipeline/generate-configs.js +16 -9
  346. package/dist/pipeline/grader-compare-runner.d.ts +1 -1
  347. package/dist/pipeline/grader-compare-runner.js +7 -1
  348. package/dist/pipeline/grader-comparison.d.ts +1 -1
  349. package/dist/pipeline/grader-comparison.js +1 -1
  350. package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
  351. package/dist/pipeline/grader-consistency-runner.js +7 -1
  352. package/dist/pipeline/grader-consistency.d.ts +1 -1
  353. package/dist/pipeline/grader-consistency.js +1 -1
  354. package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
  355. package/dist/pipeline/grader-sensitivity-runner.js +1 -1
  356. package/dist/pipeline/grader-sensitivity.d.ts +1 -1
  357. package/dist/pipeline/grader-sensitivity.js +1 -1
  358. package/dist/pipeline/grader-validate-runner.d.ts +1 -1
  359. package/dist/pipeline/grader-validate-runner.js +2 -2
  360. package/dist/pipeline/grader-validation.d.ts +1 -1
  361. package/dist/pipeline/grader-validation.js +1 -1
  362. package/dist/pipeline/map-request-to-config.js +16 -2
  363. package/dist/pipeline/mirror-repo-tasks.d.ts +8 -8
  364. package/dist/pipeline/mirror-repo-tasks.js +10 -10
  365. package/dist/pipeline/plan-format.d.ts +1 -1
  366. package/dist/pipeline/plan-format.js +1 -1
  367. package/dist/pipeline/plan.d.ts +1 -1
  368. package/dist/pipeline/plan.js +68 -30
  369. package/dist/pipeline/probe.d.ts +1 -1
  370. package/dist/pipeline/probe.js +1 -1
  371. package/dist/pipeline/readiness-report.d.ts +2 -2
  372. package/dist/pipeline/readiness-report.js +2 -2
  373. package/dist/pipeline/release-classification.d.ts +1 -1
  374. package/dist/pipeline/release-classification.js +1 -1
  375. package/dist/pipeline/release-report.d.ts +1 -1
  376. package/dist/pipeline/release-report.js +1 -1
  377. package/dist/pipeline/repo-eval-comment.d.ts +1 -1
  378. package/dist/pipeline/repo-eval-comment.js +1 -1
  379. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  380. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  381. package/dist/pipeline/resolve-mappings.d.ts +6 -6
  382. package/dist/pipeline/resolve-mappings.js +44 -44
  383. package/dist/pipeline/retrieval-metrics.d.ts +3 -3
  384. package/dist/pipeline/retrieval-metrics.js +28 -20
  385. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  386. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  387. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  388. package/dist/pipeline/steps/compare-step.js +90 -0
  389. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  390. package/dist/pipeline/steps/eval-step.js +347 -0
  391. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  392. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  393. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  394. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  395. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  396. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  397. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  398. package/dist/pipeline/steps/publish-report-step.js +243 -0
  399. package/dist/pipeline/steps/report-step.d.ts +13 -0
  400. package/dist/pipeline/steps/report-step.js +56 -0
  401. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  402. package/dist/pipeline/steps/update-scores-step.js +42 -0
  403. package/dist/pipeline/targeted-loo.d.ts +1 -1
  404. package/dist/pipeline/targeted-loo.js +1 -1
  405. package/dist/pipeline/thresholds.d.ts +1 -1
  406. package/dist/pipeline/thresholds.js +1 -1
  407. package/dist/pipeline/validate.js +13 -0
  408. package/dist/report-store.d.ts +17 -0
  409. package/dist/report-store.js +24 -0
  410. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  411. package/dist/scripts/agent-behavior-report.js +315 -0
  412. package/dist/scripts/baseline.d.ts +43 -0
  413. package/dist/scripts/baseline.js +267 -0
  414. package/dist/scripts/calculate-scores.d.ts +166 -0
  415. package/dist/scripts/calculate-scores.js +1296 -0
  416. package/dist/scripts/compare.d.ts +22 -0
  417. package/dist/scripts/compare.js +334 -0
  418. package/dist/scripts/coverage-audit.d.ts +44 -0
  419. package/dist/scripts/coverage-audit.js +209 -0
  420. package/dist/scripts/debug-eval.d.ts +19 -0
  421. package/dist/scripts/debug-eval.js +73 -0
  422. package/dist/scripts/discovery-report.d.ts +58 -0
  423. package/dist/scripts/discovery-report.js +250 -0
  424. package/dist/scripts/fetch-docs.d.ts +35 -0
  425. package/dist/scripts/fetch-docs.js +472 -0
  426. package/dist/scripts/generate-configs.d.ts +66 -0
  427. package/dist/scripts/generate-configs.js +459 -0
  428. package/dist/scripts/grader-api.d.ts +27 -0
  429. package/dist/scripts/grader-api.js +206 -0
  430. package/dist/scripts/grader-compare.d.ts +22 -0
  431. package/dist/scripts/grader-compare.js +368 -0
  432. package/dist/scripts/grader-consistency.d.ts +20 -0
  433. package/dist/scripts/grader-consistency.js +313 -0
  434. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  435. package/dist/scripts/grader-sensitivity.js +354 -0
  436. package/dist/scripts/grader-validate.d.ts +19 -0
  437. package/dist/scripts/grader-validate.js +267 -0
  438. package/dist/scripts/measure-retrieval.d.ts +10 -0
  439. package/dist/scripts/measure-retrieval.js +145 -0
  440. package/dist/scripts/migrate-task-mode.d.ts +1 -1
  441. package/dist/scripts/migrate-task-mode.js +1 -1
  442. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
  443. package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
  444. package/dist/scripts/pipeline.d.ts +76 -0
  445. package/dist/scripts/pipeline.js +1031 -0
  446. package/dist/scripts/pr-comment.d.ts +10 -0
  447. package/dist/scripts/pr-comment.js +510 -0
  448. package/dist/scripts/readiness-report.d.ts +88 -0
  449. package/dist/scripts/readiness-report.js +342 -0
  450. package/dist/scripts/update-quality-scores.d.ts +15 -0
  451. package/dist/scripts/update-quality-scores.js +184 -0
  452. package/dist/scripts/validate-task-sources.d.ts +1 -1
  453. package/dist/scripts/validate-task-sources.js +1 -1
  454. package/dist/scripts/validate.d.ts +13 -0
  455. package/dist/scripts/validate.js +79 -0
  456. package/dist/scripts/webhook-server.d.ts +26 -0
  457. package/dist/scripts/webhook-server.js +147 -0
  458. package/dist/scripts/weekly-digest.d.ts +24 -0
  459. package/dist/scripts/weekly-digest.js +144 -0
  460. package/dist/sinks/format-slack.d.ts +64 -0
  461. package/dist/sinks/format-slack.js +306 -0
  462. package/dist/sinks/slack-sink.d.ts +27 -0
  463. package/dist/sinks/slack-sink.js +78 -0
  464. package/dist/sinks/types.d.ts +1 -1
  465. package/dist/sinks/types.js +1 -1
  466. package/dist/sinks/webhook-sink.d.ts +19 -0
  467. package/dist/sinks/webhook-sink.js +50 -0
  468. package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
  469. package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
  470. package/dist/tasks/literacy/content-lake.task.ts +181 -0
  471. package/dist/tasks/literacy/frameworks.task.ts +129 -0
  472. package/dist/tasks/literacy/functions.task.ts +70 -0
  473. package/dist/tasks/literacy/groq.task.ts +259 -0
  474. package/dist/tasks/literacy/image-handling.task.ts +95 -0
  475. package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
  476. package/dist/tasks/literacy/portable-text.task.ts +169 -0
  477. package/dist/tasks/literacy/studio-setup.task.ts +134 -0
  478. package/dist/tasks/literacy/visual-editing.task.ts +147 -0
  479. package/package.json +32 -24
  480. package/tasks/.expanded.agentic.yaml +280 -0
  481. package/tasks/.expanded.yaml +565 -0
  482. package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
  483. package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
  484. package/tasks/literacy/content-lake.task.ts +181 -0
  485. package/tasks/literacy/frameworks.task.ts +1 -0
  486. package/tasks/literacy/functions.task.ts +1 -0
  487. package/tasks/literacy/groq.task.ts +1 -0
  488. package/tasks/literacy/image-handling.task.ts +95 -0
  489. package/tasks/literacy/nextjs-live.task.ts +2 -1
  490. package/tasks/literacy/portable-text.task.ts +169 -0
  491. package/tasks/literacy/studio-setup.task.ts +5 -2
  492. package/tasks/literacy/visual-editing.task.ts +1 -0
  493. package/LICENSE +0 -21
  494. package/tasks/frameworks.yaml +0 -98
  495. package/tasks/functions.yaml +0 -51
  496. package/tasks/groq.yaml +0 -216
  497. package/tasks/nextjs-live.yaml +0 -62
  498. package/tasks/studio-setup.yaml +0 -111
  499. package/tasks/visual-editing.yaml +0 -120
@@ -1,24 +1,20 @@
1
1
  /**
2
- * sanity-literacy preset — the built-in documentation literacy evaluation.
2
+ * sanity-literacy preset — Sanity-specific domain configuration for literacy evaluation.
3
3
  *
4
- * Packages ALL Sanity-specific configuration into a single preset:
5
- * - Literacy mode handler registration
6
- * - Prompt templates (with-docs, without-docs, agentic)
7
- * - Rubric templates (task-completion, code-correctness, doc-coverage)
8
- * - Scoring profiles (default, output-only)
4
+ * This is a domain preset that targets the `literacy` mode base. It provides
5
+ * Sanity-specific configuration:
9
6
  * - Sanity doc source definitions (production, branch, local)
10
7
  * - Product feature registry for coverage auditing
11
8
  * - DocFetcher factory (SanityDocFetcher)
12
- * - Standard assertions and fixture resolvers
9
+ * - Sanity fixture resolver (sanity:// scheme)
13
10
  *
14
- * Use `createSanityLiteracyPreset()` to get a fully configured preset
15
- * with a docFetcher factory bound to a specific rootDir.
11
+ * Evaluation methodology (rubrics, scoring, prompts) is inherited from the
12
+ * `literacy` mode base — see mode-bases/literacy.ts.
16
13
  *
17
- * @see docs/exec-plans/architecture-overhaul/phase-8-scoring-storage-presets.md
14
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-8-scoring-storage-presets.md
18
15
  */
19
16
  import { env } from "../../../_vendor/ailf-core/index.js";
20
17
  import { SanityDocFetcher } from "../../../adapters/doc-fetchers/index.js";
21
- import { LITERACY_PROMPT_TEMPLATES } from "../mode-handlers/literacy-handler.js";
22
18
  // ---------------------------------------------------------------------------
23
19
  // Factory
24
20
  // ---------------------------------------------------------------------------
@@ -41,130 +37,10 @@ export function createSanityLiteracyPreset(options) {
41
37
  "features correctly.",
42
38
  pluginApiVersion: 1,
43
39
  },
44
- // ── Mode handler ─────────────────────────────────────────
45
- modes: [
46
- {
47
- id: "literacy",
48
- label: "Documentation Literacy",
49
- validProviderPatterns: ["^openai:", "^anthropic:", "^file://"],
50
- rubricTemplateIds: [
51
- "task-completion",
52
- "code-correctness",
53
- "doc-coverage",
54
- ],
55
- handlerModule: "./mode-handlers/literacy-handler.js",
56
- },
57
- ],
58
- // ── Assertions ───────────────────────────────────────────
59
- assertions: [
60
- {
61
- type: "contains",
62
- label: "Contains text",
63
- compatibleModes: ["literacy", "knowledge-probe", "mcp-server"],
64
- handlerModule: "promptfoo:builtin",
65
- },
66
- {
67
- type: "contains-all",
68
- label: "Contains all texts",
69
- compatibleModes: ["literacy", "knowledge-probe", "mcp-server"],
70
- handlerModule: "promptfoo:builtin",
71
- },
72
- {
73
- type: "contains-any",
74
- label: "Contains any text",
75
- compatibleModes: ["literacy", "knowledge-probe", "mcp-server"],
76
- handlerModule: "promptfoo:builtin",
77
- },
78
- {
79
- type: "equals",
80
- label: "Exact match",
81
- compatibleModes: ["literacy"],
82
- handlerModule: "promptfoo:builtin",
83
- },
84
- {
85
- type: "regex",
86
- label: "Regex match",
87
- compatibleModes: ["literacy", "knowledge-probe"],
88
- handlerModule: "promptfoo:builtin",
89
- },
90
- {
91
- type: "is-json",
92
- label: "Valid JSON",
93
- compatibleModes: ["literacy", "mcp-server"],
94
- handlerModule: "promptfoo:builtin",
95
- },
96
- {
97
- type: "javascript",
98
- label: "JavaScript assertion",
99
- compatibleModes: [
100
- "literacy",
101
- "mcp-server",
102
- "agent-harness",
103
- "knowledge-probe",
104
- "custom",
105
- ],
106
- handlerModule: "promptfoo:builtin",
107
- },
108
- {
109
- type: "llm-rubric",
110
- label: "LLM-graded rubric",
111
- compatibleModes: [
112
- "literacy",
113
- "mcp-server",
114
- "agent-harness",
115
- "knowledge-probe",
116
- "custom",
117
- ],
118
- handlerModule: "promptfoo:builtin",
119
- },
120
- {
121
- type: "similar",
122
- label: "Semantic similarity",
123
- compatibleModes: ["literacy", "knowledge-probe"],
124
- handlerModule: "promptfoo:builtin",
125
- },
126
- ],
127
- // ── Rubric templates ─────────────────────────────────────
128
- rubricTemplates: [
129
- {
130
- id: "task-completion",
131
- dimension: "task-completion",
132
- header: "Score task completion from 0 to 100:",
133
- scale: [
134
- "0: Couldn't attempt — missing critical information",
135
- "20: Attempted but fundamentally wrong approach",
136
- "50: Partial implementation — major functional gaps",
137
- "80: Mostly complete — minor issues or missing edge cases",
138
- "100: Fully functional code — works as expected",
139
- ],
140
- criteriaLabel: "Must demonstrate:",
141
- },
142
- {
143
- id: "code-correctness",
144
- dimension: "code-correctness",
145
- header: "Score code correctness from 0 to 100:",
146
- scale: [
147
- "0: Broken code, syntax errors, or deprecated APIs",
148
- "30: Works but uses anti-patterns or inefficient approaches",
149
- "50: Works but not idiomatic",
150
- "80: Follows most best practices",
151
- "100: Follows all best practices, idiomatic implementation",
152
- ],
153
- criteriaLabel: "Check for:",
154
- },
155
- {
156
- id: "doc-coverage",
157
- dimension: "doc-coverage",
158
- header: "Score documentation coverage from 0 to 100:",
159
- scale: [
160
- "0: Had to hallucinate/guess most implementation details",
161
- "30: Significant gaps — filled with assumptions",
162
- "50: Some gaps — inferred from partial information",
163
- "80: Minor gaps — almost everything was documented",
164
- "100: Complete coverage — all necessary info was in docs",
165
- ],
166
- },
167
- ],
40
+ // ── Mode ──────────────────────────────────────────────────
41
+ // Targets the literacy mode base. Evaluation methodology (rubrics,
42
+ // scoring, prompts) is inherited from mode-bases/literacy.ts.
43
+ mode: "literacy",
168
44
  // ── Fixture resolvers ────────────────────────────────────
169
45
  fixtureResolvers: [
170
46
  {
@@ -172,22 +48,6 @@ export function createSanityLiteracyPreset(options) {
172
48
  handlerModule: "./fixture-resolver.js",
173
49
  },
174
50
  ],
175
- // ── Prompt templates (from literacy handler) ─────────────
176
- promptTemplates: LITERACY_PROMPT_TEMPLATES,
177
- // ── Scoring profiles ─────────────────────────────────────
178
- // Literacy-relevant profiles only; mode-specific profiles for
179
- // mcp-server, knowledge-probe, etc. belong in their own presets.
180
- scoringProfiles: {
181
- default: {
182
- "task-completion": 0.5,
183
- "code-correctness": 0.25,
184
- "doc-coverage": 0.25,
185
- },
186
- "output-only": {
187
- "task-completion": 0.6,
188
- "code-correctness": 0.4,
189
- },
190
- },
191
51
  // ── Doc fetcher factory ──────────────────────────────────
192
52
  // Closure captures rootDir so the registry can instantiate
193
53
  // the fetcher without knowing about Sanity internals.
@@ -227,7 +87,6 @@ export function createSanityLiteracyPreset(options) {
227
87
  status: "covered",
228
88
  area: "groq",
229
89
  priority: "critical",
230
- taskCount: 3,
231
90
  },
232
91
  {
233
92
  id: "visual-editing",
@@ -236,7 +95,6 @@ export function createSanityLiteracyPreset(options) {
236
95
  status: "covered",
237
96
  area: "visual-editing",
238
97
  priority: "critical",
239
- taskCount: 1,
240
98
  },
241
99
  {
242
100
  id: "nextjs-live",
@@ -245,7 +103,6 @@ export function createSanityLiteracyPreset(options) {
245
103
  status: "covered",
246
104
  area: "nextjs-live",
247
105
  priority: "high",
248
- taskCount: 2,
249
106
  },
250
107
  {
251
108
  id: "functions",
@@ -254,7 +111,6 @@ export function createSanityLiteracyPreset(options) {
254
111
  status: "covered",
255
112
  area: "functions",
256
113
  priority: "high",
257
- taskCount: 2,
258
114
  },
259
115
  {
260
116
  id: "studio-setup",
@@ -263,7 +119,6 @@ export function createSanityLiteracyPreset(options) {
263
119
  status: "covered",
264
120
  area: "studio-setup",
265
121
  priority: "high",
266
- taskCount: 1,
267
122
  },
268
123
  {
269
124
  id: "frameworks",
@@ -272,7 +127,6 @@ export function createSanityLiteracyPreset(options) {
272
127
  status: "covered",
273
128
  area: "frameworks",
274
129
  priority: "high",
275
- taskCount: 2,
276
130
  },
277
131
  // Uncovered (no evaluation tasks yet)
278
132
  {
@@ -9,10 +9,7 @@
9
9
  * TaskGraph → resolve fixtures → resolve variables → map assertions
10
10
  * → assemble prompts → assemble providers → emit YAML
11
11
  *
12
- * This module exists alongside `generate-configs.ts` — it does NOT replace
13
- * the existing codegen path. Phase 7 will swap callers over to the compiler.
14
- *
15
- * @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
12
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-2-config-compiler.md
16
13
  */
17
14
  import type { ModeHandler, ModelsConfig, TaskGraph } from "../../_vendor/ailf-core/index.d.ts";
18
15
  import type { EvalMode } from "../../_vendor/ailf-shared/index.d.ts";
@@ -9,10 +9,7 @@
9
9
  * TaskGraph → resolve fixtures → resolve variables → map assertions
10
10
  * → assemble prompts → assemble providers → emit YAML
11
11
  *
12
- * This module exists alongside `generate-configs.ts` — it does NOT replace
13
- * the existing codegen path. Phase 7 will swap callers over to the compiler.
14
- *
15
- * @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
12
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-2-config-compiler.md
16
13
  */
17
14
  import { mapAssertions } from "./assertion-mapper.js";
18
15
  import { resolveTaskFixtures } from "./fixture-resolver.js";
@@ -151,20 +148,14 @@ function buildProviders(models, mode) {
151
148
  /**
152
149
  * Check if a model entry matches the current evaluation mode.
153
150
  *
154
- * Literacy mode defaults to baseline model matching. Variant-specific
151
+ * Uses the typed EvalMode values on model.modes. Variant-specific
155
152
  * provider filtering is handled by the provider-assembler and
156
153
  * generate-configs-step, not here.
157
154
  */
158
155
  function modelMatchesMode(model, mode) {
159
156
  if (!model.modes || model.modes.length === 0)
160
157
  return true;
161
- switch (mode) {
162
- case "literacy":
163
- return model.modes.includes(LiteracyVariant.STANDARD);
164
- default:
165
- // Non-literacy modes accept all models by default
166
- return true;
167
- }
158
+ return model.modes.includes(mode);
168
159
  }
169
160
  // ---------------------------------------------------------------------------
170
161
  // Prompt resolution
@@ -7,9 +7,9 @@
7
7
  * Separated into its own module so GenerateConfigsStep can import it
8
8
  * without pulling in the full legacy generate-configs machinery.
9
9
  */
10
- import { extractModelName, extractProvider, mergeConfig, modelMatchesMode, } from "../../_vendor/ailf-core/index.js";
11
- import { LiteracyVariant } from "../normalize-mode.js";
10
+ import { extractModelName, extractProvider, mergeConfig, } from "../../_vendor/ailf-core/index.js";
12
11
  import { loadConfigFile } from "./config-loader.js";
12
+ import { modelMatchesLiteracyVariant } from "./mode-bases/literacy.js";
13
13
  // ---------------------------------------------------------------------------
14
14
  // Public API
15
15
  // ---------------------------------------------------------------------------
@@ -36,9 +36,12 @@ export function loadModelsAndProviders(rootDir, source, searchMode, allowedOrigi
36
36
  // ---------------------------------------------------------------------------
37
37
  function buildBaselineProviders(models) {
38
38
  return models.models
39
- .filter((m) => modelMatchesMode(m, LiteracyVariant.STANDARD))
39
+ .filter((m) => modelMatchesLiteracyVariant(m, "baseline"))
40
40
  .map((model) => ({
41
- config: mergeConfig(models.defaults, model.config),
41
+ config: {
42
+ ...mergeConfig(models.defaults, model.config),
43
+ ...(model.timeoutMs ? { timeoutMs: model.timeoutMs } : {}),
44
+ },
42
45
  id: model.id,
43
46
  label: model.label,
44
47
  }));
@@ -48,12 +51,13 @@ function buildBaselineProviders(models) {
48
51
  // ---------------------------------------------------------------------------
49
52
  function buildObservedProviders(models) {
50
53
  return models.models
51
- .filter((m) => modelMatchesMode(m, LiteracyVariant.OBSERVED))
54
+ .filter((m) => modelMatchesLiteracyVariant(m, "observed"))
52
55
  .map((model) => {
53
56
  const modelName = extractModelName(model.id);
54
57
  return {
55
58
  config: {
56
59
  ...mergeConfig(models.defaults, model.config),
60
+ ...(model.timeoutMs ? { timeoutMs: model.timeoutMs } : {}),
57
61
  modelName,
58
62
  observe: true,
59
63
  recordOptions: models.defaults.observerOptions ?? {},
@@ -67,8 +71,8 @@ function buildObservedProviders(models) {
67
71
  // Agentic providers
68
72
  // ---------------------------------------------------------------------------
69
73
  function buildAgenticProviders(models, source, searchMode, _allowedOrigins) {
70
- const naiveModels = models.models.filter((m) => modelMatchesMode(m, "agentic-naive"));
71
- const optimizedModels = models.models.filter((m) => modelMatchesMode(m, "agentic-optimized"));
74
+ const naiveModels = models.models.filter((m) => modelMatchesLiteracyVariant(m, "agentic-naive"));
75
+ const optimizedModels = models.models.filter((m) => modelMatchesLiteracyVariant(m, "agentic-optimized"));
72
76
  const resolvedSearchMode = searchMode ?? "open";
73
77
  const sourceConfig = source
74
78
  ? {
@@ -100,6 +104,7 @@ function buildAgenticProviders(models, source, searchMode, _allowedOrigins) {
100
104
  model: modelName,
101
105
  provider,
102
106
  }),
107
+ ...(model.timeoutMs ? { timeoutMs: model.timeoutMs } : {}),
103
108
  ...sourceConfig,
104
109
  observe: true,
105
110
  observerOptions: models.defaults.observerOptions ?? {},
@@ -119,6 +124,7 @@ function buildAgenticProviders(models, source, searchMode, _allowedOrigins) {
119
124
  model: modelName,
120
125
  provider,
121
126
  }),
127
+ ...(model.timeoutMs ? { timeoutMs: model.timeoutMs } : {}),
122
128
  ...sourceConfig,
123
129
  observe: true,
124
130
  observerOptions: models.defaults.observerOptions ?? {},
@@ -8,7 +8,7 @@
8
8
  * no shell) to prevent shell injection from task-supplied values like
9
9
  * image names or task IDs.
10
10
  *
11
- * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
11
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-4-agent-harness.md
12
12
  */
13
13
  import type { SandboxArtifacts, SandboxInfo, SandboxProvisionOptions, SandboxStrategy } from "./sandbox-strategy.js";
14
14
  export declare class DockerSandboxStrategy implements SandboxStrategy {
@@ -8,7 +8,7 @@
8
8
  * no shell) to prevent shell injection from task-supplied values like
9
9
  * image names or task IDs.
10
10
  *
11
- * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
11
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-4-agent-harness.md
12
12
  */
13
13
  import { randomUUID } from "crypto";
14
14
  import { execFileSync } from "child_process";
@@ -10,7 +10,7 @@
10
10
  * - sanity:// — Content Lake document by ID or query
11
11
  *
12
12
  * @see docs/design-docs/architecture-overhaul/fixtures-artifacts.md
13
- * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
13
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-4-agent-harness.md
14
14
  */
15
15
  import type { SandboxInfo } from "./sandbox-strategy.js";
16
16
  /** A fixture reference from a task definition */
@@ -10,7 +10,7 @@
10
10
  * - sanity:// — Content Lake document by ID or query
11
11
  *
12
12
  * @see docs/design-docs/architecture-overhaul/fixtures-artifacts.md
13
- * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
13
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-4-agent-harness.md
14
14
  */
15
15
  import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
16
16
  import { createHash } from "crypto";
@@ -7,7 +7,7 @@
7
7
  * All git CLI calls use `execFileSync` (array form, no shell) to prevent
8
8
  * injection from task-supplied values like git refs or repo paths.
9
9
  *
10
- * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
10
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-4-agent-harness.md
11
11
  */
12
12
  import type { SandboxArtifacts, SandboxInfo, SandboxProvisionOptions, SandboxStrategy } from "./sandbox-strategy.js";
13
13
  export declare class GitWorktreeSandboxStrategy implements SandboxStrategy {
@@ -7,7 +7,7 @@
7
7
  * All git CLI calls use `execFileSync` (array form, no shell) to prevent
8
8
  * injection from task-supplied values like git refs or repo paths.
9
9
  *
10
- * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
10
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-4-agent-harness.md
11
11
  */
12
12
  import { randomUUID } from "crypto";
13
13
  import { execFileSync } from "child_process";
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * Sandbox infrastructure — isolated execution environments for agent harness mode.
3
3
  *
4
- * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
4
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-4-agent-harness.md
5
5
  */
6
6
  export type { SandboxArtifacts, SandboxInfo, SandboxProvisionOptions, SandboxStrategy, SandboxType, } from "./sandbox-strategy.js";
7
7
  export { DockerSandboxStrategy } from "./docker-sandbox.js";
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * Sandbox infrastructure — isolated execution environments for agent harness mode.
3
3
  *
4
- * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
4
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-4-agent-harness.md
5
5
  */
6
6
  // Implementations
7
7
  export { DockerSandboxStrategy } from "./docker-sandbox.js";
@@ -8,7 +8,7 @@
8
8
  *
9
9
  * CI environments (detected via CI env var) always prefer Docker.
10
10
  *
11
- * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
11
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-4-agent-harness.md
12
12
  */
13
13
  import type { SandboxStrategy, SandboxType } from "./sandbox-strategy.js";
14
14
  /** Result of sandbox selection */
@@ -8,7 +8,7 @@
8
8
  *
9
9
  * CI environments (detected via CI env var) always prefer Docker.
10
10
  *
11
- * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
11
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-4-agent-harness.md
12
12
  */
13
13
  import { DockerSandboxStrategy } from "./docker-sandbox.js";
14
14
  import { GitWorktreeSandboxStrategy } from "./git-worktree-sandbox.js";
@@ -10,7 +10,7 @@
10
10
  * Selection: task config specifies preferred strategy; runtime falls back
11
11
  * Docker → TempDir if Docker is unavailable. CI environments prefer Docker.
12
12
  *
13
- * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
13
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-4-agent-harness.md
14
14
  */
15
15
  /** Metadata describing a provisioned sandbox */
16
16
  export interface SandboxInfo {
@@ -10,6 +10,6 @@
10
10
  * Selection: task config specifies preferred strategy; runtime falls back
11
11
  * Docker → TempDir if Docker is unavailable. CI environments prefer Docker.
12
12
  *
13
- * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
13
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-4-agent-harness.md
14
14
  */
15
15
  export {};
@@ -7,7 +7,7 @@
7
7
  *
8
8
  * This is the universal fallback when Docker is unavailable.
9
9
  *
10
- * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
10
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-4-agent-harness.md
11
11
  */
12
12
  import type { SandboxArtifacts, SandboxInfo, SandboxProvisionOptions, SandboxStrategy } from "./sandbox-strategy.js";
13
13
  export declare class TempDirSandboxStrategy implements SandboxStrategy {
@@ -7,7 +7,7 @@
7
7
  *
8
8
  * This is the universal fallback when Docker is unavailable.
9
9
  *
10
- * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
10
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-4-agent-harness.md
11
11
  */
12
12
  import { randomUUID } from "crypto";
13
13
  import { existsSync, mkdirSync, readdirSync, rmSync } from "fs";
@@ -18,7 +18,7 @@
18
18
  *
19
19
  * @see packages/core/src/services/scoring-engine.ts — the 4-tier engine
20
20
  * @see packages/eval/src/pipeline/calculate-scores.ts — the consumer
21
- * @see docs/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
21
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
22
22
  */
23
23
  import { type DimensionScore } from "../../_vendor/ailf-core/index.d.ts";
24
24
  import type { TestResult } from "../../_vendor/ailf-core/index.d.ts";
@@ -18,7 +18,7 @@
18
18
  *
19
19
  * @see packages/core/src/services/scoring-engine.ts — the 4-tier engine
20
20
  * @see packages/eval/src/pipeline/calculate-scores.ts — the consumer
21
- * @see docs/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
21
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
22
22
  */
23
23
  import { aggregateDimensions, computeTaskScore, normalizeScore, } from "../../_vendor/ailf-core/index.js";
24
24
  import { classifyRubric, parseRubricScore } from "../../_vendor/ailf-core/index.js";
@@ -0,0 +1,41 @@
1
+ /**
2
+ * task-bridge.ts — Bidirectional bridge between old TaskDefinition and new LiteracyTaskDefinition.
3
+ *
4
+ * Enables incremental migration: consumers can convert between the two types
5
+ * without changing their internal logic. Once all consumers use
6
+ * GeneralizedTaskDefinition, this module is deleted (Wave 3 task 6).
7
+ *
8
+ * Field mapping (TaskDefinition ↔ LiteracyTaskDefinition):
9
+ * id ↔ id
10
+ * description ↔ title
11
+ * featureArea ↔ area
12
+ * taskPrompt ↔ prompt.text (fallback: prompt.template)
13
+ * canonicalDocs ↔ context.docs
14
+ * referenceSolution ↔ referenceSolution
15
+ * docCoverage ↔ docCoverage
16
+ * assertions ↔ assertions (structurally identical)
17
+ * baseline ↔ baseline (structurally identical)
18
+ * tags ↔ tags
19
+ * status ↔ status
20
+ * extraVars ↔ prompt.vars
21
+ *
22
+ * The assertion and doc-ref sub-types are structurally identical between
23
+ * the old and new type systems, so no field-level remapping is needed
24
+ * for those — only a TypeScript-level cast.
25
+ */
26
+ import type { LiteracyTaskDefinition, TaskDefinition } from "../../_vendor/ailf-core/index.d.ts";
27
+ /**
28
+ * Convert an old-style TaskDefinition to the new LiteracyTaskDefinition.
29
+ *
30
+ * Every field of TaskDefinition has a corresponding field in LiteracyTaskDefinition,
31
+ * so this conversion is lossless.
32
+ */
33
+ export declare function toGeneralized(task: TaskDefinition): LiteracyTaskDefinition;
34
+ /**
35
+ * Convert a new LiteracyTaskDefinition to the old TaskDefinition shape.
36
+ *
37
+ * Fields that only exist on LiteracyTaskDefinition (description, difficulty,
38
+ * metadata, rubric, providers, options, context.fixtures, prompt.systemMessage)
39
+ * are dropped — the old type has no place for them.
40
+ */
41
+ export declare function toLiteracyTask(task: LiteracyTaskDefinition): TaskDefinition;
@@ -0,0 +1,92 @@
1
+ /**
2
+ * task-bridge.ts — Bidirectional bridge between old TaskDefinition and new LiteracyTaskDefinition.
3
+ *
4
+ * Enables incremental migration: consumers can convert between the two types
5
+ * without changing their internal logic. Once all consumers use
6
+ * GeneralizedTaskDefinition, this module is deleted (Wave 3 task 6).
7
+ *
8
+ * Field mapping (TaskDefinition ↔ LiteracyTaskDefinition):
9
+ * id ↔ id
10
+ * description ↔ title
11
+ * featureArea ↔ area
12
+ * taskPrompt ↔ prompt.text (fallback: prompt.template)
13
+ * canonicalDocs ↔ context.docs
14
+ * referenceSolution ↔ referenceSolution
15
+ * docCoverage ↔ docCoverage
16
+ * assertions ↔ assertions (structurally identical)
17
+ * baseline ↔ baseline (structurally identical)
18
+ * tags ↔ tags
19
+ * status ↔ status
20
+ * extraVars ↔ prompt.vars
21
+ *
22
+ * The assertion and doc-ref sub-types are structurally identical between
23
+ * the old and new type systems, so no field-level remapping is needed
24
+ * for those — only a TypeScript-level cast.
25
+ */
26
+ // ---------------------------------------------------------------------------
27
+ // toGeneralized — old TaskDefinition → LiteracyTaskDefinition
28
+ // ---------------------------------------------------------------------------
29
+ /**
30
+ * Convert an old-style TaskDefinition to the new LiteracyTaskDefinition.
31
+ *
32
+ * Every field of TaskDefinition has a corresponding field in LiteracyTaskDefinition,
33
+ * so this conversion is lossless.
34
+ */
35
+ export function toGeneralized(task) {
36
+ const result = {
37
+ mode: "literacy",
38
+ id: task.id,
39
+ title: task.description,
40
+ area: task.featureArea,
41
+ prompt: {
42
+ text: task.taskPrompt,
43
+ ...(task.extraVars != null ? { vars: task.extraVars } : {}),
44
+ },
45
+ context: {
46
+ docs: task.canonicalDocs,
47
+ },
48
+ referenceSolution: task.referenceSolution,
49
+ docCoverage: task.docCoverage,
50
+ assertions: task.assertions,
51
+ };
52
+ // Only set optional fields when present to preserve round-trip identity
53
+ if (task.baseline != null)
54
+ result.baseline = task.baseline;
55
+ if (task.tags != null)
56
+ result.tags = task.tags;
57
+ if (task.status != null)
58
+ result.status = task.status;
59
+ return result;
60
+ }
61
+ // ---------------------------------------------------------------------------
62
+ // toLiteracyTask — LiteracyTaskDefinition → old TaskDefinition
63
+ // ---------------------------------------------------------------------------
64
+ /**
65
+ * Convert a new LiteracyTaskDefinition to the old TaskDefinition shape.
66
+ *
67
+ * Fields that only exist on LiteracyTaskDefinition (description, difficulty,
68
+ * metadata, rubric, providers, options, context.fixtures, prompt.systemMessage)
69
+ * are dropped — the old type has no place for them.
70
+ */
71
+ export function toLiteracyTask(task) {
72
+ const result = {
73
+ id: task.id,
74
+ description: task.title,
75
+ featureArea: task.area ?? "",
76
+ taskPrompt: task.prompt?.text ?? task.prompt?.template ?? "",
77
+ canonicalDocs: (task.context?.docs ?? []),
78
+ referenceSolution: task.referenceSolution ?? "",
79
+ docCoverage: task.docCoverage ?? false,
80
+ assertions: (task.assertions ?? []),
81
+ };
82
+ // Only set optional fields when present to preserve round-trip identity
83
+ if (task.baseline != null)
84
+ result.baseline = task.baseline;
85
+ if (task.tags != null)
86
+ result.tags = task.tags;
87
+ if (task.status != null)
88
+ result.status = task.status;
89
+ if (task.prompt?.vars != null)
90
+ result.extraVars = task.prompt.vars;
91
+ return result;
92
+ }