@sanity/ailf 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (444)
  1. package/canonical/grader-references/README.md +2 -2
  2. package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
  3. package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
  4. package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
  5. package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
  6. package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
  7. package/config/features.ts +1 -1
  8. package/config/models.ts +28 -23
  9. package/config/sources.ts +1 -1
  10. package/config/thresholds.ts +1 -1
  11. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
  12. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
  13. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
  14. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
  15. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
  16. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
  17. package/dist/_vendor/ailf-core/config-helpers.d.ts +6 -0
  18. package/dist/_vendor/ailf-core/config-helpers.js +29 -0
  19. package/dist/_vendor/ailf-core/examples/index.d.ts +164 -94
  20. package/dist/_vendor/ailf-core/examples/index.js +208 -114
  21. package/dist/_vendor/ailf-core/index.d.ts +1 -0
  22. package/dist/_vendor/ailf-core/index.js +1 -0
  23. package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
  24. package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
  25. package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
  26. package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
  27. package/dist/_vendor/ailf-core/ports/context.d.ts +20 -1
  28. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
  29. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
  30. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
  31. package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
  32. package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
  33. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +6 -1
  34. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +14 -2
  35. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
  36. package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
  37. package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
  38. package/dist/_vendor/ailf-core/services/index.js +1 -1
  39. package/dist/_vendor/ailf-core/services/scoring.js +9 -0
  40. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +12 -1
  41. package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
  42. package/dist/_vendor/ailf-core/types/index.d.ts +47 -4
  43. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +27 -0
  44. package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
  45. package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
  46. package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
  47. package/dist/_vendor/ailf-tasks/cli.js +61 -0
  48. package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
  49. package/dist/_vendor/ailf-tasks/index.js +16 -0
  50. package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
  51. package/dist/_vendor/ailf-tasks/parser.js +73 -0
  52. package/dist/_vendor/ailf-tasks/schemas.d.ts +198 -0
  53. package/dist/_vendor/ailf-tasks/schemas.js +180 -0
  54. package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
  55. package/dist/_vendor/ailf-tasks/validation.js +162 -0
  56. package/dist/adapters/api-client/remediation.js +2 -2
  57. package/dist/adapters/config-sources/file-config-adapter.js +6 -1
  58. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
  59. package/dist/adapters/index.d.ts +0 -1
  60. package/dist/adapters/index.js +0 -1
  61. package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
  62. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  63. package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
  64. package/dist/adapters/task-sources/content-lake-task-source.js +4 -6
  65. package/dist/adapters/task-sources/index.d.ts +1 -2
  66. package/dist/adapters/task-sources/index.js +1 -2
  67. package/dist/adapters/task-sources/repo-schemas.d.ts +1 -1
  68. package/dist/adapters/task-sources/repo-schemas.js +2 -2
  69. package/dist/adapters/task-sources/repo-task-source.js +1 -1
  70. package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
  71. package/dist/adapters/task-sources/repo-trigger.js +1 -1
  72. package/dist/adapters/task-sources/task-file-loader.d.ts +9 -6
  73. package/dist/adapters/task-sources/task-file-loader.js +20 -6
  74. package/dist/agent-observer/test-imports.d.ts +7 -0
  75. package/dist/agent-observer/test-imports.js +185 -0
  76. package/dist/artifact-capture/comparator.d.ts +22 -0
  77. package/dist/artifact-capture/comparator.js +493 -0
  78. package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
  79. package/dist/artifact-capture/filesystem-collector.js +237 -0
  80. package/dist/artifact-capture/redact-artifact.d.ts +20 -0
  81. package/dist/artifact-capture/redact-artifact.js +115 -0
  82. package/dist/assertions/source-isolation.d.ts +1 -1
  83. package/dist/assertions/source-isolation.js +1 -1
  84. package/dist/cli.js +4 -0
  85. package/dist/commands/calculate-scores.js +1 -0
  86. package/dist/commands/capture-compare.d.ts +15 -0
  87. package/dist/commands/capture-compare.js +253 -0
  88. package/dist/commands/capture-list.d.ts +12 -0
  89. package/dist/commands/capture-list.js +147 -0
  90. package/dist/commands/capture.d.ts +9 -0
  91. package/dist/commands/capture.js +16 -0
  92. package/dist/commands/chronic-failures.d.ts +8 -0
  93. package/dist/commands/chronic-failures.js +33 -0
  94. package/dist/commands/explain-handler.d.ts +1 -1
  95. package/dist/commands/explain-handler.js +37 -8
  96. package/dist/commands/fetch-docs.js +1 -0
  97. package/dist/commands/generate-configs.d.ts +3 -3
  98. package/dist/commands/generate-configs.js +20 -8
  99. package/dist/commands/init.d.ts +2 -3
  100. package/dist/commands/init.js +56 -170
  101. package/dist/commands/pipeline-action.d.ts +7 -1
  102. package/dist/commands/pipeline-action.js +43 -19
  103. package/dist/commands/pipeline.d.ts +6 -1
  104. package/dist/commands/pipeline.js +7 -2
  105. package/dist/commands/pr-comment.js +1 -0
  106. package/dist/commands/publish.js +1 -0
  107. package/dist/commands/shared/help.js +2 -2
  108. package/dist/commands/update-quality-scores.d.ts +5 -0
  109. package/dist/commands/update-quality-scores.js +20 -0
  110. package/dist/composition-root.d.ts +2 -3
  111. package/dist/composition-root.js +27 -14
  112. package/dist/config/features.ts +23 -0
  113. package/dist/config/models.ts +100 -0
  114. package/dist/config/prompts.ts +16 -0
  115. package/dist/config/rubrics.ts +225 -0
  116. package/dist/config/schedules.ts +47 -0
  117. package/dist/config/sinks.ts +37 -0
  118. package/dist/config/sources.ts +21 -0
  119. package/dist/config/thresholds.ts +61 -0
  120. package/dist/lib/agent-behavior-report.d.ts +8 -0
  121. package/dist/lib/agent-behavior-report.js +185 -0
  122. package/dist/lib/baseline.d.ts +19 -0
  123. package/dist/lib/baseline.js +153 -0
  124. package/dist/lib/calculate-scores.d.ts +23 -0
  125. package/dist/lib/calculate-scores.js +42 -0
  126. package/dist/lib/compare.d.ts +18 -0
  127. package/dist/lib/compare.js +170 -0
  128. package/dist/lib/coverage-audit.d.ts +4 -0
  129. package/dist/lib/coverage-audit.js +42 -0
  130. package/dist/lib/discovery-report.d.ts +13 -0
  131. package/dist/lib/discovery-report.js +57 -0
  132. package/dist/lib/fetch-docs.d.ts +30 -0
  133. package/dist/lib/fetch-docs.js +171 -0
  134. package/dist/lib/generate-configs.d.ts +25 -0
  135. package/dist/lib/generate-configs.js +42 -0
  136. package/dist/lib/grader-api.d.ts +21 -0
  137. package/dist/lib/grader-api.js +34 -0
  138. package/dist/lib/grader-compare.d.ts +19 -0
  139. package/dist/lib/grader-compare.js +91 -0
  140. package/dist/lib/grader-consistency.d.ts +27 -0
  141. package/dist/lib/grader-consistency.js +79 -0
  142. package/dist/lib/grader-sensitivity.d.ts +19 -0
  143. package/dist/lib/grader-sensitivity.js +75 -0
  144. package/dist/lib/grader-validate.d.ts +19 -0
  145. package/dist/lib/grader-validate.js +78 -0
  146. package/dist/lib/measure-retrieval.d.ts +14 -0
  147. package/dist/lib/measure-retrieval.js +71 -0
  148. package/dist/lib/pr-comment.d.ts +16 -0
  149. package/dist/lib/pr-comment.js +28 -0
  150. package/dist/lib/readiness-report.d.ts +13 -0
  151. package/dist/lib/readiness-report.js +108 -0
  152. package/dist/lib/webhook-server.d.ts +11 -0
  153. package/dist/lib/webhook-server.js +24 -0
  154. package/dist/lib/weekly-digest.d.ts +24 -0
  155. package/dist/lib/weekly-digest.js +148 -0
  156. package/dist/orchestration/build-app-context.js +13 -0
  157. package/dist/orchestration/cache-context.d.ts +23 -0
  158. package/dist/orchestration/cache-context.js +43 -0
  159. package/dist/orchestration/env-bridge.d.ts +21 -0
  160. package/dist/orchestration/env-bridge.js +66 -0
  161. package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
  162. package/dist/orchestration/load-pipeline-tasks.js +52 -0
  163. package/dist/orchestration/pipeline-orchestrator.js +75 -5
  164. package/dist/orchestration/step-runner.js +5 -1
  165. package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
  166. package/dist/orchestration/steps/calculate-scores-step.js +13 -0
  167. package/dist/orchestration/steps/callback-step.js +10 -1
  168. package/dist/orchestration/steps/compare-step.js +6 -3
  169. package/dist/orchestration/steps/discovery-report-step.js +6 -2
  170. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  171. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  172. package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
  173. package/dist/orchestration/steps/fetch-docs-step.js +30 -16
  174. package/dist/orchestration/steps/gap-analysis-step.js +13 -2
  175. package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
  176. package/dist/orchestration/steps/generate-configs-step.js +50 -15
  177. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
  178. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  179. package/dist/orchestration/steps/publish-report-step.js +19 -0
  180. package/dist/orchestration/steps/readiness-step.js +8 -3
  181. package/dist/orchestration/steps/report-step.js +17 -4
  182. package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
  183. package/dist/orchestration/steps/run-eval-step.js +52 -32
  184. package/dist/pipeline/agent-behavior-report.js +6 -0
  185. package/dist/pipeline/attribution.d.ts +1 -1
  186. package/dist/pipeline/attribution.js +1 -1
  187. package/dist/pipeline/cache.js +29 -15
  188. package/dist/pipeline/calculate-scores.d.ts +2 -0
  189. package/dist/pipeline/calculate-scores.js +70 -33
  190. package/dist/pipeline/checks.d.ts +8 -3
  191. package/dist/pipeline/checks.js +23 -3
  192. package/dist/pipeline/chronic-failures.d.ts +55 -0
  193. package/dist/pipeline/chronic-failures.js +110 -0
  194. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +33 -0
  195. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
  196. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
  197. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
  198. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
  199. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
  200. package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
  201. package/dist/pipeline/compiler/assertion-mapper.js +1 -1
  202. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
  203. package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
  204. package/dist/pipeline/compiler/config-loader.d.ts +14 -0
  205. package/dist/pipeline/compiler/config-loader.js +42 -2
  206. package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
  207. package/dist/pipeline/compiler/fixture-resolver.js +1 -1
  208. package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
  209. package/dist/pipeline/compiler/ignore-fields.js +1 -1
  210. package/dist/pipeline/compiler/index.d.ts +2 -5
  211. package/dist/pipeline/compiler/index.js +2 -5
  212. package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
  213. package/dist/pipeline/compiler/literacy-bridge.js +1 -1
  214. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +1 -1
  215. package/dist/pipeline/compiler/mode-bases/agent-harness.js +1 -1
  216. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +1 -1
  217. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +1 -1
  218. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +13 -2
  219. package/dist/pipeline/compiler/mode-bases/literacy.js +55 -1
  220. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +1 -1
  221. package/dist/pipeline/compiler/mode-bases/mcp-server.js +1 -1
  222. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +1 -1
  223. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -1
  224. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
  225. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
  226. package/dist/pipeline/compiler/mode-handlers/index.d.ts +2 -2
  227. package/dist/pipeline/compiler/mode-handlers/index.js +2 -2
  228. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +1 -1
  229. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +1 -1
  230. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
  231. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
  232. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
  233. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +1 -1
  234. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
  235. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
  236. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
  237. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +334 -0
  238. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +1 -1
  239. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +1 -1
  240. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +1 -1
  241. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +1 -1
  242. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +4 -0
  243. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +69 -0
  244. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +307 -0
  245. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +22 -5
  246. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +6 -0
  247. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +10 -5
  248. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +314 -7
  249. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +10 -0
  250. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
  251. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
  252. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +1 -1
  253. package/dist/pipeline/compiler/presets/sanity-literacy.js +1 -1
  254. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
  255. package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
  256. package/dist/pipeline/compiler/provider-assembler.js +13 -7
  257. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
  258. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
  259. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
  260. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
  261. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
  262. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
  263. package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
  264. package/dist/pipeline/compiler/sandbox/index.js +1 -1
  265. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
  266. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
  267. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
  268. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
  269. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
  270. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
  271. package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
  272. package/dist/pipeline/compiler/scoring-bridge.js +1 -1
  273. package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
  274. package/dist/pipeline/compiler/task-bridge.js +92 -0
  275. package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
  276. package/dist/pipeline/compiler/task-graph-builder.js +1 -4
  277. package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
  278. package/dist/pipeline/compiler/telemetry/index.js +1 -1
  279. package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
  280. package/dist/pipeline/compiler/variable-resolver.js +1 -1
  281. package/dist/pipeline/coverage-audit.d.ts +1 -1
  282. package/dist/pipeline/coverage-audit.js +1 -1
  283. package/dist/pipeline/degradations.d.ts +1 -1
  284. package/dist/pipeline/degradations.js +1 -1
  285. package/dist/pipeline/failure-modes.d.ts +1 -1
  286. package/dist/pipeline/failure-modes.js +13 -1
  287. package/dist/pipeline/gap-analysis.d.ts +1 -1
  288. package/dist/pipeline/gap-analysis.js +3 -1
  289. package/dist/pipeline/generate-configs.d.ts +2 -2
  290. package/dist/pipeline/generate-configs.js +15 -8
  291. package/dist/pipeline/grader-compare-runner.d.ts +1 -1
  292. package/dist/pipeline/grader-compare-runner.js +7 -1
  293. package/dist/pipeline/grader-comparison.d.ts +1 -1
  294. package/dist/pipeline/grader-comparison.js +1 -1
  295. package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
  296. package/dist/pipeline/grader-consistency-runner.js +7 -1
  297. package/dist/pipeline/grader-consistency.d.ts +1 -1
  298. package/dist/pipeline/grader-consistency.js +1 -1
  299. package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
  300. package/dist/pipeline/grader-sensitivity-runner.js +1 -1
  301. package/dist/pipeline/grader-sensitivity.d.ts +1 -1
  302. package/dist/pipeline/grader-sensitivity.js +1 -1
  303. package/dist/pipeline/grader-validate-runner.d.ts +1 -1
  304. package/dist/pipeline/grader-validate-runner.js +2 -2
  305. package/dist/pipeline/grader-validation.d.ts +1 -1
  306. package/dist/pipeline/grader-validation.js +1 -1
  307. package/dist/pipeline/map-request-to-config.js +15 -2
  308. package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
  309. package/dist/pipeline/mirror-repo-tasks.js +1 -1
  310. package/dist/pipeline/plan-format.d.ts +1 -1
  311. package/dist/pipeline/plan-format.js +1 -1
  312. package/dist/pipeline/plan.d.ts +1 -1
  313. package/dist/pipeline/plan.js +67 -29
  314. package/dist/pipeline/probe.d.ts +1 -1
  315. package/dist/pipeline/probe.js +1 -1
  316. package/dist/pipeline/readiness-report.d.ts +2 -2
  317. package/dist/pipeline/readiness-report.js +2 -2
  318. package/dist/pipeline/release-classification.d.ts +1 -1
  319. package/dist/pipeline/release-classification.js +1 -1
  320. package/dist/pipeline/release-report.d.ts +1 -1
  321. package/dist/pipeline/release-report.js +1 -1
  322. package/dist/pipeline/repo-eval-comment.d.ts +1 -1
  323. package/dist/pipeline/repo-eval-comment.js +1 -1
  324. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  325. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  326. package/dist/pipeline/resolve-mappings.d.ts +6 -6
  327. package/dist/pipeline/resolve-mappings.js +44 -44
  328. package/dist/pipeline/retrieval-metrics.d.ts +3 -3
  329. package/dist/pipeline/retrieval-metrics.js +28 -20
  330. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  331. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  332. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  333. package/dist/pipeline/steps/compare-step.js +90 -0
  334. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  335. package/dist/pipeline/steps/eval-step.js +347 -0
  336. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  337. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  338. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  339. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  340. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  341. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  342. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  343. package/dist/pipeline/steps/publish-report-step.js +243 -0
  344. package/dist/pipeline/steps/report-step.d.ts +13 -0
  345. package/dist/pipeline/steps/report-step.js +56 -0
  346. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  347. package/dist/pipeline/steps/update-scores-step.js +42 -0
  348. package/dist/pipeline/targeted-loo.d.ts +1 -1
  349. package/dist/pipeline/targeted-loo.js +1 -1
  350. package/dist/pipeline/thresholds.d.ts +1 -1
  351. package/dist/pipeline/thresholds.js +1 -1
  352. package/dist/pipeline/validate.js +13 -0
  353. package/dist/report-store.d.ts +17 -0
  354. package/dist/report-store.js +24 -0
  355. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  356. package/dist/scripts/agent-behavior-report.js +315 -0
  357. package/dist/scripts/baseline.d.ts +43 -0
  358. package/dist/scripts/baseline.js +267 -0
  359. package/dist/scripts/calculate-scores.d.ts +166 -0
  360. package/dist/scripts/calculate-scores.js +1296 -0
  361. package/dist/scripts/compare.d.ts +22 -0
  362. package/dist/scripts/compare.js +334 -0
  363. package/dist/scripts/coverage-audit.d.ts +44 -0
  364. package/dist/scripts/coverage-audit.js +209 -0
  365. package/dist/scripts/debug-eval.d.ts +19 -0
  366. package/dist/scripts/debug-eval.js +73 -0
  367. package/dist/scripts/discovery-report.d.ts +58 -0
  368. package/dist/scripts/discovery-report.js +250 -0
  369. package/dist/scripts/fetch-docs.d.ts +35 -0
  370. package/dist/scripts/fetch-docs.js +472 -0
  371. package/dist/scripts/generate-configs.d.ts +66 -0
  372. package/dist/scripts/generate-configs.js +459 -0
  373. package/dist/scripts/grader-api.d.ts +27 -0
  374. package/dist/scripts/grader-api.js +206 -0
  375. package/dist/scripts/grader-compare.d.ts +22 -0
  376. package/dist/scripts/grader-compare.js +368 -0
  377. package/dist/scripts/grader-consistency.d.ts +20 -0
  378. package/dist/scripts/grader-consistency.js +313 -0
  379. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  380. package/dist/scripts/grader-sensitivity.js +354 -0
  381. package/dist/scripts/grader-validate.d.ts +19 -0
  382. package/dist/scripts/grader-validate.js +267 -0
  383. package/dist/scripts/measure-retrieval.d.ts +10 -0
  384. package/dist/scripts/measure-retrieval.js +145 -0
  385. package/dist/scripts/migrate-task-mode.d.ts +1 -1
  386. package/dist/scripts/migrate-task-mode.js +1 -1
  387. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
  388. package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
  389. package/dist/scripts/pipeline.d.ts +76 -0
  390. package/dist/scripts/pipeline.js +1031 -0
  391. package/dist/scripts/pr-comment.d.ts +10 -0
  392. package/dist/scripts/pr-comment.js +510 -0
  393. package/dist/scripts/readiness-report.d.ts +88 -0
  394. package/dist/scripts/readiness-report.js +342 -0
  395. package/dist/scripts/update-quality-scores.d.ts +15 -0
  396. package/dist/scripts/update-quality-scores.js +184 -0
  397. package/dist/scripts/validate-task-sources.d.ts +1 -1
  398. package/dist/scripts/validate-task-sources.js +1 -1
  399. package/dist/scripts/validate.d.ts +13 -0
  400. package/dist/scripts/validate.js +79 -0
  401. package/dist/scripts/webhook-server.d.ts +26 -0
  402. package/dist/scripts/webhook-server.js +147 -0
  403. package/dist/scripts/weekly-digest.d.ts +24 -0
  404. package/dist/scripts/weekly-digest.js +144 -0
  405. package/dist/sinks/format-slack.d.ts +64 -0
  406. package/dist/sinks/format-slack.js +306 -0
  407. package/dist/sinks/slack-sink.d.ts +27 -0
  408. package/dist/sinks/slack-sink.js +78 -0
  409. package/dist/sinks/types.d.ts +1 -1
  410. package/dist/sinks/types.js +1 -1
  411. package/dist/sinks/webhook-sink.d.ts +19 -0
  412. package/dist/sinks/webhook-sink.js +50 -0
  413. package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
  414. package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
  415. package/dist/tasks/literacy/content-lake.task.ts +181 -0
  416. package/dist/tasks/literacy/frameworks.task.ts +129 -0
  417. package/dist/tasks/literacy/functions.task.ts +70 -0
  418. package/dist/tasks/literacy/groq.task.ts +259 -0
  419. package/dist/tasks/literacy/image-handling.task.ts +95 -0
  420. package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
  421. package/dist/tasks/literacy/portable-text.task.ts +169 -0
  422. package/dist/tasks/literacy/studio-setup.task.ts +134 -0
  423. package/dist/tasks/literacy/visual-editing.task.ts +147 -0
  424. package/package.json +25 -25
  425. package/tasks/.expanded.agentic.yaml +280 -0
  426. package/tasks/.expanded.yaml +565 -0
  427. package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
  428. package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
  429. package/tasks/literacy/content-lake.task.ts +181 -0
  430. package/tasks/literacy/frameworks.task.ts +1 -0
  431. package/tasks/literacy/functions.task.ts +1 -0
  432. package/tasks/literacy/groq.task.ts +1 -0
  433. package/tasks/literacy/image-handling.task.ts +95 -0
  434. package/tasks/literacy/nextjs-live.task.ts +2 -1
  435. package/tasks/literacy/portable-text.task.ts +169 -0
  436. package/tasks/literacy/studio-setup.task.ts +5 -2
  437. package/tasks/literacy/visual-editing.task.ts +1 -0
  438. package/LICENSE +0 -21
  439. package/tasks/frameworks.yaml +0 -98
  440. package/tasks/functions.yaml +0 -51
  441. package/tasks/groq.yaml +0 -216
  442. package/tasks/nextjs-live.yaml +0 -62
  443. package/tasks/studio-setup.yaml +0 -111
  444. package/tasks/visual-editing.yaml +0 -120
@@ -11,7 +11,7 @@
11
11
  *
12
12
  * This module has NO side effects — no file I/O, no API calls.
13
13
  *
14
- * @see docs/exec-plans/grader-reliability.md — Phase 2
14
+ * @see docs/archive/exec-plans/grader-reliability.md — Phase 2
15
15
  */
16
16
  // ---------------------------------------------------------------------------
17
17
  // Pure computation
@@ -1,3 +1,4 @@
1
+ import { resolve } from "node:path";
1
2
  import { normalizeMode } from "./normalize-mode.js";
2
3
  /**
3
4
  * Map a PipelineRequest to a ResolvedConfig.
@@ -19,13 +20,20 @@ import { normalizeMode } from "./normalize-mode.js";
19
20
  export function mapRequestToConfig(request, rootDir) {
20
21
  // Normalize mode so downstream pipeline code only sees canonical names.
21
22
  // The API may receive legacy names ("baseline", "full") from older clients.
22
- const { mode, variant } = normalizeMode(request.mode ?? "full");
23
+ const { mode, variant: normalizedVariant } = normalizeMode(request.mode ?? "full");
24
+ // Explicit variant from request takes precedence over one derived from
25
+ // legacy mode normalization. This supports the canonical form:
26
+ // { mode: "literacy", variant: "baseline" }
27
+ // while preserving backward compatibility with:
28
+ // { mode: "baseline" } → normalizeMode → { mode: "literacy", variant: "baseline" }
29
+ const variant = request.variant ?? normalizedVariant;
23
30
  // API-triggered evaluations (identified by jobId) default to publish: true.
24
31
  // Without this, the job's reportId is always null and GET /v1/reports/:id
25
32
  // has nothing to return.
26
33
  const publishDefault = !!request.jobId;
27
34
  return {
28
35
  rootDir,
36
+ outputDir: resolve(rootDir, "results", "latest"),
29
37
  mode,
30
38
  variant,
31
39
  debug: mapDebug(request.debug),
@@ -66,6 +74,10 @@ export function mapRequestToConfig(request, rootDir) {
66
74
  callerGit: request.callerGit,
67
75
  callback: request.callback,
68
76
  jobId: request.jobId,
77
+ captureEnabled: false,
78
+ captureDir: undefined,
79
+ captureCompress: true,
80
+ captureExtras: true,
69
81
  remote: false,
70
82
  apiUrl: "https://ailf-api.sanity.build",
71
83
  presets: request.presets,
@@ -84,12 +96,13 @@ function mapDebug(debug) {
84
96
  };
85
97
  }
86
98
  function mapTaskSourceType(taskMode) {
87
- if (taskMode === "content-lake" || taskMode === "yaml")
99
+ if (taskMode === "content-lake")
88
100
  return taskMode;
89
101
  // "inline" means the caller sent inline tasks that will be materialized
90
102
  // to a temp directory and loaded via --repo-tasks-path. Use "repo" to
91
103
  // ensure ONLY those tasks are used (no Content Lake merge).
92
104
  if (taskMode === "inline")
93
105
  return "repo";
106
+ // "yaml" was removed — treat it as default (Content Lake)
94
107
  return undefined;
95
108
  }
@@ -10,7 +10,7 @@
10
10
  * means unchanged tasks are skipped. Changed tasks are upserted via
11
11
  * createOrReplace.
12
12
  *
13
- * @see docs/exec-plans/tasks-as-content/phase-5-content-lake-mirroring.md
13
+ * @see docs/archive/exec-plans/tasks-as-content/phase-5-content-lake-mirroring.md
14
14
  */
15
15
  import type { SanityClient } from "@sanity/client";
16
16
  import { type LiteracyTaskDefinition, type Logger } from "../_vendor/ailf-core/index.d.ts";
@@ -10,7 +10,7 @@
10
10
  * means unchanged tasks are skipped. Changed tasks are upserted via
11
11
  * createOrReplace.
12
12
  *
13
- * @see docs/exec-plans/tasks-as-content/phase-5-content-lake-mirroring.md
13
+ * @see docs/archive/exec-plans/tasks-as-content/phase-5-content-lake-mirroring.md
14
14
  */
15
15
  import { createHash } from "crypto";
16
16
  import { readFileSync } from "fs";
@@ -7,7 +7,7 @@
7
7
  * emoji markers, alignment, and color-coding (via unicode markers).
8
8
  * The JSON formatter produces machine-readable output for CI/CD.
9
9
  *
10
- * @see docs/exec-plans/execution-preview.md
10
+ * @see docs/archive/exec-plans/execution-preview.md
11
11
  */
12
12
  import type { ExecutionPlan } from "./plan.js";
13
13
  /**
@@ -7,7 +7,7 @@
7
7
  * emoji markers, alignment, and color-coding (via unicode markers).
8
8
  * The JSON formatter produces machine-readable output for CI/CD.
9
9
  *
10
- * @see docs/exec-plans/execution-preview.md
10
+ * @see docs/archive/exec-plans/execution-preview.md
11
11
  */
12
12
  import { formatCost } from "../agent-observer/pricing.js";
13
13
  // ---------------------------------------------------------------------------
@@ -7,7 +7,7 @@
7
7
  * anything. Calls existing pure functions (task expansion, model loading,
8
8
  * cache hashing, pricing) and composes them into an `ExecutionPlan`.
9
9
  *
10
- * @see docs/exec-plans/execution-preview.md
10
+ * @see docs/archive/exec-plans/execution-preview.md
11
11
  */
12
12
  import type { DebugOptions, EvalMode } from "./types.js";
13
13
  import { LiteracyVariant } from "./normalize-mode.js";
@@ -7,16 +7,17 @@
7
7
  * anything. Calls existing pure functions (task expansion, model loading,
8
8
  * cache hashing, pricing) and composes them into an `ExecutionPlan`.
9
9
  *
10
- * @see docs/exec-plans/execution-preview.md
10
+ * @see docs/archive/exec-plans/execution-preview.md
11
11
  */
12
12
  import { existsSync, readdirSync, statSync } from "fs";
13
13
  import { resolve } from "path";
14
+ import { createLiteracyModeBase, modelMatchesLiteracyVariant, } from "./compiler/mode-bases/literacy.js";
14
15
  import { lookupPricing } from "../agent-observer/pricing.js";
15
16
  import { RepoTaskSource } from "../adapters/task-sources/repo-task-source.js";
16
17
  import { loadAllTsTaskFiles } from "../adapters/task-sources/task-file-loader.js";
17
18
  import { lookupCache } from "./cache.js";
18
19
  import { compileLiteracyTasks } from "./compiler/literacy-bridge.js";
19
- import { tryLoadConfigFile } from "./compiler/config-loader.js";
20
+ import { resolveVendoredSubdir, tryLoadConfigFile, } from "./compiler/config-loader.js";
20
21
  import { LiteracyVariant } from "./normalize-mode.js";
21
22
  import { validateConfiguration } from "./validate.js";
22
23
  /**
@@ -44,33 +45,35 @@ function loadModelsFile(rootDir) {
44
45
  const result = tryLoadConfigFile("models", rootDir);
45
46
  return result?.data ?? null;
46
47
  }
48
+ const _literacyBase = createLiteracyModeBase();
47
49
  /**
48
- * Map eval mode + variant to the model "modes" array values from models config.
50
+ * Check whether a model participates in a given eval mode + optional variant.
49
51
  *
50
- * Literacy mode uses the variant to determine which model sub-modes match.
51
- * Non-literacy modes accept all models by default (filtering is done
52
- * elsewhere for those modes).
52
+ * For literacy mode, checks both mode enrollment and variant participation
53
+ * via the shared `modelMatchesLiteracyVariant` helper. For non-literacy
54
+ * modes, checks mode enrollment only.
53
55
  */
54
- function modeMatchesModelModes(mode, modelModes, variant) {
55
- if (!modelModes || modelModes.length === 0)
56
- return true;
57
- if (mode === "literacy") {
56
+ function modeMatchesModel(mode, model, variant) {
57
+ // Check basic mode enrollment
58
+ if (model.modes &&
59
+ model.modes.length > 0 &&
60
+ !model.modes.includes(mode)) {
61
+ return false;
62
+ }
63
+ // For literacy mode with a variant, check variant participation
64
+ if (mode === "literacy" && variant) {
58
65
  switch (variant) {
59
66
  case LiteracyVariant.AGENTIC:
60
- return (modelModes.includes("agentic-naive") ||
61
- modelModes.includes("agentic-optimized"));
62
- case LiteracyVariant.OBSERVED:
63
- return modelModes.includes(LiteracyVariant.OBSERVED);
67
+ return (modelMatchesLiteracyVariant(model, "agentic-naive") ||
68
+ modelMatchesLiteracyVariant(model, "agentic-optimized"));
64
69
  case LiteracyVariant.FULL:
65
- return (modelModes.includes(LiteracyVariant.STANDARD) ||
66
- modelModes.includes("agentic-naive") ||
67
- modelModes.includes("agentic-optimized"));
68
- case LiteracyVariant.STANDARD:
70
+ return (modelMatchesLiteracyVariant(model, "baseline") ||
71
+ modelMatchesLiteracyVariant(model, "agentic-naive") ||
72
+ modelMatchesLiteracyVariant(model, "agentic-optimized"));
69
73
  default:
70
- return modelModes.includes(LiteracyVariant.STANDARD);
74
+ return modelMatchesLiteracyVariant(model, variant);
71
75
  }
72
76
  }
73
- // Non-literacy modes accept all models by default
74
77
  return true;
75
78
  }
76
79
  // ---------------------------------------------------------------------------
@@ -139,8 +142,8 @@ export async function buildPipelinePlan(opts, rootDir) {
139
142
  const modelsForCompile = loadModelsFile(rootDir);
140
143
  const graderProvider = modelsForCompile?.grader?.id ?? "openai:chat:gpt-4o";
141
144
  const modelEntries = (modelsForCompile?.models ?? []).map((m) => ({ id: m.id, label: m.label }));
142
- // Load *.task.ts files from tasks/<mode>/
143
- const modeTasksDir = resolve(rootDir, "tasks", opts.mode);
145
+ // Load *.task.ts files from tasks/<mode>/ (or dist/tasks/<mode>/ when vendored)
146
+ const modeTasksDir = resolveVendoredSubdir(rootDir, `tasks/${opts.mode}`);
144
147
  if (existsSync(modeTasksDir)) {
145
148
  const rawTasks = await loadAllTsTaskFiles(modeTasksDir);
146
149
  if (rawTasks.length > 0) {
@@ -148,9 +151,16 @@ export async function buildPipelinePlan(opts, rootDir) {
148
151
  const handlerModulePath = `./compiler/mode-handlers/${opts.mode}/index.js`;
149
152
  const mod = await import(handlerModulePath);
150
153
  const handler = mod.handler;
154
+ const skippedByMode = new Map();
151
155
  for (const rawFile of rawTasks) {
152
156
  for (const taskDef of rawFile.tasks) {
153
157
  const task = taskDef;
158
+ // Filter to matching mode (skip tasks from other modes in same dir)
159
+ if ("mode" in task && task.mode !== opts.mode) {
160
+ const taskMode = task.mode ?? "unknown";
161
+ skippedByMode.set(taskMode, (skippedByMode.get(taskMode) ?? 0) + 1);
162
+ continue;
163
+ }
154
164
  // Apply area/task/tag filter
155
165
  if (filter) {
156
166
  if (filter.areas?.length &&
@@ -192,6 +202,13 @@ export async function buildPipelinePlan(opts, rootDir) {
192
202
  }
193
203
  }
194
204
  }
205
+ if (skippedByMode.size > 0) {
206
+ const summary = [...skippedByMode.entries()]
207
+ .map(([m, n]) => `${n} ${m}`)
208
+ .join(", ");
209
+ const total = [...skippedByMode.values()].reduce((a, b) => a + b, 0);
210
+ warnings.push(`Skipped ${total} task(s) with non-matching mode (${summary}). Current pipeline mode: ${opts.mode}. Run with --mode <mode> to include them.`);
211
+ }
195
212
  }
196
213
  }
197
214
  }
@@ -203,13 +220,29 @@ export async function buildPipelinePlan(opts, rootDir) {
203
220
  if (opts.repoTasksPath) {
204
221
  try {
205
222
  const repoSource = new RepoTaskSource(opts.repoTasksPath);
206
- // Type-narrow to literacy tasks — compileLiteracyTasks accepts LiteracyTaskDefinition[]
207
- const repoTasks = (await repoSource.loadTasks(filter)).filter((t) => t.mode === "literacy");
223
+ const allRepoTasks = await repoSource.loadTasks(filter);
224
+ // Filter to current mode tasks
225
+ const repoTasks = allRepoTasks.filter((t) => t.mode === opts.mode);
226
+ const skippedRepoTasks = allRepoTasks.length - repoTasks.length;
227
+ if (skippedRepoTasks > 0) {
228
+ const skippedModes = new Map();
229
+ for (const t of allRepoTasks) {
230
+ if (t.mode !== opts.mode) {
231
+ skippedModes.set(t.mode, (skippedModes.get(t.mode) ?? 0) + 1);
232
+ }
233
+ }
234
+ const summary = [...skippedModes.entries()]
235
+ .map(([m, n]) => `${n} ${m}`)
236
+ .join(", ");
237
+ warnings.push(`Skipped ${skippedRepoTasks} repo task(s) with non-matching mode (${summary}). Current pipeline mode: ${opts.mode}. Run with --mode <mode> to include them.`);
238
+ }
208
239
  repoTaskCount = repoTasks.length;
209
- if (repoTaskCount > 0) {
240
+ if (repoTaskCount > 0 && opts.mode === "literacy") {
241
+ // Literacy-specific compilation for repo tasks (detailed test expansion)
242
+ const literacyRepoTasks = repoTasks.filter((t) => t.mode === "literacy");
210
243
  const modelsForCompile = loadModelsFile(rootDir);
211
244
  const graderProvider = modelsForCompile?.grader?.id ?? "openai:chat:gpt-4o";
212
- const compileResult = compileLiteracyTasks(repoTasks, {
245
+ const compileResult = compileLiteracyTasks(literacyRepoTasks, {
213
246
  rootDir,
214
247
  evalMode: opts.variant === LiteracyVariant.AGENTIC
215
248
  ? LiteracyVariant.AGENTIC
@@ -231,6 +264,11 @@ export async function buildPipelinePlan(opts, rootDir) {
231
264
  }
232
265
  }
233
266
  }
267
+ else if (repoTaskCount > 0) {
268
+ // Non-literacy modes: approximate 1 test per task (compilation not
269
+ // supported for non-literacy repo tasks in the explain preview yet)
270
+ totalTests += repoTaskCount;
271
+ }
234
272
  }
235
273
  catch {
236
274
  warnings.push(`Failed to scan repo tasks at ${opts.repoTasksPath} — count may be underestimated`);
@@ -244,19 +282,19 @@ export async function buildPipelinePlan(opts, rootDir) {
244
282
  const models = [];
245
283
  let graderModelName = "";
246
284
  if (modelsFile) {
247
- const activeModels = modelsFile.models.filter((m) => modeMatchesModelModes(opts.mode, m.modes, opts.variant));
285
+ const activeModels = modelsFile.models.filter((m) => modeMatchesModel(opts.mode, m, opts.variant));
248
286
  // For agentic mode, each model appears twice (naive + optimized)
249
287
  for (const m of activeModels) {
250
288
  const modelName = extractModelName(m.id);
251
289
  if (opts.variant === LiteracyVariant.AGENTIC) {
252
- if (m.modes?.includes("agentic-naive")) {
290
+ if (modelMatchesLiteracyVariant(m, "agentic-naive")) {
253
291
  models.push({
254
292
  id: m.id,
255
293
  label: `${m.label} (Naive)`,
256
294
  modelName,
257
295
  });
258
296
  }
259
- if (m.modes?.includes("agentic-optimized")) {
297
+ if (modelMatchesLiteracyVariant(m, "agentic-optimized")) {
260
298
  models.push({
261
299
  id: m.id,
262
300
  label: `${m.label} (Optimized)`,
@@ -14,7 +14,7 @@
14
14
  * not "are these docs good enough?" The output is always labeled as
15
15
  * directional and never displayed on the same scale as scored evaluations.
16
16
  *
17
- * @see docs/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
17
+ * @see docs/archive/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
18
18
  */
19
19
  import type { ProbeResult } from "./types.js";
20
20
  /** Generic probe prompt template */
@@ -14,7 +14,7 @@
14
14
  * not "are these docs good enough?" The output is always labeled as
15
15
  * directional and never displayed on the same scale as scored evaluations.
16
16
  *
17
- * @see docs/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
17
+ * @see docs/archive/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
18
18
  */
19
19
  // ---------------------------------------------------------------------------
20
20
  // Constants
@@ -13,8 +13,8 @@
13
13
  * - generateReadinessReport() — builds the structured report
14
14
  * - formatReadinessMarkdown() — renders the report as markdown
15
15
  *
16
- * @see docs/exec-plans/scenario-matrix-implementation/phase-5-readiness-thresholds.md
17
- * @see docs/exec-plans/eliminate-lib-layer.md
16
+ * @see docs/archive/exec-plans/scenario-matrix-implementation/phase-5-readiness-thresholds.md
17
+ * @see docs/archive/exec-plans/eliminate-lib-layer.md
18
18
  */
19
19
  import type { ThresholdConfig } from "./schemas.js";
20
20
  import type { GapAnalysisReport, GapEstimate, ScoreSummary, ThresholdEvaluation, ThresholdViolation } from "./types.js";
@@ -13,8 +13,8 @@
13
13
  * - generateReadinessReport() — builds the structured report
14
14
  * - formatReadinessMarkdown() — renders the report as markdown
15
15
  *
16
- * @see docs/exec-plans/scenario-matrix-implementation/phase-5-readiness-thresholds.md
17
- * @see docs/exec-plans/eliminate-lib-layer.md
16
+ * @see docs/archive/exec-plans/scenario-matrix-implementation/phase-5-readiness-thresholds.md
17
+ * @see docs/archive/exec-plans/eliminate-lib-layer.md
18
18
  */
19
19
  import { evaluateThresholds } from "./thresholds.js";
20
20
  // ---------------------------------------------------------------------------
@@ -15,7 +15,7 @@
15
15
  * - **not-applicable**: Updated, removed, or unchanged documents (these
16
16
  * follow the standard before/after comparison flow from Phase 2).
17
17
  *
18
- * @see docs/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
18
+ * @see docs/archive/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
19
19
  */
20
20
  import type { ClassifiedReleaseDocument, ProductFeature, ReleaseClassification } from "./types.js";
21
21
  import type { ReverseMapping } from "./reverse-mapping.js";
@@ -15,7 +15,7 @@
15
15
  * - **not-applicable**: Updated, removed, or unchanged documents (these
16
16
  * follow the standard before/after comparison flow from Phase 2).
17
17
  *
18
- * @see docs/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
18
+ * @see docs/archive/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
19
19
  */
20
20
  // ---------------------------------------------------------------------------
21
21
  // Public API
@@ -10,7 +10,7 @@
10
10
  * attribution (2c), and probe results (4b) into the document × area × task
11
11
  * impact matrix specified by Scenario 2.4.
12
12
  *
13
- * @see docs/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
13
+ * @see docs/archive/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
14
14
  */
15
15
  import type { AttributionReport, ComparisonReport, ProbeResult, ReleaseClassification, ReleaseImpactReport } from "./types.js";
16
16
  /**
@@ -10,7 +10,7 @@
10
10
  * attribution (2c), and probe results (4b) into the document × area × task
11
11
  * impact matrix specified by Scenario 2.4.
12
12
  *
13
- * @see docs/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
13
+ * @see docs/archive/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
14
14
  */
15
15
  // ---------------------------------------------------------------------------
16
16
  // Public API
@@ -12,7 +12,7 @@
12
12
  * - Clear "what does this mean?" context
13
13
  * - skip-ailf bypass instructions
14
14
  *
15
- * @see docs/exec-plans/tasks-as-content/phase-6-pr-quality-gates.md
15
+ * @see docs/archive/exec-plans/tasks-as-content/phase-6-pr-quality-gates.md
16
16
  * @see packages/eval/src/pipeline/repo-threshold-evaluator.ts
17
17
  */
18
18
  import type { ComparisonReport, ScoreSummary } from "./types.js";
@@ -12,7 +12,7 @@
12
12
  * - Clear "what does this mean?" context
13
13
  * - skip-ailf bypass instructions
14
14
  *
15
- * @see docs/exec-plans/tasks-as-content/phase-6-pr-quality-gates.md
15
+ * @see docs/archive/exec-plans/tasks-as-content/phase-6-pr-quality-gates.md
16
16
  * @see packages/eval/src/pipeline/repo-threshold-evaluator.ts
17
17
  */
18
18
  // ---------------------------------------------------------------------------
@@ -10,7 +10,7 @@
10
10
  * thresholds are per-area, defined by the AILF team, and drive
11
11
  * readiness reports.
12
12
  *
13
- * @see docs/exec-plans/tasks-as-content/phase-6-pr-quality-gates.md
13
+ * @see docs/archive/exec-plans/tasks-as-content/phase-6-pr-quality-gates.md
14
14
  * @see packages/eval/src/adapters/task-sources/repo-schemas.ts
15
15
  */
16
16
  import type { ScoreSummary } from "./types.js";
@@ -10,7 +10,7 @@
10
10
  * thresholds are per-area, defined by the AILF team, and drive
11
11
  * readiness reports.
12
12
  *
13
- * @see docs/exec-plans/tasks-as-content/phase-6-pr-quality-gates.md
13
+ * @see docs/archive/exec-plans/tasks-as-content/phase-6-pr-quality-gates.md
14
14
  * @see packages/eval/src/adapters/task-sources/repo-schemas.ts
15
15
  */
16
16
  // ---------------------------------------------------------------------------
@@ -1,9 +1,9 @@
1
1
  /**
2
2
  * pipeline/resolve-mappings.ts
3
3
  *
4
- * Extracts canonical mappings from inline task YAML definitions.
5
- * Each task file contains canonical_docs and reference_solution fields
6
- * directly — there is no separate mappings.yaml file.
4
+ * Extracts canonical mappings from task definitions (*.task.ts files).
5
+ * Each task contains context.docs and referenceSolution fields
6
+ * directly — there is no separate mappings file.
7
7
  *
8
8
  * The output shape matches what downstream consumers expect so
9
9
  * fetch-docs, validate, and calculate-scores work without changes.
@@ -24,12 +24,12 @@ export interface ResolvedMappings {
24
24
  }>;
25
25
  }
26
26
  /**
27
- * Extract inline canonical mappings from task YAML files.
28
- * Only tasks with both an id and canonical_docs fields are included.
27
+ * Extract canonical mappings from *.task.ts files in tasks/literacy/.
28
+ * Only tasks with context.docs and referenceSolution are included.
29
29
  */
30
30
  export declare function extractInlineMappings(rootDir: string): ResolvedMappings;
31
31
  /**
32
- * Resolve canonical mappings from inline task YAML definitions.
32
+ * Resolve canonical mappings from task definitions.
33
33
  * This is the single source of truth — there is no external mappings file.
34
34
  */
35
35
  export declare function resolveMappings(rootDir: string): ResolvedMappings;
@@ -1,72 +1,72 @@
1
1
  /**
2
2
  * pipeline/resolve-mappings.ts
3
3
  *
4
- * Extracts canonical mappings from inline task YAML definitions.
5
- * Each task file contains canonical_docs and reference_solution fields
6
- * directly — there is no separate mappings.yaml file.
4
+ * Extracts canonical mappings from task definitions (*.task.ts files).
5
+ * Each task contains context.docs and referenceSolution fields
6
+ * directly — there is no separate mappings file.
7
7
  *
8
8
  * The output shape matches what downstream consumers expect so
9
9
  * fetch-docs, validate, and calculate-scores work without changes.
10
10
  */
11
- import { existsSync, readFileSync, readdirSync } from "fs";
12
- import { resolve } from "path";
13
- import { load } from "js-yaml";
11
+ import { existsSync } from "fs";
12
+ import { discoverTsTaskFiles, loadTsTaskFileSync, } from "../adapters/task-sources/task-file-loader.js";
13
+ import { resolveVendoredSubdir } from "./compiler/config-loader.js";
14
14
  // ---------------------------------------------------------------------------
15
15
  // Resolution
16
16
  // ---------------------------------------------------------------------------
17
17
  /**
18
- * Extract inline canonical mappings from task YAML files.
19
- * Only tasks with both an id and canonical_docs fields are included.
18
+ * Extract canonical mappings from *.task.ts files in tasks/literacy/.
19
+ * Only tasks with context.docs and referenceSolution are included.
20
20
  */
21
21
  export function extractInlineMappings(rootDir) {
22
- const tasksDir = resolve(rootDir, "tasks");
22
+ const tasksDir = resolveVendoredSubdir(rootDir, "tasks/literacy");
23
23
  const result = { feature_areas: {} };
24
24
  if (!existsSync(tasksDir))
25
25
  return result;
26
- const yamlFiles = readdirSync(tasksDir)
27
- .filter((f) => (f.endsWith(".yaml") || f.endsWith(".yml")) && !f.startsWith("."))
28
- .sort();
29
- for (const file of yamlFiles) {
30
- const featureArea = file.replace(/\.(yaml|yml)$/, "");
31
- const filePath = resolve(tasksDir, file);
32
- const raw = readFileSync(filePath, "utf-8");
33
- const parsed = load(raw);
34
- if (!Array.isArray(parsed))
35
- continue;
36
- const tasks = [];
37
- for (const entry of parsed) {
38
- if (!isInlineTaskWithMappings(entry))
26
+ const files = discoverTsTaskFiles(tasksDir);
27
+ for (const file of files) {
28
+ const loaded = loadTsTaskFileSync(file);
29
+ for (const task of loaded.tasks) {
30
+ const t = task;
31
+ const area = typeof t.area === "string" ? t.area : undefined;
32
+ const id = typeof t.id === "string" ? t.id : undefined;
33
+ const title = typeof t.title === "string" ? t.title : "";
34
+ const referenceSolution = typeof t.referenceSolution === "string" ? t.referenceSolution : "";
35
+ if (!area || !id)
39
36
  continue;
40
- tasks.push({
41
- canonical_docs: entry.canonical_docs,
42
- description: entry.description,
43
- id: entry.id,
44
- reference_solution: entry.reference_solution,
37
+ // Extract docs from context.docs (GeneralizedDocRef[])
38
+ const context = t.context;
39
+ const docs = [];
40
+ if (context?.docs && Array.isArray(context.docs)) {
41
+ for (const doc of context.docs) {
42
+ const d = doc;
43
+ if (typeof d.slug === "string") {
44
+ docs.push({
45
+ slug: d.slug,
46
+ reason: typeof d.reason === "string" ? d.reason : "",
47
+ });
48
+ }
49
+ }
50
+ }
51
+ if (docs.length === 0 || !referenceSolution)
52
+ continue;
53
+ if (!result.feature_areas[area]) {
54
+ result.feature_areas[area] = { tasks: [] };
55
+ }
56
+ result.feature_areas[area].tasks.push({
57
+ canonical_docs: docs,
58
+ description: title,
59
+ id,
60
+ reference_solution: referenceSolution,
45
61
  });
46
62
  }
47
- if (tasks.length > 0) {
48
- result.feature_areas[featureArea] = { tasks };
49
- }
50
63
  }
51
64
  return result;
52
65
  }
53
66
  /**
54
- * Resolve canonical mappings from inline task YAML definitions.
67
+ * Resolve canonical mappings from task definitions.
55
68
  * This is the single source of truth — there is no external mappings file.
56
69
  */
57
70
  export function resolveMappings(rootDir) {
58
71
  return extractInlineMappings(rootDir);
59
72
  }
60
- // ---------------------------------------------------------------------------
61
- // Helpers
62
- // ---------------------------------------------------------------------------
63
- function isInlineTaskWithMappings(entry) {
64
- if (typeof entry !== "object" || entry === null)
65
- return false;
66
- const e = entry;
67
- return (typeof e.id === "string" &&
68
- typeof e.description === "string" &&
69
- Array.isArray(e.canonical_docs) &&
70
- e.canonical_docs.length > 0 &&
71
- typeof e.reference_solution === "string");
72
- }
@@ -2,9 +2,9 @@
2
2
  * pipeline/retrieval-metrics.ts
3
3
  *
4
4
  * Computes retrieval precision and recall by comparing agent-retrieved
5
- * doc slugs against canonical_docs defined in task YAML files.
5
+ * doc slugs against canonical_docs defined in task definitions.
6
6
  *
7
- * This is a pure computation module — no file I/O beyond reading task YAMLs.
7
+ * This is a pure computation module — no file I/O beyond reading task files.
8
8
  */
9
9
  import type { RetrievalMetrics, TaskRetrievalMetrics } from "./types.js";
10
10
  export interface AgenticBehaviorData {
@@ -30,7 +30,7 @@ export declare function computeRetrievalMetrics(rootDir: string, behaviors: Agen
30
30
  */
31
31
  export declare function computeTaskMetrics(taskId: string, area: string, retrieved: string[], canonical: Set<string>): TaskRetrievalMetrics;
32
32
  /**
33
- * Load canonical_docs from all task YAML files.
33
+ * Load canonical docs from *.task.ts files in tasks/literacy/.
34
34
  * Returns a map of taskId → { slugs: Set<string>, area: string }.
35
35
  */
36
36
  export declare function loadCanonicalDocs(rootDir: string): Map<string, {